Vector Data I/O in Python¶
Geographic information comes in many different file formats and data sources. This tutorial shows some typical examples of how to read (and write) data from different sources.
- To see all possible file formats supported by GDAL, execute the following:
[1]:
# List all drivers supported by GDAL
import fiona
from fiona._drivers import GDALEnv

# Start a GDAL environment and ask it for its drivers.
# All of these can be used for reading data, and most of them
# can also be used for writing data.
env = GDALEnv()
gdal_drivers = env.start().drivers()

# Show the driver names (last expression of the cell is displayed)
gdal_drivers.keys()
[1]:
dict_keys(['PCIDSK', 'netCDF', 'PDS4', 'JP2OpenJPEG', 'PDF', 'MBTiles', 'EEDA', 'ESRI Shapefile', 'MapInfo File', 'UK .NTF', 'OGR_SDTS', 'S57', 'DGN', 'OGR_VRT', 'REC', 'Memory', 'BNA', 'CSV', 'NAS', 'GML', 'GPX', 'LIBKML', 'KML', 'GeoJSON', 'GeoJSONSeq', 'ESRIJSON', 'TopoJSON', 'Interlis 1', 'Interlis 2', 'OGR_GMT', 'GPKG', 'SQLite', 'OGR_DODS', 'WAsP', 'PostgreSQL', 'OpenFileGDB', 'XPlane', 'DXF', 'CAD', 'Geoconcept', 'GeoRSS', 'GPSTrackMaker', 'VFK', 'PGDUMP', 'OSM', 'GPSBabel', 'SUA', 'OpenAir', 'OGR_PDS', 'WFS', 'WFS3', 'HTF', 'AeronavFAA', 'EDIGEO', 'GFT', 'SVG', 'CouchDB', 'Cloudant', 'Idrisi', 'ARCGEN', 'SEGUKOOA', 'SEGY', 'XLS', 'ODS', 'XLSX', 'ElasticSearch', 'Carto', 'AmigoCloud', 'SXF', 'Selafin', 'JML', 'PLSCENES', 'CSW', 'VDV', 'GMLAS', 'MVT', 'TIGER', 'AVCBin', 'AVCE00', 'NGW', 'HTTP'])
[2]:
# Drivers available in geopandas/fiona, mapped to mode strings
# such as 'r', 'rw' or 'raw'. Same as: fiona.supported_drivers
import geopandas as gpd

fiona_drvsupport = gpd.io.file.fiona.drvsupport
fiona_drvsupport.supported_drivers
[2]:
{'AeronavFAA': 'r',
'ARCGEN': 'r',
'BNA': 'raw',
'DXF': 'raw',
'CSV': 'raw',
'OpenFileGDB': 'r',
'ESRIJSON': 'r',
'ESRI Shapefile': 'raw',
'GeoJSON': 'rw',
'GeoJSONSeq': 'rw',
'GPKG': 'rw',
'GML': 'raw',
'GPX': 'raw',
'GPSTrackMaker': 'raw',
'Idrisi': 'r',
'MapInfo File': 'raw',
'DGN': 'raw',
'PCIDSK': 'r',
'S57': 'r',
'SEGY': 'r',
'SUA': 'r',
'TopoJSON': 'r'}
Read / write Shapefile¶
[ ]:
import geopandas as gpd

# Input Shapefile and the path for its copy
fp = "L2_data/Finland.shp"
outfp = "L2_data/Finland_copy.shp"

# Load the Shapefile
data = gpd.read_file(fp)

# Save the data back to disk as a Shapefile (just make a copy)
data.to_file(outfp)
Read / write GeoJSON¶
[ ]:
import geopandas as gpd

# Input GeoJSON file and the path for its copy
fp = "L2_data/Finland.geojson"
outfp = "L2_data/Finland_copy.geojson"

# Load the GeoJSON file, naming the driver explicitly
data = gpd.read_file(fp, driver="GeoJSON")

# Save the data back to disk as GeoJSON (just make a copy)
data.to_file(outfp, driver="GeoJSON")
Read / write KML¶
[ ]:
import geopandas as gpd

# KML is not in the default driver list shown earlier, so register it
# for both reading and writing before touching any KML file
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'

# Input KML file and the path for its copy
fp = "L2_data/Finland.kml"
outfp = "L2_data/Finland_copy.kml"

# Load the KML file
data = gpd.read_file(fp)

# Save the data back to disk as KML (just make a copy)
data.to_file(outfp, driver="KML")
Read / write Geopackage¶
[ ]:
import geopandas as gpd

# Input GeoPackage and the path for its copy
fp = "L2_data/Finland.gpkg"
outfp = "L2_data/Finland_copy.gpkg"

# Load the GeoPackage
data = gpd.read_file(fp)

# Save the data back to disk as a GeoPackage (just make a copy)
data.to_file(outfp, driver="GPKG")
Read / write GeoDatabase¶
[ ]:
import geopandas as gpd

# The same File Geodatabase is used as both the source and the target
fp = "L2_data/Finland.gdb"
outfp = "L2_data/Finland.gdb"

# NOTE(review): 'FileGDB' does not appear in the driver list printed at the
# top of this tutorial (only 'OpenFileGDB' does) -- confirm that the driver
# is available in your GDAL build before running this cell.

# Load the 'country' layer from the File Geodatabase
data = gpd.read_file(fp, driver="FileGDB", layer='country')

# Store the data in the same FileGDB (just add a new layer)
data.to_file(outfp, driver="FileGDB", layer="country_copy")
Read / write MapInfo Tab¶
[ ]:
# Imported here as in the other cells, so this cell also runs on its own
import geopandas as gpd

# Read file from MapInfo Tab
fp = "L2_data/Finland.tab"
data = gpd.read_file(fp, driver="MapInfo File")

# Write to MapInfo Tab (just make a copy)
outfp = "L2_data/Finland_copy.tab"
data.to_file(outfp, driver="MapInfo File")
Read PostGIS database using psycopg2¶
[ ]:
import geopandas as gpd
import psycopg2

# Create connection to database with psycopg2 module (update params according to your db).
# psycopg2.connect() returns a single connection object, not a (connection, cursor) pair.
conn = psycopg2.connect(dbname='my_postgis_database', user='my_usrname', password='my_pwd',
                        host='123.22.432.16', port=5432)

# Specify sql query
sql = "SELECT * FROM MY_TABLE;"

# Read data from PostGIS and always release the connection afterwards
try:
    data = gpd.read_postgis(sql=sql, con=conn)
finally:
    conn.close()
Read / write PostGIS database using SqlAlchemy + GeoAlchemy¶
[ ]:
# geopandas is imported here so that this cell can run on its own
import geopandas as gpd
from sqlalchemy.engine.url import URL
from sqlalchemy import create_engine
from sqlalchemy import MetaData
from sqlalchemy.orm import sessionmaker
from geoalchemy2 import WKTElement, Geometry

# Update with your db parameters
HOST = '123.234.345.16'
DB = 'my_database'
USER = 'my_user'
PORT = 5432
PWD = 'my_password'

# Database info
# SQLAlchemy >= 1.4 builds URLs with URL.create(); fall back to calling
# URL(...) directly only on older versions where create() does not exist.
make_url = getattr(URL, 'create', URL)
db_url = make_url(drivername='postgresql+psycopg2', host=HOST, database=DB,
                  username=USER, port=PORT, password=PWD)

# Create engine
engine = create_engine(db_url)

# Init Metadata
meta = MetaData()

# Load table definitions from db
meta.reflect(engine)

# Create session
Session = sessionmaker(bind=engine)
session = Session()

# ========================
# Read data from PostGIS
# ========================

# Specify sql query
sql = "SELECT * FROM finland;"

# Pull the data
data = gpd.read_postgis(sql=sql, con=engine)

# Close session
session.close()

# =========================================
# Write data to PostGIS (make a copy table)
# =========================================

# Coordinate Reference System (srid)
crs = 4326

# Target table
target_table = 'finland_copy'

# Convert Shapely geometries to WKTElements into column 'geom' (default in PostGIS)
data['geom'] = data['geometry'].apply(lambda geom: WKTElement(geom.wkt, srid=crs))

# Drop Shapely geometries
data = data.drop('geometry', axis=1)

# Write to PostGIS (overwrite if table exists, be careful with this! )
# Possible behavior: 'replace', 'append', 'fail'
# The dtype mapping declares 'geom' as a PostGIS geometry column so that the
# WKTElement values are stored as geometries; the generic 'GEOMETRY' type
# accepts any geometry type present in the data.
data.to_sql(target_table, engine, if_exists='replace', index=False,
            dtype={'geom': Geometry(geometry_type='GEOMETRY', srid=crs)})
Read / write Spatialite database¶
[ ]:
import geopandas as gpd
import sqlite3
import shapely.wkb as swkb
from sqlalchemy import create_engine, event

# DB path
dbfp = 'L2_data/Finland.sqlite'

# Name for the table
tbl_name = 'finland'

# SRID (crs of your data)
srid = 4326

# Parse Geometry type of the input Data
# (`data` is the GeoDataFrame loaded in one of the earlier cells)
gtype = data.geom_type.unique()
assert len(gtype) == 1, "Mixed Geometries! Cannot insert into SQLite table."
geom_type = gtype[0].upper()

# Initialize database engine, using the stdlib sqlite3 module as the DBAPI
engine = create_engine('sqlite:///{db}'.format(db=dbfp), module=sqlite3)

# Initialize table without geometries
geo = data.drop(['geometry'], axis=1)
with sqlite3.connect(dbfp) as conn:
    geo.to_sql(tbl_name, conn, if_exists='replace', index=False)

# Enable spatialite extension
with sqlite3.connect(dbfp) as conn:
    conn.enable_load_extension(True)
    conn.load_extension("mod_spatialite")
    conn.execute("SELECT InitSpatialMetaData(1);")
    # Add geometry column with specified CRS and the detected geometry type,
    # having two dimensions. The table name and geometry type are bound as
    # parameters so they reach SQLite as properly quoted text values.
    conn.execute(
        "SELECT AddGeometryColumn(?, 'wkb_geometry', ?, ?, 2);",
        (tbl_name, srid, geom_type),
    )

# Convert Shapely geometries into well-known-binary format
data['geometry'] = data['geometry'].apply(lambda geom: swkb.dumps(geom))

# Push to database (overwrite if table exists)
# NOTE(review): if_exists='replace' replaces the table that was just given a
# 'wkb_geometry' column above -- confirm this is the intended final step.
data.to_sql(tbl_name, engine, if_exists='replace', index=False)
Read Web Feature Service (WFS)¶
[ ]:
import geopandas as gpd
import requests
import geojson

# WFS backend to query. Here we are using data from Statistics Finland:
# https://www.stat.fi/org/avoindata/paikkatietoaineistot_en.html
url = 'http://geo.stat.fi/geoserver/vaestoruutu/wfs'

# Request parameters (ask for the data in json format).
# Available feature types in this particular data source:
# http://geo.stat.fi/geoserver/vaestoruutu/wfs?service=wfs&version=2.0.0&request=describeFeatureType
params = {
    'service': 'WFS',
    'version': '2.0.0',
    'request': 'GetFeature',
    'typeName': 'vaestoruutu:vaki2017_5km',
    'outputFormat': 'json',
}

# Download the features from the WFS using requests
r = requests.get(url, params=params)

# Parse the response body as geojson and build a GeoDataFrame from it
features = geojson.loads(r.content)
data = gpd.GeoDataFrame.from_features(features)