This page was generated from source/notebooks/L2/data_io.ipynb.
Binder badge
Binder badge

Vector Data I/O from various formats / sources

In GIS, there are various different data formats and sources (such as databases or WFS) that can be used to read the data. This tutorial will show some typical examples how to read (and write) from different alternatives.

  • To see all possible file formats supported by GDAL, execute following:
In [44]:
# See all available drivers supported by GDAL
import fiona
from fiona._drivers import GDALEnv
env = GDALEnv()

# This will print all available Drivers supported by GDAL
# All these can be used for reading data from them
# and most often also writing data into

env.start().drivers().keys()
Out[44]:
dict_keys(['PCIDSK', 'netCDF', 'JP2OpenJPEG', 'PDF', 'DB2ODBC', 'ESRI Shapefile', 'MapInfo File', 'UK .NTF', 'OGR_SDTS', 'S57', 'DGN', 'OGR_VRT', 'REC', 'Memory', 'BNA', 'CSV', 'NAS', 'GML', 'GPX', 'KML', 'GeoJSON', 'OGR_GMT', 'GPKG', 'SQLite', 'ODBC', 'WAsP', 'PGeo', 'MSSQLSpatial', 'PostgreSQL', 'OpenFileGDB', 'XPlane', 'DXF', 'CAD', 'Geoconcept', 'GeoRSS', 'GPSTrackMaker', 'VFK', 'PGDUMP', 'OSM', 'GPSBabel', 'SUA', 'OpenAir', 'OGR_PDS', 'WFS', 'HTF', 'AeronavFAA', 'Geomedia', 'EDIGEO', 'GFT', 'SVG', 'CouchDB', 'Cloudant', 'Idrisi', 'ARCGEN', 'SEGUKOOA', 'SEGY', 'XLS', 'ODS', 'XLSX', 'ElasticSearch', 'Walk', 'Carto', 'AmigoCloud', 'SXF', 'Selafin', 'JML', 'PLSCENES', 'CSW', 'VDV', 'GMLAS', 'TIGER', 'AVCBin', 'AVCE00', 'HTTP'])

Read / write Shapefile

In [34]:
import geopandas as gpd

# Read file from Shapefile
fp = "L2_data/Finland.shp"
data = gpd.read_file(fp)

# Write to Shapefile (just make a copy)
outfp = "L2_data/Finland_copy.shp"
data.to_file(outfp)

Read / write GeoJSON

In [3]:
import geopandas as gpd

# Read file from GeoJSON
fp = "L2_data/Finland.geojson"
data = gpd.read_file(fp, driver="GeoJSON")

# Write to GeoJSON (just make a copy)
outfp = "L2_data/Finland_copy.geojson"
data.to_file(outfp, driver="GeoJSON")

Read / write KML

In [4]:
import geopandas as gpd

# Enable KML driver
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'

# Read file from KML
fp = "L2_data/Finland.kml"
data = gpd.read_file(fp)

# Write to KML (just make a copy)
outfp = "L2_data/Finland_copy.kml"
data.to_file(outfp, driver="KML")

Read / write Geopackage

In [7]:
import geopandas as gpd

# Read file from Geopackage
fp = "L2_data/Finland.gpkg"
data = gpd.read_file(fp)

# Write to Geopackage (just make a copy)
outfp = "L2_data/Finland_copy.gpkg"
data.to_file(outfp, driver="GPKG")

Read / write GeoDatabase

In [8]:
import geopandas as gpd

# Read file from File Geodatabase
fp = "L2_data/Finland.gdb"
data = gpd.read_file(fp, driver="FileGDB", layer='country')

# Write to same FileGDB (just add a new layer)
outfp = "L2_data/Finland.gdb"
data.to_file(outfp, driver="FileGDB", layer="country_copy")

Read / write MapInfo Tab

In [9]:
# Read file from MapInfo Tab
fp = "L2_data/Finland.tab"
data = gpd.read_file(fp, driver="MapInfo File")

# Write to same FileGDB (just add a new layer)
outfp = "L2_data/Finland_copy.tab"
data.to_file(outfp, driver="MapInfo File")

Read PostGIS database using psycopg2

In [10]:
import geopandas as gpd
import psycopg2

# Create connection to database with psycopg2 module (update params according your db)
conn, cursor = psycopg2.connect(dbname='my_postgis_database', user='my_usrname', password='my_pwd',
                                host='123.22.432.16', port=5432)

# Specify sql query
sql = "SELECT * FROM MY_TABLE;"

# Read data from PostGIS
data = gpd.read_postgis(sql=sql, con=conn)

Read / write PostGIS database using SqlAlchemy + GeoAlchemy

In [11]:
from sqlalchemy.engine.url import URL
from sqlalchemy import create_engine
from sqlalchemy import MetaData
from sqlalchemy.orm import sessionmaker
from geoalchemy2 import WKTElement, Geometry

# Update with your db parameters
HOST = '123.234.345.16'
DB = 'my_database'
USER = 'my_user'
PORT = 5432
PWD = 'my_password'

# Database info
db_url = URL(drivername='postgresql+psycopg2', host=HOST, database=DB,
                   username=USER, port=PORT, password=PWD)

# Create engine
engine = create_engine(db_url)

# Init Metadata
meta = MetaData()

# Load table definitions from db
meta.reflect(engine)

# Create session
Session = sessionmaker(bind=engine)
session = Session()

# ========================
# Read data from PostGIS
# ========================

# Specify sql query
sql = "SELECT * FROM finland;"

# Pull the data
data = gpd.read_postgis(sql=sql, con=engine)

# Close session
session.close()

# =========================================
# Write data to PostGIS (make a copy table)
# =========================================

# Coordinate Reference System (srid)
crs = 4326

# Target table
target_table = 'finland_copy'

# Convert Shapely geometries to WKTElements into column 'geom' (default in PostGIS)
data['geom'] = data['geometry'].apply(lambda row: WKTElement(row.wkt, srid=crs))

# Drop Shapely geometries
data = data.drop('geometry', axis=1)

# Write to PostGIS (overwrite if table exists, be careful with this! )
# Possible behavior: 'replace', 'append', 'fail'

data.to_sql(target_table, engine, if_exists='replace', index=False)

Read / write Spatialite database

In [39]:
import geopandas as gpd
import sqlite3
import shapely.wkb as swkb
from sqlalchemy import create_engine, event

# DB path
dbfp = 'L2_data/Finland.sqlite'

# Name for the table
tbl_name = 'finland'

# SRID (crs of your data)
srid = 4326

# Parse Geometry type of the input Data
gtype = data.geom_type.unique()
assert len(gtype) == 1, "Mixed Geometries! Cannot insert into SQLite table."
geom_type = gtype[0].upper()

# Initialize database engine
engine = create_engine('sqlite:///{db}'.format(db=dbfp), module=sqlite)

# Initialize table without geometries
geo = data.drop(['geometry'], axis=1)

with sqlite3.connect(dbfp) as conn:
    geo.to_sql(tbl_name, conn, if_exists='replace', index=False)

# Enable spatialite extension
with sqlite3.connect(dbfp) as conn:
    conn.enable_load_extension(True)
    conn.load_extension("mod_spatialite")
    conn.execute("SELECT InitSpatialMetaData(1);")
    # Add geometry column with specified CRS with defined geometry typehaving two dimensions
    conn.execute(
        "SELECT AddGeometryColumn({table}, 'wkb_geometry',\
        {srid}, {geom_type}, 2);".format(table=tbl_name, srid=srid, geom_type=geom_type)
    )

# Convert Shapely geometries into well-known-binary format
data['geometry'] = data['geometry'].apply(lambda geom: swkb.dumps(geom))

# Push to database (overwrite if table exists)
data.to_sql(tbl_name, engine, if_exists='replace', index=False)

Read Web Feature Service (WFS)

In [28]:
import geopandas as gpd
import requests
import geojson

# Specify the url for the backend
url = 'http://geo.stat.fi/geoserver/vaestoruutu/wfs'

# Specify parameters (read data in json format)
params = dict(service='WFS', version='2.0.0', request='GetFeature',
         typeName='vaestoruutu:vaki2017_5km', outputFormat='json')

# Fetch data from WFS using requests
r = requests.get(url, params=params)

# Create GeoDataFrame from geojson
data = gpd.GeoDataFrame.from_features(geojson.loads(r.content))