Functions to get the province shapes.

fetch_natural_earth_records(country_iso2_code='CN')

fetch the province/state level (1st admin level) from the NATURAL_EARTH data store and make a file

Parameters:
  • country_iso2_code (str, default: 'CN' ) –

    the country code (iso_a2) for which provincial records will be extracted. None will not filter (untested). Defaults to 'CN'.

Returns: Records: the natural earth records

Source code in workflow/scripts/build_province_shapes.py
def fetch_natural_earth_records(country_iso2_code="CN") -> list:
    """Fetch the province/state level (1st admin level) records from the
            NATURAL_EARTH data store.

    The shapefile is located through ``shpreader.natural_earth`` using the
    module-level NATURAL_EARTH_RESOLUTION and NATURAL_EARTH_DATA_SET settings.

    Args:
        country_iso2_code (str, optional): the country code (iso_a2) for which
             provincial records will be extracted. None disables the filter so
             the records of every country are returned (untested).
             Defaults to 'CN'.
    Returns:
        list: the natural earth records. A list is returned on both the
             filtered and the unfiltered path so the result can be indexed
             and iterated more than once.
    """

    shpfilename = shpreader.natural_earth(
        resolution=NATURAL_EARTH_RESOLUTION, category="cultural", name=NATURAL_EARTH_DATA_SET
    )
    reader = shpreader.Reader(shpfilename)
    logger.info("Succesfully downloaded natural earth shapefiles")
    provinces_states = reader.records()

    def filter_country_code(records: object, target_iso_a2_code="CN") -> list:
        """Filter provincial/state (admin level 1) records for one country.

        Args:
            records (shpreader.Reader.records): the records object from cartopy
                    shpreader for natural earth dataset
            target_iso_a2_code (str, optional): the country code (iso_a2) for which
                    provincial records will be extracted. Defaults to 'CN'.

        Returns:
            list: the records whose "iso_a2" attribute equals the target code
        """
        return [rec for rec in records if rec.attributes["iso_a2"] == target_iso_a2_code]

    # TODO test with none
    if country_iso2_code is None:
        # Materialise the reader output so callers get the same container type
        # as on the filtered path (records_to_data_frame needs to traverse the
        # records more than once).
        return list(provinces_states)

    return filter_country_code(provinces_states, target_iso_a2_code=country_iso2_code)

records_to_data_frame(records)

dump irrelevant info and make records into a GeoDataFrame that matches the PROV_NAMES

Parameters:
  • records (object) –

    the cartopy shpreader records from natural earth

Returns:
  • GeoDataFrame

    gpd.GeoDataFrame: the cleaned up & sorted data in a format that can be saved

Source code in workflow/scripts/build_province_shapes.py
def records_to_data_frame(records: object) -> gpd.GeoDataFrame:
    """Dump irrelevant info and make records into a GeoDataFrame that matches the PROV_NAMES.

    Only the "name_en" attribute (stored as the "province" column, with
    spaces removed) and the geometry of each record are kept. Rows are
    ordered by the whitespace-free province name and restricted to PROV_NAMES.

    Args:
        records (object): the cartopy shpreader records from natural earth;
            any iterable of records with ``attributes`` and ``geometry``

    Returns:
        gpd.GeoDataFrame: the cleaned up & sorted data in a format that can be saved

    Raises:
        ValueError: if no records are given, or if the resulting provinces
            do not equal sorted(PROV_NAMES)
    """
    # Materialise once: the records are traversed twice below (names and
    # geometries), which would silently exhaust a one-shot iterator.
    records = list(records)
    if not records:
        raise ValueError("No province records were provided")

    d = {"province": [r.attributes["name_en"] for r in records]}
    geo = [r.geometry for r in records]
    gdf = gpd.GeoDataFrame(d, geometry=geo)

    # remove white spaces BEFORE sorting: the result is compared against
    # sorted(PROV_NAMES), so the row order must be based on the normalised names
    gdf["province"] = gdf.province.str.replace(" ", "")
    gdf = gdf.sort_values(by="province")

    filtered = gdf[gdf.province.isin(PROV_NAMES)]

    if filtered.province.to_list() != sorted(PROV_NAMES):
        raise ValueError(
            "Built cut-out does not have the right provinces"
            + "- do your province lists have white spaces?"
        )

    return filtered

save_province_data(provinces_gdf, crs=CRS, output_file=DEFAULT_SHAPE_OUTPATH)

save to file

Parameters:
  • provinces_gdf (GeoDataFrame) –

    the cleaned up province records

  • crs (int, default: CRS ) –

    the crs in epsg format. Defaults to CRS.

  • output_file (pathlike, default: DEFAULT_SHAPE_OUTPATH ) –

    the output path. defaults to DEFAULT_SHAPE_OUTPATH

Source code in workflow/scripts/build_province_shapes.py
def save_province_data(
    provinces_gdf: gpd.GeoDataFrame,
    crs: int = CRS,
    output_file: os.PathLike = DEFAULT_SHAPE_OUTPATH,
):
    """Set the CRS on the province data and write it to a file.

    The CRS is set in place, so the GeoDataFrame passed in is modified.

    Args:
        provinces_gdf (GeoDataFrame): the cleaned up province records
        crs (int, optional): the crs as an EPSG code. Defaults to CRS.
        output_file (os.pathlike): the output path, resolved to an absolute
            path before writing. Defaults to DEFAULT_SHAPE_OUTPATH
    """
    # NOTE(review): the original comment labels this CRS as WGS84 - that
    # depends on the value of the module-level CRS constant; confirm.
    provinces_gdf.set_crs(inplace=True, epsg=crs)

    destination = os.path.abspath(output_file)
    provinces_gdf.to_file(destination)