Skip to content

Build population gridcell map

build_gridded_population(prov_pop_path, pop_density_raster_path, cutout_path, province_shape_path, gridded_pop_out)

Build a gridded population DataFrame by matching population density to the cutout grid cells. This DataFrame is a sparse matrix of the population and shape BusesxCutout_gridcells where buses are the provinces

Parameters:

Name Type Description Default
prov_pop_path PathLike

Path to the province population count file (hdf5).

required
pop_density_raster_path PathLike

Path to the population density raster file.

required
cutout_path PathLike

Path to the cutout file containing the grid.

required
province_shape_path PathLike

Path to the province shape file.

required
gridded_pop_out PathLike

output file path.

required
Source code in workflow/scripts/build_population_gridcell_map.py
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def build_gridded_population(
    prov_pop_path: PathLike,
    pop_density_raster_path: PathLike,
    cutout_path: PathLike,
    province_shape_path: PathLike,
    gridded_pop_out: PathLike,
):
    """Build a gridded population DataFrame by matching population density to the cutout grid cells.
    This DataFrame is a sparse matrix of the population and shape BusesxCutout_gridcells
      where buses are the provinces

    Args:
        prov_pop_path (PathLike): Path to the province population count file (hdf5).
        pop_density_raster_path (PathLike): Path to the population density raster file.
        cutout_path (PathLike): Path to the cutout file containing the grid.
        province_shape_path (PathLike): Path to the province shape file.
        gridded_pop_out (PathLike): output file path.
    """

    with pd.HDFStore(prov_pop_path, mode="r") as store:
        pop_province = store["population"]

    prov_poly = read_province_shapes(province_shape_path)
    pop_density = read_pop_density(pop_density_raster_path, prov_poly, crs=CRS)

    cutout = atlite.Cutout(cutout_path)
    grid_points = cutout.grid
    # this is in polygons but need points for sjoin with pop dnesity to work
    grid_points.to_crs(3857, inplace=True)
    grid_points["geometry"] = grid_points.centroid
    grid_points.to_crs(CRS, inplace=True)

    # match cutout grid to province
    # cutout_pts_in_prov = gpd.tools.sjoin(grid_points, prov_poly,
    # how="left", predicate="intersects")
    # TODO: do you want to dropna here?
    cutout_pts_in_prov = gpd.tools.sjoin(
        grid_points, prov_poly, how="left", predicate="intersects"
    )  # .dropna()
    cutout_pts_in_prov.rename(
        columns={"index_right": "province_index", "province": "province_name"},
        inplace=True,
    )

    # match cutout grid to province
    cutout_pts_in_prov = gpd.tools.sjoin(grid_points, prov_poly, how="left", predicate="intersects")
    cutout_pts_in_prov.rename(
        columns={"index_right": "province_index", "province": "province_name"},
        inplace=True,
    )
    # cutout_pts_in_prov.dropna(inplace=True)

    # TODO CRS, think about whether this makes sense or need grid interp
    merged = gpd.tools.sjoin_nearest(
        cutout_pts_in_prov.to_crs(3857), pop_density.to_crs(3857), how="inner"
    )
    merged = merged.to_crs(CRS)
    # points outside china are NaN, need to rename to keep the index cutout after agg
    # otherwise the spare matrix will not match the cutoutpoints
    #  (smarter would be to change the cutout)
    merged.fillna({"province_name": "OutsideChina"}, inplace=True)

    points_in_provinces = pd.DataFrame(index=cutout_pts_in_prov.index)
    # normalise pop per province and make a loc_id/province table
    points_in_provinces = (
        merged.groupby("province_name")["pop_density"]
        .apply(lambda x: x / x.sum())
        .unstack(fill_value=0.0)
        .T
    )
    # now get rid of the outside china "province"
    points_in_provinces.drop(columns="OutsideChina", inplace=True)
    points_in_provinces.index.name = ""
    points_in_provinces.fillna(0.0, inplace=True)

    points_in_provinces *= pop_province

    with pd.HDFStore(gridded_pop_out, mode="w", complevel=4) as store:
        store["population_gridcell_map"] = points_in_provinces

build_population_map(prov_pop_path, pop_density_raster_path, cutout_path, province_shape_path, gridded_pop_out)

Build a gridded population DataFrame by matching population density to the cutout grid cells. This DataFrame is a sparse matrix of the population and shape BusesxCutout_gridcells where buses are the provinces

Parameters:

Name Type Description Default
prov_pop_path PathLike

Path to the province population count file (hdf5).

required
pop_density_raster_path PathLike

Path to the population density raster file.

required
cutout_path PathLike

Path to the cutout file containing the grid.

required
province_shape_path PathLike

Path to the province shape file.

required
gridded_pop_out PathLike

output file path.

required
Source code in workflow/scripts/build_population_gridcell_map.py
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def build_population_map(
    prov_pop_path: PathLike,
    pop_density_raster_path: PathLike,
    cutout_path: PathLike,
    province_shape_path: PathLike,
    gridded_pop_out: PathLike,
):
    """Build a gridded population DataFrame by matching population density to the cutout grid cells.
    This DataFrame is a sparse matrix of the population and shape BusesxCutout_gridcells
      where buses are the provinces

    Args:
        prov_pop_path (PathLike): Path to the province population count file (hdf5).
        pop_density_raster_path (PathLike): Path to the population density raster file.
        cutout_path (PathLike): Path to the cutout file containing the grid.
        province_shape_path (PathLike): Path to the province shape file.
        gridded_pop_out (PathLike): output file path.
    """

    # =============== load data ===================
    with pd.HDFStore(prov_pop_path, mode="r") as store:
        pop_province_count = store["population"]

    # CFSR points and Provinces
    pop_ww = load_cfrs_data(pop_density_raster_path)

    prov_poly = gpd.read_file(province_shape_path)[["province", "geometry"]]
    prov_poly.set_index("province", inplace=True)
    prov_poly = prov_poly.reindex(PROV_NAMES)
    prov_poly.reset_index(inplace=True)

    # load renewable profiles & grid & extract gridpoints
    cutout = atlite.Cutout(cutout_path)
    grid_points = cutout.grid
    grid_points.to_crs(3857, inplace=True)
    grid_points["geometry"] = grid_points.centroid
    grid_points.to_crs(CRS, inplace=True)

    # match cutout grid to province
    cutout_pts_in_prov = gpd.tools.sjoin(grid_points, prov_poly, how="left", predicate="intersects")
    cutout_pts_in_prov.rename(
        columns={"index_right": "province_index", "province": "province_name"},
        inplace=True,
    )

    # Province masks merged with population density
    # TODO: THIS REQUIRES EXPLANATION - can't just use random crs :||
    cutout_pts_in_prov = cutout_pts_in_prov.to_crs(3857)
    pop_ww = pop_ww.to_crs(3857)

    merged = gpd.tools.sjoin_nearest(cutout_pts_in_prov, pop_ww, how="inner")
    merged = merged.to_crs(CRS)

    # normalised pop distribution per province
    # need an extra province for points not in the province, otherwise lose cutout grid index
    merged.fillna({"province_name": "OutsideChina"}, inplace=True)
    points_in_provinces = pd.DataFrame(index=cutout_pts_in_prov.index)
    points_in_provinces = (
        merged.groupby("province_name")["pop_density"]
        .apply(lambda x: x / x.sum())
        .unstack(fill_value=0.0)
        .T
    )
    # Cleanup the matrix: get rid of the outside china "province" et
    points_in_provinces.drop(columns="OutsideChina", inplace=True)
    points_in_provinces.index.name = ""
    points_in_provinces.fillna(0.0, inplace=True)

    # go from normalised distribution to head count
    points_in_provinces *= pop_province_count
    with pd.HDFStore(gridded_pop_out, mode="w", complevel=4) as store:
        store["population_gridcell_map"] = points_in_provinces

load_cfrs_data(target)

load CFRS_grid.nc type files into a geodatafram

Parameters:

Name Type Description Default
target PathLike

the abs path

required

Returns:

Type Description
GeoDataFrame

gpd.GeoDataFrame: the data in gdf

Source code in workflow/scripts/build_population_gridcell_map.py
38
39
40
41
42
43
44
45
46
47
48
49
50
def load_cfrs_data(target: PathLike) -> gpd.GeoDataFrame:
    """load  CFRS_grid.nc type files into a geodatafram

    Args:
        target (PathLike): the abs path

    Returns:
        gpd.GeoDataFrame: the data in gdf
    """
    pop_density = xr.open_dataarray(target).to_dataset(name="pop_density")
    pop_ww = xarr_to_gdf(pop_density, var_name="pop_density")  # TODO is the CRS correct?

    return pop_ww

xarr_to_gdf(xarr, var_name, x_var='x', y_var='y', crs=CRS)

convert an xarray to GDF

Parameters:

Name Type Description Default
xarr DataArray

the input array

required
var_name str

the array variable to be converted.

required
x_var str

the x dimension. Defaults to "x".

'x'
y_var str

the y dimension. Defaults to "y".

'y'
crs _type_

the crs. Defaults to CRS.

CRS

Returns:

Type Description
GeoDataFrame

gpd.GeoDataFrame: geodata frame in chosen CRS

Source code in workflow/scripts/build_population_gridcell_map.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def xarr_to_gdf(
    xarr: xr.DataArray, var_name: str, x_var="x", y_var="y", crs=CRS
) -> gpd.GeoDataFrame:
    """convert an xarray to GDF

    Args:
        xarr (xr.DataArray): the input array
        var_name (str): the array variable to be converted.
        x_var (str, optional): the x dimension. Defaults to "x".
        y_var (str, optional): the y dimension. Defaults to "y".
        crs (_type_, optional): the crs. Defaults to CRS.

    Returns:
        gpd.GeoDataFrame: geodata frame in chosen CRS
    """
    df = xarr.to_dataframe()
    df.reset_index(inplace=True)
    return gpd.GeoDataFrame(
        df[var_name], geometry=gpd.points_from_xy(df[x_var], df[y_var]), crs=crs
    )