Rules for building the population data by region
build_population(data_path=None)
Build the population data by region
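Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `data_path` | `os.PathLike` | the path to the pop csv. Defaults to None. | `None` |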
Source code in workflow/scripts/build_population.py
def build_population(data_path: os.PathLike = None):
    """Build the population table by region and store it as HDF.

    The raw yearbook figures are loaded with ``load_pop_csv``, multiplied by
    ``YEARBOOK_DATA2POP`` (presumably a unit conversion factor -- confirm
    against its definition) and written to ``snakemake.output.population``
    under the key ``"population"``.

    Relies on a ``snakemake`` object being available in module scope: it is
    always needed for the output path, and for the input path whenever
    ``data_path`` is not given.

    Args:
        data_path (os.PathLike, optional): the path to the population csv.
            Defaults to None, in which case ``snakemake.input.population``
            is used.
    """
    # Only touch snakemake.input when the caller did not supply a path.
    csv_source = snakemake.input.population if data_path is None else data_path

    scaled = YEARBOOK_DATA2POP * load_pop_csv(csv_path=csv_source)
    scaled.name = "population"
    # The object's name doubles as the HDF store key.
    scaled.to_hdf(snakemake.output.population, key=scaled.name)
load_pop_csv(csv_path)
Load the National Bureau of Statistics of China population data (Yearbook - Population, table 2.5: population at year end by region)
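Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `csv_path` | `os.PathLike` | the csv path | required |

Returns:

| Type | Description |
|---|---|
| `pd.DataFrame` | the population for constants.POP_YEAR by province |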
Raises: ValueError: if the province names are not as expected
Source code in workflow/scripts/build_population.py
def load_pop_csv(csv_path: os.PathLike) -> pd.DataFrame:
    """Load the National Bureau of Statistics of China population data
    (Yearbook - Population, table 2.5 pop at year end by Region).

    The csv is read with its first column as the index and its first row as
    the header, every column is converted to numeric, and the ``POP_YEAR``
    selection is restricted to the rows whose index label is in
    ``PROV_NAMES``.

    NOTE(review): if ``POP_YEAR`` is a single column label, the selection is
    a ``pd.Series`` rather than a ``pd.DataFrame`` -- confirm and align the
    annotation if so.

    Args:
        csv_path (os.PathLike): the csv path

    Returns:
        pd.DataFrame: the population for POP_YEAR by province

    Raises:
        ValueError: if the province names are not as expected
    """
    yearbook = pd.read_csv(csv_path, index_col=0, header=0).apply(pd.to_numeric)

    # Keep only the rows labelled with a known province name.
    is_province = yearbook.index.isin(PROV_NAMES)
    selected = yearbook[POP_YEAR][is_province]

    # After filtering, a mismatch means a province is missing or duplicated.
    found_names = sorted(selected.index.to_list())
    expected_names = sorted(PROV_NAMES)
    if found_names != expected_names:
        raise ValueError(
            f"Province names do not match {found_names} != {expected_names}"
        )

    return selected