Rules for building the population data by region

build_population(data_path=None)

Build the population data by region

Parameters:
  • data_path (PathLike, default: None) –

    the path to the pop csv. Defaults to None.

Source code in workflow/scripts/build_population.py
def build_population(data_path: os.PathLike = None):
    """Scale the yearbook population table and store the result as HDF.

    The csv is read through load_pop_csv, multiplied by YEARBOOK_DATA2POP and
    written to snakemake.output.population under the key "population".

    Args:
        data_path (os.PathLike, optional): the path to the pop csv. When None,
            snakemake.input.population is used instead. Defaults to None.
    """

    # fall back to the snakemake-provided input only when no path was given
    csv_path = snakemake.input.population if data_path is None else data_path

    scaled = YEARBOOK_DATA2POP * load_pop_csv(csv_path=csv_path)
    scaled.name = "population"
    # the stored key mirrors the name assigned just above
    scaled.to_hdf(snakemake.output.population, key=scaled.name)

load_pop_csv(csv_path)

Load the National Bureau of Statistics of China population data (Yearbook - Population, table 2.5: population at year end by region)

Parameters:
  • csv_path (PathLike) –

    the csv path

Returns:
  • DataFrame

    pd.DataFrame: the population for constants.POP_YEAR by province

Raises: ValueError: if the province names are not as expected

Source code in workflow/scripts/build_population.py
def load_pop_csv(csv_path: os.PathLike) -> pd.DataFrame:
    """Read the National Bureau of Statistics of China population csv
    (Yearbook - Population, table 2.5 pop at year end by Region)

    The first csv column becomes the index and the first row the header; all
    values are converted with pd.to_numeric before the POP_YEAR selection.

    Args:
        csv_path (os.PathLike): the csv path

    Returns:
        pd.DataFrame: the POP_YEAR population for the rows whose index label
            is in PROV_NAMES.
            NOTE(review): if POP_YEAR is a single column label the selection
            yields a pd.Series rather than a DataFrame - confirm.
    Raises:
        ValueError: if the retained index labels, once sorted, differ from
            the sorted PROV_NAMES
    """

    raw = pd.read_csv(csv_path, index_col=0, header=0)
    numeric = raw.apply(pd.to_numeric)

    # pick the reference year first, then drop every row that is not a known province
    year_values = numeric[POP_YEAR]
    is_province = numeric.index.isin(PROV_NAMES)
    selected = year_values[is_province]

    found = sorted(selected.index.to_list())
    expected = sorted(PROV_NAMES)
    if found != expected:
        raise ValueError(f"Province names do not match {found} != {expected}")
    return selected