
Technoecon ETL

Extract data from REMIND, transform it into PyPSA format, and write it to files.

make_pypsa_like_costs(frames)

Translate the REMIND costs into PyPSA format for a single region.

Parameters:

    frames (dict[pd.DataFrame]): Dictionary with the REMIND data tables to be
        transformed, keyed by table name (e.g. "capex", "tech_data"). The tables
        must already be filtered to a single region. Required.

Returns: pd.DataFrame: DataFrame containing cost data for a region.

Source code in src/rpycpl/technoecon_etl.py
def make_pypsa_like_costs(
    frames: dict[pd.DataFrame],
) -> pd.DataFrame:
    """translate the REMIND costs into pypsa format for a single region.

    Args:
        frames: dictionary with the REMIND data tables to be transformed. Region-filtered
    Returns:
        pd.DataFrame: DataFrame containing cost data for a region.
    """

    # check single region or region already removed
    regions_filtered = not any(["region" in df.columns for df in frames.values()])
    if not regions_filtered and any(
        [df.region.nunique() > 1 for df in frames.values() if "region" in df.columns]
    ):
        raise Warning("The dataframes are not region-filtered. Not supported.")
    elif not regions_filtered:
        frames.update(
            {k: df.drop(columns=["region"]) for k, df in frames.items() if "region" in df.columns}
        )

    years = frames["capex"].year.unique()
    capex = transform_capex(frames["capex"])

    # transform the data
    vom = transform_vom(frames["tech_data"].query("parameter == 'omv'"))
    fom = transform_fom(frames["tech_data"].query("parameter == 'omf'"))
    lifetime = transform_lifetime(frames["tech_data"].query("parameter == 'lifetime'"))

    co2_intens = transform_co2_intensity(frames["co2_intensity"], years)
    eta = transform_efficiency(frames["eta"], years)
    fuel_costs = transform_fuels(frames["fuel_costs"])
    discount_rate = transform_discount_rate(frames["discount_r"])

    del frames

    # stitch together in pypsa format
    cost_frames = {
        "capex": capex,
        "efficiency": eta,
        "fuel": fuel_costs,
        "co2": co2_intens,
        "lifetime": lifetime,
        "vom": vom,
        "fom": fom,
        "discount_rate": discount_rate,
    }

    # TODO Can do more efficient operations with join
    # add years to table with time-indep data
    for label, frame in cost_frames.items():
        if "year" not in frame.columns:
            cost_frames[label] = expand_years(frame, capex.year.unique())
    # add missing techs for tech agnostic data
    for label, frame in cost_frames.items():
        if "technology" not in frame.columns:
            cost_frames[label] = pd.concat(
                [frame.assign(technology=tech) for tech in capex.technology.unique()]
            )
    column_order = ["technology", "year", "parameter", "value", "unit", "source"]

    # merge the dataframes for the region
    costs_remind = pd.concat(
        [frame[column_order] for frame in cost_frames.values()], axis=0
    ).reset_index(drop=True)
    costs_remind.sort_values(by=["technology", "year", "parameter"], key=key_sort, inplace=True)

    return costs_remind.query("year in @years")
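
A minimal usage sketch, assuming region-filtered REMIND exports saved as CSV files (the file names are hypothetical; the dictionary keys match the tables accessed by the function):

import pandas as pd

from rpycpl.technoecon_etl import make_pypsa_like_costs

# Hypothetical, region-filtered REMIND exports; keys match the tables used above.
frame_files = {
    "capex": "remind_capex.csv",
    "tech_data": "remind_tech_data.csv",        # rows with parameter omv / omf / lifetime
    "co2_intensity": "remind_co2_intensity.csv",
    "eta": "remind_eta.csv",
    "fuel_costs": "remind_fuel_costs.csv",
    "discount_r": "remind_discount_rate.csv",
}
frames = {name: pd.read_csv(path) for name, path in frame_files.items()}

costs_remind = make_pypsa_like_costs(frames)
costs_remind.to_csv("costs_remind.csv", index=False)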

map_to_pypsa_tech(remind_costs_formatted, pypsa_costs, mappings, weights, years=None, currency_conversion=1)

Map the REMIND technology names to PyPSA technologies using the conversions specified in the map config.

Parameters:

    remind_costs_formatted (pd.DataFrame): DataFrame containing REMIND cost data. Required.
    pypsa_costs (pd.DataFrame): DataFrame containing PyPSA cost data. Required.
    mappings (pd.DataFrame): DataFrame containing the mapping funcs and names from
        REMIND to PyPSA technologies. Required.
    weights (pd.DataFrame): DataFrame containing the weights used for the weighted
        technology baskets. Required.
    years (Iterable, optional): Years to be used. Defaults to None (use the years
        present in the REMIND data).
    currency_conversion (float, optional): Conversion factor for currency
        (PyPSA to REMIND). Defaults to 1.

Returns: pd.DataFrame: DataFrame with mapped technology names.

Source code in src/rpycpl/technoecon_etl.py
def map_to_pypsa_tech(
    remind_costs_formatted: pd.DataFrame,
    pypsa_costs: pd.DataFrame,
    mappings: pd.DataFrame,
    weights: pd.DataFrame,
    years: list | Iterable = None,
    currency_conversion: float = 1,
) -> pd.DataFrame:
    """Map the REMIND technology names to pypsa technoloies using the conversions specified in the
    map config

    Args:
        remind_costs_formatted (pd.DataFrame): DataFrame containing REMIND cost data.
        pypsa_costs (pd.DataFrame): DataFrame containing pypsa cost data.
        mappings (pd.DataFrame): DataFrame containing the mapping funcs and names from
            REMIND to pypsa technologies.
        weights (pd.DataFrame): DataFrame containing the weights.
        years (Iterable, optional): years to be used. Defaults to None (use the REMIND data years).
        currency_conversion (float, optional): conversion factor for currency (PyPSA to REMIND).
    Returns:
        pd.DataFrame: DataFrame with mapped technology names.
    """
    if years is None:
        years = remind_costs_formatted.year.unique()
    else:
        years = pd.Index(years, dtype=int)

    # direct mapping of remind
    use_remind = (
        mappings.query("mapper == 'use_remind'")
        .drop("unit", axis=1)
        .merge(
            remind_costs_formatted,
            left_on=["reference", "parameter"],
            right_on=["technology", "parameter"],
            how="left",
        )
    )
    use_remind.drop(columns=["technology"], inplace=True)

    direct_input = mappings.query("mapper == 'set_value'").rename(columns={"reference": "value"})
    direct_input = direct_input.assign(source="direct_input from coupling mapping")
    direct_input = expand_years(direct_input, years)

    # pypsa values
    from_pypsa = _use_pypsa(mappings, pypsa_costs, years, "constant", currency_conversion)
    from_pypsa.drop(columns=["technology"], inplace=True)

    # techs with proxy learning
    proxy_learning = _learn_investment_from_proxy(
        mappings, pypsa_costs, remind_costs_formatted, ref_year=years.min()
    )
    if not proxy_learning.empty:
        proxy_learning.loc[:, "further description"] = "proxy learning from REMIND"
    # TODO check weighing is by right quantities
    # weighed by remind tech basket
    weighed_basket = _weigh_remind_by(remind_costs_formatted, weights, mappings)
    # format for output
    direct_input.rename(
        columns={"PyPSA_tech": "technology", "comment": "further description"},
        inplace=True,
    )
    use_remind.rename(
        columns={"PyPSA_tech": "technology", "comment": "further description"},
        inplace=True,
    )
    from_pypsa.rename(
        columns={"PyPSA_tech": "technology", "comment": "further description"},
        inplace=True,
    )
    proxy_learning.rename(
        columns={"PyPSA_tech": "technology", "comment": "further description"},
        inplace=True,
    )
    weighed_basket.rename(
        columns={"PyPSA_tech": "technology", "comment": "further description"},
        inplace=True,
    )

    output_frames = [
        direct_input,
        use_remind,
        from_pypsa,
        proxy_learning,
        weighed_basket,
    ]
    output = pd.concat([df[OUTP_COLS] for df in output_frames if not df.empty], axis=0)
    output = output.assign(year=output.year.astype(int))
    return output.sort_values(["year", "technology", "parameter"], key=key_sort).reset_index(
        drop=True
    )
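
For example, the mapping step can be sketched as below. File names, years and the currency factor are illustrative assumptions; costs_remind is the output of make_pypsa_like_costs:

import pandas as pd

from rpycpl.technoecon_etl import map_to_pypsa_tech, validate_mappings

costs_remind = pd.read_csv("costs_remind.csv")       # output of make_pypsa_like_costs
pypsa_costs = pd.read_csv("pypsa_costs.csv")         # hypothetical PyPSA cost table
mappings = pd.read_csv("remind_pypsa_tech_map.csv")  # hypothetical mapping config
weights = pd.read_csv("remind_capacities.csv")       # hypothetical weights for tech baskets

validate_mappings(mappings)
mapped_costs = map_to_pypsa_tech(
    remind_costs_formatted=costs_remind,
    pypsa_costs=pypsa_costs,
    mappings=mappings,
    weights=weights,
    years=[2030, 2035, 2040],      # defaults to the years in costs_remind if omitted
    currency_conversion=1.0,       # PyPSA-to-REMIND currency factor, assumption
)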

transform_capex(capex)

Transform the CAPEX data from REMIND to PyPSA format.

Parameters:

    capex (pd.DataFrame): DataFrame containing REMIND capex data. Required.

Returns: pd.DataFrame: Transformed capex data.

Source code in src/rpycpl/technoecon_etl.py
def transform_capex(capex: pd.DataFrame) -> pd.DataFrame:
    """Transform the CAPEX data from REMIND to pypsa.

    Args:
        capex (pd.DataFrame): DataFrame containing REMIND capex data.
    Returns:
        pd.DataFrame: Transformed capex data.
    """
    capex.loc[:, "value"] *= UNIT_CONVERSION["capex"]
    capex = capex.assign(source="REMIND " + capex.technology, parameter="investment", unit="USD/MW")
    store_techs = STOR_TECHS
    for stor in store_techs:
        capex.loc[capex["technology"] == stor, "unit"] = "USD/MWh"
    return capex
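
The other transform_* helpers follow the same pattern. A minimal sketch with made-up technology codes and values; the columns mirror what the code above accesses:

import pandas as pd

from rpycpl.technoecon_etl import transform_capex

capex_raw = pd.DataFrame(
    {
        "technology": ["spv", "windon"],   # hypothetical REMIND technology codes
        "year": [2030, 2030],
        "value": [0.6, 1.1],               # REMIND units, scaled by UNIT_CONVERSION["capex"]
    }
)
capex = transform_capex(capex_raw)
# adds parameter="investment" and unit="USD/MW"
# (the unit becomes "USD/MWh" for technologies listed in STOR_TECHS)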

transform_co2_intensity(co2_intensity, years)

Transform the CO2 intensity data from REMIND to PyPSA format.

Parameters:

    co2_intensity (pd.DataFrame): DataFrame containing REMIND CO2 intensity data. Required.
    years (list | pd.Index): Relevant years. Required.

Returns: pd.DataFrame: Transformed CO2 intensity data.

Source code in src/rpycpl/technoecon_etl.py
def transform_co2_intensity(co2_intensity: pd.DataFrame, years: list | pd.Index) -> pd.DataFrame:
    """Transform the CO2 intensity data from REMIND to pypsa.

    Args:
        co2_intensity (pd.DataFrame): DataFrame containing REMIND CO2 intensity data.
        years (list | pd.Index): relevant years data.

    Returns:
        pd.DataFrame: Transformed CO2 intensity data.
    """
    # TODO Co2 equivalent
    co2_intens = co2_intensity.rename(
        columns={
            "carrier": "from_carrier",
            "all_enty_1": "to_carrier",
            "all_enty_2": "emission_type",
            "all_enty.1": "to_carrier",
            "all_enty.2": "emission_type",
        },
    )
    co2_intens = co2_intens.query("to_carrier == 'seel' & emission_type == 'co2' & year in @years")
    co2_intens = co2_intens.assign(
        parameter="CO2 intensity",
        unit="t_CO2/MWh_th",
        source=co2_intens.technology + " REMIND",
    )
    co2_intens.loc[:, "value"] *= UNIT_CONVERSION["co2_intensity"]
    return co2_intens
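
A sketch of the expected input shape, based on the renames and filters above. Technology codes, carriers and values here are illustrative:

import pandas as pd

from rpycpl.technoecon_etl import transform_co2_intensity

co2_raw = pd.DataFrame(
    {
        "technology": ["pc", "ngcc"],
        "carrier": ["pecoal", "pegas"],   # renamed to from_carrier
        "all_enty_1": ["seel", "seel"],   # renamed to to_carrier; only 'seel' rows kept
        "all_enty_2": ["co2", "co2"],     # renamed to emission_type; only 'co2' rows kept
        "year": [2030, 2030],
        "value": [26.1, 15.3],            # REMIND units, scaled by UNIT_CONVERSION["co2_intensity"]
    }
)
co2_intens = transform_co2_intensity(co2_raw, years=[2030])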

transform_efficiency(eff_data, years)

Transform the efficiency data from REMIND to PyPSA format.

Parameters:

    eff_data (pd.DataFrame): DataFrame containing REMIND efficiency data. Required.
    years (list | pd.Index): Relevant years. Required.

Returns: pd.DataFrame: Transformed efficiency data.

Source code in src/rpycpl/technoecon_etl.py
def transform_efficiency(eff_data: pd.DataFrame, years: list | pd.Index) -> pd.DataFrame:
    """Transform the efficiency data from REMIND to pypsa.

    Args:
        eff_data (pd.DataFrame): DataFrame containing REMIND efficiency data.
        years (list | pd.Index): relevant years.
    Returns:
        pd.DataFrame: Transformed efficiency data.
    """
    eta = eff_data.query("year in @years")
    eta = eta.assign(source=eta.technology + " REMIND", unit="p.u.", parameter="efficiency")

    # Special treatment for nuclear: Efficiencies are in TWa/Mt=8760 TWh/Tg_U
    #  -> convert to MWh/g_U to match with fuel costs in USD/g_U
    eta.loc[eta["technology"].isin(["fnrs", "tnrs"]), "value"] *= 8760 / 1e6
    eta.loc[eta["technology"].isin(["fnrs", "tnrs"]), "unit"] = "MWh/g_U"
    # Special treatment for battery: Efficiencies in costs.csv should be roundtrip
    eta.loc[eta["technology"] == "btin", "value"] **= 2

    return eta
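
A worked sketch of the two special cases above; technology codes and numbers are illustrative only:

import pandas as pd

from rpycpl.technoecon_etl import transform_efficiency

eta_raw = pd.DataFrame(
    {
        "technology": ["ngcc", "tnrs", "btin"],
        "year": [2030, 2030, 2030],
        "value": [0.60, 1.25, 0.95],
    }
)
eta = transform_efficiency(eta_raw, years=[2030])
# ngcc: 0.60 p.u., unchanged
# tnrs: 1.25 TWa/Mt -> 1.25 * 8760 / 1e6 = 0.01095 MWh/g_U
# btin: 0.95 one-way -> 0.95 ** 2 = 0.9025 round-trip efficiency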

transform_fom(fom)

Transform the Fixed Operational Maintenance (FOM) data from REMIND to PyPSA format.

Parameters:

    fom (pd.DataFrame): DataFrame containing REMIND FOM data. Required.

Returns: pd.DataFrame: Transformed FOM data.

Source code in src/rpycpl/technoecon_etl.py
def transform_fom(fom: pd.DataFrame) -> pd.DataFrame:
    """Transform the Fixed Operational Maintenance data from REMIND to pypsa.

    Args:
        fom (pd.DataFrame): DataFrame containing REMIND FOM data.
    Returns:
        pd.DataFrame: Transformed FOM data.
    """
    fom.loc[:, "value"] *= UNIT_CONVERSION["FOM"]
    fom = fom.assign(source=fom.technology + " REMIND")
    fom = fom.assign(unit="percent", parameter="FOM")

    return fom

transform_lifetime(lifetime)

Transform the lifetime data from REMIND to PyPSA format.

Parameters:

    lifetime (pd.DataFrame): DataFrame containing REMIND lifetime data. Required.

Returns: pd.DataFrame: Transformed lifetime data.

Source code in src/rpycpl/technoecon_etl.py
def transform_lifetime(lifetime: pd.DataFrame) -> pd.DataFrame:
    """Transform the lifetime data from REMIND to pypsa.

    Args:
        lifetime (pd.DataFrame): DataFrame containing REMIND lifetime data.
    Returns:
        pd.DataFrame: Transformed lifetime data.
    """
    # note: DataFrame.assign has no inplace argument; passing inplace=True would add a literal column
    lifetime = lifetime.assign(unit="years", source=lifetime.technology + " REMIND")
    return lifetime

transform_vom(vom)

Transform the Variable Operational Maintenance (VOM) data from REMIND to PyPSA format.

Parameters:

    vom (pd.DataFrame): DataFrame containing REMIND VOM data. Required.

Returns: pd.DataFrame: Transformed VOM data.

Source code in src/rpycpl/technoecon_etl.py
def transform_vom(vom: pd.DataFrame) -> pd.DataFrame:
    """Transform the Variable Operational Maintenance data from REMIND to pypsa.
    Args:
        vom (pd.DataFrame): DataFrame containing REMIND VOM data.
    Returns:
        pd.DataFrame: Transformed VOM data.
    """
    vom.loc[:, "value"] *= UNIT_CONVERSION["VOM"]
    vom = vom.assign(unit="USD/MWh", source=vom.technology + " REMIND", parameter="VOM")
    return vom

validate_mappings(mappings)

Validate the mapping of the technologies to PyPSA technologies.

Parameters:

    mappings (pd.DataFrame): DataFrame containing the mapping funcs and names from
        REMIND to PyPSA technologies. Required.

Raises:

    ValueError: if mappers are not allowed.
    ValueError: if columns are not as expected.
    ValueError: if proxy learning (use_remind_with_learning_from) is used for
        something other than investment.

Source code in src/rpycpl/technoecon_etl.py
def validate_mappings(mappings: pd.DataFrame):
    """validate the mapping of the technologies to pypsa technologies
    Args:
        mappings (pd.DataFrame): DataFrame containing the mapping funcs and names
            from REMIND to pypsa technologies.
    Raises:
        ValueError: if mappers not allowed
        ValueError: if columns not expected
        ValueError: if proxy learning (use_remind_with_learning_from) is used
            for something other than invest

    """

    # validate columns
    EXPECTED_COLUMNS = [
        "PyPSA_tech",
        "parameter",
        "mapper",
        "reference",
        "unit",
        "comment",
    ]
    if not sorted(mappings.columns) == sorted(EXPECTED_COLUMNS):
        raise ValueError(f"Invalid mapping. Allowed columns are: {EXPECTED_COLUMNS}")

    # validate mappers allowed
    forbidden_mappers = set(mappings.mapper.unique()).difference(MAPPING_FUNCTIONS)
    if forbidden_mappers:
        raise ValueError(f"Forbidden mappers found in mappings: {forbidden_mappers}")

    # validate proxy learning
    proxy_learning = mappings.query("mapper == 'use_remind_with_learning_from'")
    proxy_params = set(proxy_learning.parameter)
    if proxy_params.difference({"investment"}):
        raise ValueError(f"Proxy learning is only allowed for investment but Found: {proxy_params}")

    # validate numeric
    set_vals = mappings.query("mapper == 'set_value'")["reference"]
    try:
        set_vals.astype(float)
    except ValueError as e:
        raise ValueError(f"set_value reference values must be numeric but: {e}")

    # check uniqueness
    counts = mappings.groupby(["PyPSA_tech", "parameter"]).count()
    repeats = counts[counts.values > 1]
    if len(repeats):
        raise ValueError(f"Mappings are not unique: n repeats:\n {repeats} ")
    # should validate that remind references are actually in the remind export

    if "value" in mappings.columns:
        raise ValueError("Mapping has an unexpected 'value' column")

validate_output(df_out, costs_remind)

Validate the output data.

Parameters:

    df_out (pd.DataFrame): DataFrame containing the output data. Required.
    costs_remind (pd.DataFrame): DataFrame containing the formatted REMIND data. Required.

Source code in src/rpycpl/technoecon_etl.py
def validate_output(df_out: pd.DataFrame, costs_remind: pd.DataFrame):
    """validate the output data
    Args:
        df_out (pd.DataFrame): DataFrame containing the output data
        costs_remind (pd.DataFrame): DataFrame containing the formatted remind data
    """

    missing_vals = df_out.value.isna().any()
    if missing_vals:
        raise ValueError(f"Missing values or nans in output data: {df_out[df_out.value.isna()]}")

    n_expected = costs_remind.technology.nunique() * costs_remind.year.nunique()

validate_remind_data(costs_remind, mappings)

Validate the REMIND cost data against the mapping table.

Parameters:

    costs_remind (pd.DataFrame): DataFrame containing the formatted REMIND cost data. Required.
    mappings (pd.DataFrame): DataFrame containing the mapping funcs and names from
        REMIND to PyPSA technologies. Required.

Source code in src/rpycpl/technoecon_etl.py
def validate_remind_data(costs_remind: pd.DataFrame, mappings: pd.DataFrame):
    """validate the remind cost data
    Args:
        remind_data (pd.DataFrame): DataFrame containing the remind data
    """
    requested_data = mappings.query("mapper.str.contains('remind')")[
        ["PyPSA_tech", "parameter", "reference"]
    ].explode("reference")
    data = requested_data.explode("reference").merge(
        costs_remind.rename(columns={"technology": "reference"}),
        on=["parameter", "reference"],
        how="left",
    )
    data = data[["PyPSA_tech", "reference", "year", "parameter", "value"]]
    missing = data[(data.isna()).any(axis=1)]
    if not missing.empty:
        raise ValueError(
            f"Missing data in REMIND for (first <10 rows)\n{missing.drop_duplicates().head(10)}"
            "\nCheck the mappings and the remind data."
            " Hint: are your reference lists consistently separated by ',' or ', '?"
        )
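
Typical usage, reusing the costs_remind and mappings frames from the sketches above; the call raises if a mapping references REMIND data that is missing from the formatted cost table:

from rpycpl.technoecon_etl import validate_remind_data

validate_remind_data(costs_remind, mappings)  # frames from the earlier sketches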