Skip to content

Plot statistics

Plot statistical analysis and summary charts for energy system results.

This module creates statistical plots including capacity factors, cost breakdowns, energy balances, and other key performance indicators for the PyPSA-China model. Adapted from PyPSA-Eur by PyPSA-China authors.

add_second_xaxis(data, ax, label, **kwargs)

Add a secondary X-axis to the plot.

Parameters:

Name Type Description Default
data Series

The data to plot. Its values will be plotted on the secondary X-axis.

required
ax Axes

The main matplotlib Axes object.

required
label str

The label for the secondary X-axis.

required
**kwargs

Optional keyword arguments for plot styling.

{}
Source code in workflow/scripts/plot_statistics.py
def add_second_xaxis(data: pd.Series, ax, label, **kwargs):
    """
    Add a secondary X-axis to the plot and draw ``data`` on it as markers.

    Each value of ``data`` is drawn as a marker at consecutive integer
    Y positions (0, 1, 2, ...) and annotated with its value formatted as
    a percentage.

    Args:
        data (pd.Series): The data to plot. Its values will be plotted on the secondary X-axis.
        ax (matplotlib.axes.Axes): The main matplotlib Axes object.
        label (str): The label for the secondary X-axis.
        **kwargs: Optional keyword arguments for plot styling. Recognised keys
            are ``color``, ``text_offset``, ``markersize`` and ``fontsize``;
            any other key is ignored.

    Returns:
        The twin axes created with ``ax.twiny()``.
    """
    defaults = {"color": "red", "text_offset": 0.5, "markersize": 8, "fontsize": 9}
    # Caller-supplied values must win over the defaults (the reverse merge
    # order would silently discard every styling keyword).
    style = {**defaults, **kwargs}

    ax2 = ax.twiny()
    # One integer row per data point: the values run along the secondary
    # X-axis, so each point needs its own vertical position on the shared Y-axis.
    y_pos = range(len(data))

    ax2.plot(
        data.values,
        y_pos,
        marker="o",
        linestyle="",
        color=style["color"],
        markersize=style["markersize"],
        label="Generation Share (%)",
    )

    # Annotate every marker with its value, shifted right by text_offset.
    for i, val in enumerate(data.values):
        ax2.text(
            val + style["text_offset"],
            i,
            f"{val:.1f}%",
            color=style["color"],
            va="center",
            ha="left",
            fontsize=style["fontsize"],
        )

    ax2.set_xlim(left=0)
    ax2.set_xlabel(label)
    ax2.grid(False)
    # Only the tick label size is styled; tick colour is left at its default.
    ax2.tick_params(axis="x", labelsize=style["fontsize"])

    return ax2

filter_small_caps(n, threshold=100)

Drop small capacities for plotting (eliminates numerical zeroes). Note: deriving the threshold from the objective cost tolerance would be more robust.

Parameters:

Name Type Description Default
n Network

the pypsa network to remove small comps from

required
threshold int

the removal threshold. Defaults to 100.

100
Source code in workflow/scripts/plot_statistics.py
def filter_small_caps(n: pypsa.Network, threshold=100):
    """Remove components with negligible optimal capacity before plotting.

    Only rows whose optimal capacity is strictly greater than ``threshold``
    are kept; the filtered table is written back onto the network. A
    threshold based on the objective cost tolerance would be more robust.

    Args:
        n (pypsa.Network): the pypsa network to remove small comps from
        threshold (int, optional): the removal threshold. Defaults to 100.
    """
    # Stores are sized by energy (e_nom_opt); the other components by power.
    capacity_attr = {
        "links": "p_nom_opt",
        "generators": "p_nom_opt",
        "stores": "e_nom_opt",
        "storage_units": "p_nom_opt",
    }
    for comp_name, attr in capacity_attr.items():
        table = getattr(n, comp_name)
        setattr(n, comp_name, table.loc[table[attr] > threshold])

fix_load_carriers(n, config)

Set unspecified load carriers to load

Parameters:

Name Type Description Default
n Network

The PyPSA network instance.

required
config dict

the plotting config

required
Source code in workflow/scripts/plot_statistics.py
def fix_load_carriers(n: pypsa.Network, config: dict):
    """Assign the carrier "Load" to every load that has an empty carrier.

    Also writes a "Load" row into ``n.carriers`` with its nice name and the
    colour configured for "electric load".

    Args:
        n (pypsa.Network): The PyPSA network instance.
        config (dict): the plotting config; ``config["tech_colors"]["electric load"]``
            is read for the colour.
    """
    has_no_carrier = n.loads["carrier"] == ""
    n.loads.loc[has_no_carrier, "carrier"] = "Load"

    load_color = config["tech_colors"]["electric load"]
    n.carriers.loc["Load", ["nice_name", "color"]] = ("Load", load_color)

format_axis_label(name, unit)

Format axis label by cleaning name and conditionally adding unit.

Parameters:

Name Type Description Default
name str

The name/description for the axis.

required
unit str

The unit of measurement (may be empty/None).

required

Returns:

Name Type Description
str str

Formatted label with underscores replaced by spaces, capitalized, and unit only included if non-empty.

Source code in workflow/scripts/plot_statistics.py
def format_axis_label(name: str, unit: str) -> str:
    """Build an axis label from a raw name and an optional unit.

    Args:
        name (str): The name/description for the axis.
        unit (str): The unit of measurement (may be empty/None).

    Returns:
        str: The name with underscores turned into spaces and capitalized,
            followed by ``[unit]`` only when the unit is non-blank.
    """
    label = " ".join(name.split("_")).capitalize()
    # None, "" and whitespace-only units all count as "no unit".
    has_unit = bool(unit) and bool(unit.strip())
    return f"{label} [{unit}]" if has_unit else label

plot_capacity_factor(cf_filtered, theo_cf_filtered, ax, colors, **kwargs)

Plot actual and theoretical capacity factors for each technology.

Parameters:

Name Type Description Default
cf_filtered Series

Actual capacity factors indexed by technology.

required
theo_cf_filtered Series

Theoretical capacity factors indexed by technology.

required
ax Axes

The axis to plot on.

required
colors dict

Color mapping for technologies.

required

Returns:

Type Description

matplotlib.axes.Axes: The axis with the plot.

Source code in workflow/scripts/plot_statistics.py
def plot_capacity_factor(
    cf_filtered: pd.Series, theo_cf_filtered: pd.Series, ax: axes.Axes, colors: dict, **kwargs
):
    """
    Plot actual and theoretical capacity factors for each technology.

    Two horizontal bars are drawn per technology (actual and theoretical),
    each annotated with its value to two decimals. The technology order is
    taken from ``cf_filtered``; the theoretical values are aligned to it by
    label, and a technology without a theoretical value is shown as 0.

    Args:
        cf_filtered (pd.Series): Actual capacity factors indexed by technology.
        theo_cf_filtered (pd.Series): Theoretical capacity factors indexed by technology.
        ax (matplotlib.axes.Axes): The axis to plot on.
        colors (dict): Color mapping for technologies; missing technologies
            fall back to "lightgrey".
        **kwargs: Currently unused; accepted for interface compatibility.

    Returns:
        matplotlib.axes.Axes: The axis with the plot.
    """
    techs = cf_filtered.index
    # Align by label so that bar lengths and their text annotations always
    # refer to the same technology, even if the two inputs are ordered
    # differently or the theoretical series lacks an entry.
    theo_aligned = theo_cf_filtered.reindex(techs).fillna(0)

    y_pos = range(len(cf_filtered))
    width = 0.35
    bar_colors = [colors.get(tech, "lightgrey") for tech in techs]

    ax.barh(
        [i - width / 2 for i in y_pos],
        cf_filtered.values,
        width,
        color=bar_colors,
        alpha=0.8,
        label="Actual CF",
    )
    ax.barh(
        [i + width / 2 for i in y_pos],
        theo_aligned.values,
        width,
        color=bar_colors,
        alpha=0.4,
        label="Theoretical CF",
    )

    for i, (cf_val, theo_val) in enumerate(zip(cf_filtered.values, theo_aligned.values)):
        ax.text(
            cf_val + 0.01,
            i - width / 2,
            f"{cf_val:.2f}",
            va="center",
            ha="left",
            fontsize=8,
            bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.8),
        )
        ax.text(
            theo_val + 0.01,
            i + width / 2,
            f"{theo_val:.2f}",
            va="center",
            ha="left",
            fontsize=8,
            bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.5),
        )

    ax.set_yticks(list(y_pos))
    ax.set_yticklabels([label.capitalize() for label in techs])
    ax.set_xlabel("Capacity Factor")
    # With empty input the maxima are NaN; skip the explicit limit in that
    # case (and for all-zero data) instead of passing an invalid limit.
    x_max = max(cf_filtered.max(), theo_aligned.max())
    if pd.notna(x_max) and x_max > 0:
        ax.set_xlim(0, x_max * 1.1)
    ax.grid(False)
    ax.legend()

    return ax

plot_province_peakload_capacity(df_plot, bar_cols, color_list, outp_dir)

Plot province peak load vs installed capacity by technology.

Parameters:

Name Type Description Default
df_plot

DataFrame with provinces as index, columns as technologies and 'Peak Load'.

required
bar_cols

List of technology columns to plot as bars.

required
color_list

List of colors for each technology.

required
outp_dir

Output directory for saving the figure.

required
Source code in workflow/scripts/plot_statistics.py
def plot_province_peakload_capacity(df_plot, bar_cols, color_list, outp_dir):
    """
    Draw installed capacity per province as stacked bars and mark the peak load.

    The figure is written to ``province_peakload_capacity.png`` inside
    ``outp_dir``. Nothing is returned and the figure is not closed here.

    Args:
        df_plot: DataFrame with provinces as index, columns as technologies and 'Peak Load'.
        bar_cols: List of technology columns to plot as bars.
        color_list: List of colors for each technology.
        outp_dir: Output directory for saving the figure.
    """
    fig, ax = plt.subplots(figsize=(14, 8))
    capacities = df_plot[bar_cols]
    capacities.plot(kind="barh", stacked=True, ax=ax, color=color_list, alpha=0.8)

    # Mark each province's peak load with a red "|" marker on its bar row;
    # only the first marker carries a legend label.
    for row, prov in enumerate(df_plot.index):
        marker_label = "" if row else "Peak Load"
        peak = df_plot.loc[prov, "Peak Load"]
        ax.plot(peak, row, "r|", markersize=18, label=marker_label)

    ax.set_xlabel("Capacity [GW]")
    ax.set_ylabel("Province")
    ax.set_title("Peak Load vs Installed Capacity by Province")
    ax.grid(False)

    # Collapse repeated legend entries, keeping the first handle per label.
    handles, labels = ax.get_legend_handles_labels()
    first_handle = {}
    for handle, text in zip(handles, labels):
        first_handle.setdefault(text, handle)
    ax.legend(list(first_handle.values()), list(first_handle.keys()), loc="best")

    fig.tight_layout()
    target = os.path.join(outp_dir, "province_peakload_capacity.png")
    fig.savefig(target)

plot_static_per_carrier(ds, ax, colors, drop_zero_vals=True, add_labels=True, autofigsize=True)

Generic function to plot different statistics

Parameters:

Name Type Description Default
ds Series

the data to plot

required
ax Axes

plotting axes

required
colors Series

colors for the carriers

required
drop_zero_vals bool

Drop zeroes from data. Defaults to True.

True
add_labels bool

Add value labels on bars. Defaults to True.

True
autofigsize bool

Automatically size figure based on number of bars. Defaults to True.

True
Source code in workflow/scripts/plot_statistics.py
def plot_static_per_carrier(
    ds: pd.Series,
    ax: axes.Axes,
    colors: pd.Series,
    drop_zero_vals=True,
    add_labels=True,
    autofigsize=True,
):
    """Generic horizontal bar plot of one statistic per carrier.

    Args:
        ds (pd.Series): the data to plot; its index needs a "carrier" level
            and ``ds.attrs`` must provide "name" and "unit" for the axis label
        ax (matplotlib.axes.Axes): plotting axes; when falsy a new figure
            and axes are created
        colors (pd.Series): colors for the carriers
        drop_zero_vals (bool, optional): Drop zeroes from data. Defaults to True.
        add_labels (bool, optional): Add value labels on bars. Defaults to True.
        autofigsize (bool, optional): Automatically size figure based on number
            of bars. Defaults to True.

    Returns:
        The figure that holds the plotted axes.
    """
    if drop_zero_vals:
        ds = ds[ds != 0]
    ds = ds.dropna()

    # 0.4 inch of height per bar, clamped to the range 4..16 inches.
    if autofigsize:
        fig_height = max(4, min(len(ds) * 0.4, 16))
        figsize = (8, fig_height)
    else:
        figsize = None  # matplotlib default

    if ax:
        # Reuse the caller's figure, resizing it if requested.
        fig = ax.get_figure()
        if autofigsize:
            fig.set_size_inches(8, fig_height)
    else:
        fig, ax = plt.subplots(figsize=figsize)

    # Colours are looked up before the index is renamed for display.
    bar_colors = colors[ds.index.get_level_values("carrier")]
    ds = rename_index(ds)
    xlabel = format_axis_label(ds.attrs["name"], ds.attrs["unit"])
    ds.plot.barh(color=bar_colors.values, xlabel=xlabel, ax=ax, alpha=0.9)

    ax.set_ylabel("")  # no y-axis label
    ax.margins(y=0.01)  # tighten the spacing around the bars

    if add_labels:
        # Value labels sit just beyond the right edge of the x-range.
        text_x = ax.get_xlim()[1] * 1.05
        for row, value in enumerate(ds):
            if value <= 100:
                txt = f"{value:.2f}"
            else:
                txt = f"{value:.1e}"
            ax.text(text_x, row, txt, va="center", ha="left", fontsize=8)

    ax.grid(axis="y")
    fig.tight_layout()

    return fig

prepare_capacity_factor_data(n, carrier)

Prepare Series for actual and theoretical capacity factors per technology.

Parameters:

Name Type Description Default
n Network

The PyPSA network instance.

required
carrier str

The carrier for which to prepare the data.

required

Returns:

Name Type Description
cf_filtered

Series of actual capacity factors (index: nice_name)

theo_cf_filtered

Series of theoretical capacity factors (index: nice_name)

Source code in workflow/scripts/plot_statistics.py
def prepare_capacity_factor_data(n: pypsa.Network, carrier: str):
    """
    Build matching Series of actual and theoretical capacity factors.

    Actual values come from ``n.statistics.capacity_factor`` grouped by
    carrier. Theoretical values are computed per generator from ``p_max_pu``
    weighted by capacity, then aggregated by the carrier's nice name. Only
    technologies present in both, with non-zero values in both, are kept,
    sorted by ascending actual capacity factor.

    Args:
        n (pypsa.Network): The PyPSA network instance.
        carrier (str): The carrier for which to prepare the data
            (not used by the current implementation).

    Returns:
        cf_filtered: Series of actual capacity factors (index: nice_name)
        theo_cf_filtered: Series of theoretical capacity factors (index: nice_name)
    """
    actual_cf = n.statistics.capacity_factor(groupby=["carrier"]).dropna()
    battery_key = ("Link", "battery")
    if battery_key in actual_cf.index:
        # Report the battery link under the "battery charger" label.
        actual_cf.loc[("Link", "battery charger")] = actual_cf.loc[battery_key]
        actual_cf = actual_cf.drop(index=battery_key)
    # Average over components so the index is the carrier level only.
    actual_cf = actual_cf.groupby(level=1).mean()

    # Theoretical capacity factor
    generators = n.generators.copy()
    generators["p_nom_used"] = generators["p_nom_opt"].fillna(generators["p_nom"])
    availability = n.generators_t.p_max_pu
    generators["weighted_energy"] = (availability * generators["p_nom_used"]).sum()

    def to_nice_name(carrier_name):
        # Carriers missing from n.carriers keep their raw name.
        if carrier_name in n.carriers.index:
            return n.carriers.loc[carrier_name, "nice_name"]
        return carrier_name

    generators["nice_name"] = generators["carrier"].map(to_nice_name)
    energy_by_tech = generators.groupby("nice_name")["weighted_energy"].sum()
    capacity_by_tech = generators.groupby("nice_name")["p_nom_used"].sum()
    theoretical_cf = energy_by_tech / capacity_by_tech / len(n.snapshots)

    # Only keep technologies present in both actual and theoretical CF
    shared_techs = actual_cf.index.intersection(theoretical_cf.index)
    cf_filtered = actual_cf.loc[shared_techs]
    theo_cf_filtered = theoretical_cf.loc[cf_filtered.index]

    # Todo: use config nondispatchable_techs
    keep = (cf_filtered != 0) & (theo_cf_filtered != 0)
    cf_filtered = cf_filtered[keep].sort_values(ascending=True)
    # Reorder the theoretical values to follow the sorted actual values.
    theo_cf_filtered = theo_cf_filtered[keep].loc[cf_filtered.index]

    return cf_filtered, theo_cf_filtered

prepare_province_peakload_capacity_data(n, attached_carriers=None)

Prepare DataFrame for province peak load and installed capacity by technology.

Returns:

Name Type Description
df_plot

DataFrame with provinces as index, columns as technologies and 'Peak Load'.

bar_cols

List of technology columns to plot as bars.

color_list

List of colors for each technology.

Source code in workflow/scripts/plot_statistics.py
def prepare_province_peakload_capacity_data(n, attached_carriers=None):
    """
    Prepare DataFrame for province peak load and installed capacity by technology.

    Args:
        n: The PyPSA network instance.
        attached_carriers: Optional iterable of carrier names. When given, only
            technologies whose nice name matches one of these carriers are kept.
            Defaults to None (keep all).

    Returns:
        df_plot: DataFrame with provinces as index, columns as technologies and 'Peak Load'.
        bar_cols: List of technology columns to plot as bars.
        color_list: List of colors for each technology.
    """

    def to_nice_name(carrier):
        # Carriers missing from n.carriers keep their raw name.
        return n.carriers.loc[carrier, "nice_name"] if carrier in n.carriers.index else carrier

    # Calculate peak load per province
    load = n.loads.copy()
    load["province"] = load["bus"].map(n.buses["location"])
    # Group the load columns by province via a transpose:
    # DataFrame.groupby(axis=1) is deprecated in pandas.
    load_by_province = n.loads_t.p_set.T.groupby(load["province"]).sum()
    peak_load = load_by_province.max(axis=1)
    peak_load = peak_load / PLOT_CAP_UNITS  # ensure peak load is in GW

    # Calculate installed capacity per province and technology using optimal_capacity
    ds = n.statistics.optimal_capacity(groupby=["location", "carrier"]).dropna()
    valid_components = ["Generator", "StorageUnit", "Link"]
    ds = ds.loc[ds.index.get_level_values(0).isin(valid_components)]
    # NOTE(review): the index levels here are (component, location, carrier),
    # so this 2-tuple key addresses component/location rather than
    # component/carrier -- presumably this branch never matches; confirm intent.
    if ("Link", "battery") in ds.index:
        ds.loc[("Link", "battery charger")] = ds.loc[("Link", "battery")]
        ds = ds.drop(index=("Link", "battery"))
    if "stations" in ds.index.get_level_values(2):
        ds = ds.drop("stations", level=2)
    if "load shedding" in ds.index.get_level_values(2):
        ds = ds.drop("load shedding", level=2)
    # Sum over components, leaving (location, carrier).
    ds = ds.groupby(level=[1, 2]).sum()
    ds.index = pd.MultiIndex.from_tuples(
        [(prov, to_nice_name(carrier)) for prov, carrier in ds.index],
        names=["province", "nice_name"],
    )
    cap_by_prov_tech = ds.unstack(level=-1).fillna(0)
    cap_by_prov_tech = cap_by_prov_tech.abs() / PLOT_CAP_UNITS

    if "Battery Discharger" in cap_by_prov_tech.columns:
        cap_by_prov_tech = cap_by_prov_tech.drop(columns="Battery Discharger")
    if "AC" in cap_by_prov_tech.columns:
        cap_by_prov_tech = cap_by_prov_tech.drop(columns="AC")
    # Only keep columns in attached_carriers if provided
    if attached_carriers is not None:
        # Compare on nice names, since the columns are nice names
        attached_nice_names = [to_nice_name(c) for c in attached_carriers]
        cap_by_prov_tech = cap_by_prov_tech[
            [c for c in cap_by_prov_tech.columns if c in attached_nice_names]
        ]

    # Merge peak load and capacity
    df_plot = cap_by_prov_tech.copy()
    df_plot["Peak Load"] = peak_load

    # Bar columns: exclude Peak Load, only keep nonzero
    bar_cols = [c for c in df_plot.columns if c != "Peak Load" and df_plot[c].sum() > 0]
    # Build the nice_name -> colour lookup once instead of once per column.
    color_by_nice_name = n.carriers.set_index("nice_name").color
    color_list = [color_by_nice_name.get(tech, "lightgrey") for tech in bar_cols]
    return df_plot, bar_cols, color_list

Set link capacity to output and not input. PyPSA uses input link capacities, but one typically wants to report output capacities (e.g. MWel).

Parameters:

Name Type Description Default
n Network

The PyPSA network instance.

required
carriers list

List of carrier names to adjust.

required

Returns:

Type Description
DataFrame

pd.DataFrame: the original link capacities.

Source code in workflow/scripts/plot_statistics.py
def set_link_output_capacities(n: pypsa.Network, carriers: list) -> pd.Series:
    """Set link capacity to output and not input.
    PyPSA uses input link capacities, but one typically wants to report
    output capacities (e.g. MWel).

    The network is modified in place: ``p_nom_opt`` of the selected links is
    multiplied by their efficiency, and ``p_nom_opt`` of zero-cost "reversed"
    links is set to 0. Use the returned copy to restore the original values.

    Args:
        n (pypsa.Network): The PyPSA network instance.
        carriers (list): List of bus carrier names; links whose ``bus1`` has
            one of these carriers are adjusted.

    Returns:
        pd.Series: a copy of the original ``p_nom_opt`` values of all links,
            taken before any modification.
    """
    # Temporarily save original link capacities
    original_p_nom_opt = n.links.p_nom_opt.copy()

    # For links whose output bus (bus1) carrier is in `carriers`, multiply
    # the capacity by the efficiency to get the output-side capacity
    bus1_carrier = n.links.bus1.map(n.buses.carrier)
    output_links = n.links[bus1_carrier.isin(carriers)].index
    n.links.loc[output_links, "p_nom_opt"] *= n.links.loc[output_links, "efficiency"]

    # ignore lossy link dummies
    pseudo_links = n.links.query("Link.str.contains('reversed') & capital_cost ==0 ").index
    n.links.loc[pseudo_links, "p_nom_opt"] = 0

    return original_p_nom_opt