External data

Some datasets are large. To avoid having large JSON-documents with all the data for all scenarios, data can be sideloaded in an external data store. Currently CSV, HDF5, and SQLite are supported. The easiest way to create a dataset with either method is to use the helper methods in create_csv_timeseries.py, create_hdf5_timeseries.py, or create_sqlite_timeseries.py. See the complete listings below.

CSV

The structure for CSV-files is a 2D-array, where each line is starting with one timestamp in milliseconds (UNIX epoch), and all values for all scenarios are comma-separated.

[timestamp_ms],[value_scenario_0],...,[value_scenario_n]
# timestamp_ms, scenario_0, scenario_1, scenario_2, scenario_3, scenario_4, scenario_5, scenario_6, scenario_7, scenario_8, scenario_9, scenario_10
1,1, 1, 101, 201, 301, 401, 501, 601, 701, 801, 901
2,2, 2, 102, 202, 302, 402, 502, 602, 702, 802, 902
3,3, 3, 103, 203, 303, 403, 503, 603, 703, 803, 903

Warning

Lines starting with ‘#’ are ignored. The parser is very bare-bones and will choke on regular CSV headers unless they start with ‘#’. It is advised not to use headers in CSV files.

Usage with txy:

"external_reference": {
  "type": "csv",
  "filename": "example_dataset.csv"
},

HDF5

Usage with txy:

"external_reference": {
  "type": "hdf5",
  "filename": "example_dataset.h5",
  "path": "/global/calc_water_value_period"
},

name : optional, string

SQLite

Usage with txy:

"external_reference": {
  "type": "sqlite",
  "filename": "example_dataset.sqlite"
  "path": "table_name"
},

CSV Example

Create CSV timeseries
import csv
import os

import numpy as np


def unix_epoch(dt: np.datetime64):
    """Return *dt* as a UNIX epoch timestamp in millisecond resolution."""
    return np.datetime64(dt).astype("datetime64[ms]").astype("int64")


def create_csv_timeseries(file: str, timestamps, scenarios):
    """
    Write a timeseries CSV file in the external-reference format.

    Each data row is ``<timestamp_ms>,<value_scenario_0>,...``; the first
    row is a '#'-prefixed header line, which the parser ignores.

    Parameters
    ----------
    file : str
        Destination CSV path; overwritten if it exists.
    timestamps : sequence
        Values convertible by ``np.datetime64``, one per column of data.
    scenarios : 2D array-like
        Shape (num_scenarios, num_timestamps); must support 2D slicing
        like ``scenarios[:, n]`` (e.g. a numpy array).
    """
    timestamps_unix = [unix_epoch(ts) for ts in timestamps]
    num_scenarios = len(scenarios)

    scenariolabels = ["Scenario_{}".format(num) for num in range(num_scenarios)]

    with open(file, "w", newline="") as csvfile:
        csvwriter = csv.writer(
            csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )

        # Header starts with '#' so the bare-bones CSV parser skips it.
        csvwriter.writerow(["# Timestamps MS"] + scenariolabels)

        for n, ts in enumerate(timestamps_unix):
            # ts is the UNIX epoch in milliseconds (not seconds);
            # scenarios[:, n] holds every scenario value at this timestamp.
            csvwriter.writerow([ts] + list(scenarios[:, n]))


if __name__ == "__main__":

    create_csv_timeseries(
        "water_value_period.csv",
        ["2024-01-01T00:00:00", "2026-12-28T00:00:00"],
        np.array([[1, 0]]),
    )

H5 Example

Create HDF5 timeseries
import os

import h5py
import numpy as np
from create_csv_timeseries import create_csv_timeseries
from create_sqlite_timeseries import create_sqlite_timeseries


def unix_epoch(dt: np.datetime64):
    """Convert *dt* to a UNIX timestamp in milliseconds."""
    as_ms = np.datetime64(dt).astype("datetime64[ms]")
    return as_ms.astype("int64")


def create_hdf5_timeseries(file: str, group: str, timestamps, scenarios):
    """
    Create a timeseries-compatible HDF5 dataset and return the structured
    data compatible with the LTM API JSON ``external_reference`` schema.

    Parameters
    ----------
    file : str
        HDF5 file path; opened in append mode so several groups can be
        written to the same file.
    group : str
        HDF5 group path, e.g. ``"/global/calc_water_value_period"``.
    timestamps : sequence
        Values convertible by ``np.datetime64``; one per column of
        ``scenarios``.
    scenarios : 2D array-like
        Shape (num_scenarios, num_timestamps): one row per scenario.

    Returns
    -------
    dict or None
        The ``external_reference`` JSON fragment on success, ``None`` on
        empty input or a timestamp/value count mismatch.
    """
    if len(timestamps) <= 0 or len(scenarios) <= 0:
        print(f"Not creating an empty dataset for {group}")
        return

    if len(timestamps) != len(scenarios[0]):
        print(f"Count timestamps != count values for {group}")
        return

    timestamps_unix = [unix_epoch(ts) for ts in timestamps]

    num_timestamps = len(timestamps_unix)
    num_scenarios = len(scenarios)

    with h5py.File(file, "a") as f:
        grp = f.require_group(group)

        # 1D dataset of millisecond UNIX timestamps.  Note: shape must be
        # a tuple -- `(None)` is just None, not a 1-tuple.
        grp.require_dataset(
            "times", shape=(num_timestamps,), dtype="int64", data=timestamps_unix
        )

        # 2D dataset laid out like `scenarios`: one row per scenario.
        # The declared shape must match the data layout
        # (num_scenarios x num_timestamps); the previous version had the
        # axes transposed.  Chunked one scenario at a time for efficient
        # per-scenario reads.
        grp.require_dataset(
            "vals",
            shape=(num_scenarios, num_timestamps),
            chunks=(1, min(num_timestamps, 10000)),
            maxshape=(num_scenarios, num_timestamps),
            dtype="float32",
            data=scenarios,
        )

        return {"external_reference": {"type": "hdf5", "filename": file, "path": group}}


if __name__ == "__main__":

    file = "example_dataset.h5"

    if os.path.isfile(file):
        os.remove(file)

    # 2D test
    create_hdf5_timeseries(
        file,
        "/test/test_2d",
        [
            "2024-01-01T00:00:00",
            "2024-01-15T00:00:00",
            "2024-01-31T00:00:00",
            "2027-01-01T00:00:00",
        ],
        [
            [1, 2, 3, 4],
            [5, 6, 7, 8],
            [9, 10, 11, 12],
        ],
    )

    # Global settings
    create_hdf5_timeseries(
        file,
        "/global/calc_water_value_period",
        *(["2024-01-01T00:00:00", "2026-12-28T00:00:00"], [[1, 0]]),
    )

    create_hdf5_timeseries(
        file,
        "/global/simulation_price_period",
        *(
            [
                "2024-01-01T00:00:00",
                "2024-01-02T00:00:00",
                "2024-01-03T00:00:00",
                "2024-01-04T00:00:00",
                "2024-01-05T00:00:00",
            ],
            [[1, 2, 3, 4, 5]],
        ),
    )

    # Defaults for spill cost / load penalty
    spill_cost = (["1900-01-01T00:00:00"], np.array([[0.0001]]))
    load_penalty = (["1900-01-01T00:00:00"], np.array([[900.0]]))

    create_hdf5_timeseries(file, "/defaults/spill_cost", *spill_cost)
    create_hdf5_timeseries(file, "/defaults/load_penalty", *load_penalty)

    create_csv_timeseries("example_spill_cost.csv", *spill_cost)
    create_csv_timeseries("example_load_penalty.csv", *load_penalty)

    # DCLines
    dcline_a_fwd_capacity = (
        [
            "2024-01-01T00:00:00",
            "2025-01-01T00:00:00",
            "2026-01-01T00:00:00",
            "2027-01-01T00:00:00",
        ],
        [[50, 75, 100, 95]],
    )

    dcline_a_bck_capacity = (
        [
            "2024-01-01T00:00:00",
            "2025-01-01T00:00:00",
            "2026-01-01T00:00:00",
            "2027-01-01T00:00:00",
        ],
        [[1, 2, 3, 7]],
    )

    create_hdf5_timeseries(file, "/dclines/dcline_a_fwd", *dcline_a_fwd_capacity)
    create_hdf5_timeseries(file, "/dclines/dcline_a_bck", *dcline_a_bck_capacity)

    # Market steps
    create_hdf5_timeseries(
        file,
        "/market/numedal_import/price",
        [
            "2024-01-01T00:00:00",
            "2024-01-15T00:00:00",
            "2024-01-31T00:00:00",
            "2027-01-01T00:00:00",
        ],
        [[12, 24, 12, 423]],
    )

    create_sqlite_timeseries(
        "example_dataset.sqlite",
        "market_numedal_import_price",
        [
            "2024-01-01T00:00:00",
            "2024-01-15T00:00:00",
            "2024-01-31T00:00:00",
            "2027-01-01T00:00:00",
        ],
        np.array([[12, 24, 12, 423]]),
    )

    create_csv_timeseries(
        "example_numedal_import_price.csv",
        [
            "2024-01-01T00:00:00",
            "2024-01-15T00:00:00",
            "2024-01-31T00:00:00",
            "2027-01-01T00:00:00",
        ],
        np.array([[12, 24, 12, 423]]),
    )

    create_hdf5_timeseries(
        file,
        "/market/numedal_import/capacity",
        [
            "2024-01-01T00:00:00",
            "2024-01-15T00:00:00",
            "2024-01-31T00:00:00",
            "2027-01-01T00:00:00",
        ],
        [[0.0, 6.2815, 0.000001, 0.0]],
    )

    create_hdf5_timeseries(
        file,
        "/market/numedal_export/price",
        [
            "2024-01-01T00:00:00",
            "2024-01-15T00:00:00",
            "2024-01-31T00:00:00",
            "2027-01-01T00:00:00",
        ],
        [[12, 24, 12, 423]],
    )

    create_hdf5_timeseries(
        file,
        "/market/numedal_export/capacity",
        [
            "2024-01-01T00:00:00",
            "2024-01-15T00:00:00",
            "2024-01-31T00:00:00",
            "2027-01-01T00:00:00",
        ],
        [[0.0, -6.2815, -0.000001, 0.0]],
    )

SQLite Example

Create SQLite timeseries
import os
import sqlite3

import numpy as np


def unix_epoch(dt: np.datetime64):
    """Convert a datetime-like value to milliseconds since the UNIX epoch."""
    ms_resolution = np.datetime64(dt).astype("datetime64[ms]")
    return ms_resolution.astype("int64")


def create_sqlite_table(conn: sqlite3.Connection, table_name: str, num_scenarios: int):
    """
    (Re)create a timeseries table with one REAL column per scenario.

    Any existing table of the same name is dropped, then recreated as
    ``(timestamp INTEGER PRIMARY KEY, scenario_0 REAL, ...)``.  The
    database is switched to WAL journal mode first.

    Parameters
    ----------
    conn : sqlite3.Connection
        Open connection; the transaction is committed before returning.
    table_name : str
        Table name; '/' characters (HDF5 group separators) are replaced
        with underscores.
    num_scenarios : int
        Number of scenario columns to create.

    Raises
    ------
    RuntimeError
        If the database could not be switched to WAL journal mode.
    """
    # Replace group separator from HDF5 with underscores
    table_name = table_name.replace("/", "_")
    scenario_names = ["scenario_{}".format(num) for num in range(num_scenarios)]
    scenario_columns = ", ".join(f"{name} REAL" for name in scenario_names)

    cursor = conn.cursor()
    cursor.execute("PRAGMA journal_mode=WAL;")
    cursor.execute("PRAGMA journal_mode;")
    wal_mode = cursor.fetchone()
    # Explicit check instead of `assert`: asserts are stripped under -O.
    if wal_mode[0] != "wal":
        raise RuntimeError(f"Could not enable WAL journal mode, got {wal_mode[0]!r}")

    # Up to the user to prevent SQL injection for table name. Named
    # parameters are not supported for names, only literal values.
    cursor.execute(f"DROP TABLE IF EXISTS {table_name};")
    cursor.execute(
        f"""
            CREATE TABLE {table_name} (
                timestamp INTEGER PRIMARY KEY,
                {scenario_columns}
            )
        """
    )

    conn.commit()


def create_insert_statement(table_name: str, num_scenarios: int):
    """
    Build the parameterized INSERT statement for a timeseries table.

    Returns e.g. ``INSERT INTO t (timestamp, scenario_0) VALUES (?, ?)``.
    Values are bound via '?' placeholders; identifiers cannot be bound,
    so the table name is interpolated directly (caller must ensure it is
    trusted).  '/' separators are replaced with underscores, matching
    create_sqlite_table.
    """
    table_name = table_name.replace("/", "_")
    scenario_names = ["scenario_{}".format(num) for num in range(num_scenarios)]
    scenario_columns = ", ".join(scenario_names)
    scenario_placeholders = ", ".join("?" for _ in scenario_names)

    return f"INSERT INTO {table_name} (timestamp, {scenario_columns}) VALUES (?, {scenario_placeholders})"


def create_sqlite_timeseries(file: str, group: str, timestamps, scenarios):
    """
    Write a timeseries into an SQLite table named after `group`.

    Parameters
    ----------
    file : str
        SQLite database path; created if it does not exist.
    group : str
        Table name; '/' separators are replaced with underscores by the
        helpers, so HDF5-style group paths are accepted.
    timestamps : sequence
        Values convertible by ``np.datetime64``, one per row.
    scenarios : 2D array-like
        Shape (num_scenarios, num_timestamps); must support 2D slicing
        like ``scenarios[:, n]`` (e.g. a numpy array).
    """
    timestamps_unix = [unix_epoch(ts) for ts in timestamps]
    num_timestamps = len(timestamps_unix)
    num_scenarios = len(scenarios)

    conn = sqlite3.connect(file)
    try:
        create_sqlite_table(conn, group, num_scenarios)

        insert_sql = create_insert_statement(group, num_scenarios)

        cursor = conn.cursor()

        for n in range(num_timestamps):
            # Timestamp is the UNIX epoch in milliseconds (not seconds);
            # scenarios[:, n] holds every scenario value for this row.
            ts = int(timestamps_unix[n])
            cursor.execute(insert_sql, [ts] + list(scenarios[:, n]))

        conn.commit()
    finally:
        # The connection was previously leaked; always close it.
        conn.close()

if __name__ == "__main__":
    num_scenarios = 5

    watervalue_years = 1
    start_date = np.datetime64("2024-01-01")
    end_date = start_date + np.timedelta64(watervalue_years * 52, "W")
    timestamps = np.arange(
        start_date,
        end_date,
        np.timedelta64(1, "h"),
    )

    num_values = len(timestamps)

    # x = np.linspace(0, 2 * np.pi, num_values * scenarios)
    # y = np.sin(x) * 2 + np.random.random(num_values * scenarios) * 0.2 + 1

    x = np.linspace(0, num_values * num_scenarios, num_values * num_scenarios)
    y = x

    y.shape = (num_scenarios, num_values)

    create_sqlite_timeseries("example", "/hdf5/group/name", timestamps, y)

    pass

Example JSON file with external references:

JSON model
{
    "$schema": "https://gitlab.sintef.no/energy/ltm/pyltmapi/-/raw/main/model.schema.json",
    "model": {
        "global_settings": {
            "name": "global_settings",
            "output_path": "testout_external/",
            "calc_water_value_flag": {
                "external_reference": {
                    "type": "csv",
                    "filename": "water_value_period.csv"
                },
                "unit": "ON/OFF"
            },
            "historical_period": {
                "timestamps": [
                    "1980-01-01T00:00:00Z",
                    "2010-01-01T00:00:00Z"
                ],
                "scenarios": [
                    [
                        1,
                        0
                    ]
                ],
                "unit": "ON/OFF"
            },
            "simulation_price_period": {
                "#comment": "prisavsnitt -> PRISREKKE.DATA",
                "external_reference": {
                    "type": "csv",
                    "filename": "water_value_period.csv"
                }
            }
        },
        "dclines": [
            {
                "#comment": "txy A->B and B->A",
                "name": "DC_A",
                "forward_capacity": {
                    "external_reference": {
                        "type": "hdf5",
                        "filename": "example_dataset.h5",
                        "path": "/dclines/dcline_a_fwd"
                    }
                },
                "backward_capacity": {
                    "external_reference": {
                        "type": "hdf5",
                        "filename": "example_dataset.h5",
                        "path": "/dclines/dcline_a_bck"
                    }
                },
                "loss_percentage": 1.0,
                "forward_cost": 2.0,
                "backward_cost": 3.0,
                "metadata": {
                    "foo": "bar",
                    "baz": 42,
                    "object": {
                        "ting": "tang"
                    }
                }
            }
        ],
        "market_steps": [
            {
                "name": "numedal - market 1",
                "price": {
                    "external_reference": {
                        "type": "sqlite",
                        "filename": "example_dataset.sqlite",
                        "path": "market_numedal_import_price"
                    }
                },
                "capacity": {
                    "external_reference": {
                        "type": "hdf5",
                        "filename": "example_dataset.h5",
                        "path": "/market/numedal_import/capacity"
                    }
                }
            },
            {
                "name": "NUMEDAL-EKSPORT 1",
                "price": {
                    "external_reference": {
                        "type": "hdf5",
                        "filename": "example_dataset.h5",
                        "path": "/market/numedal_export/price"
                    }
                },
                "capacity": {
                    "external_reference": {
                        "type": "hdf5",
                        "filename": "example_dataset.h5",
                        "path": "/market/numedal_export/capacity"
                    }
                }
            }
        ],
        "main_price_model": {
            "#comment": "hovedprisrekke"
        },
        "busbars": [
            {
                "name": "numedal"
            },
            {
                "name": "otta"
            },
            {
                "#comment": "water_value_auto_weights",
                "name": "test_case_101"
            },
            {
                "#comment": "water_value_manual_weights",
                "name": "test_case_102",
                "water_value_weights": {
                    "inflow_conditions": 1,
                    "weights": [
                        [
                            18,
                            [
                                1.0,
                                5.0,
                                9.0,
                                20.0,
                                9.0,
                                5.0,
                                1.0
                            ]
                        ],
                        [
                            40,
                            [
                                1.0,
                                5.0,
                                9.0,
                                20.0,
                                9.0,
                                5.0,
                                1.0
                            ]
                        ],
                        [
                            70,
                            [
                                1.0,
                                5.0,
                                9.0,
                                20.0,
                                9.0,
                                5.0,
                                1.0
                            ]
                        ]
                    ]
                }
            }
        ],
        "connections": [
            {
                "from": "numedal - market 1",
                "to": "numedal"
            },
            {
                "from": "NUMEDAL-EKSPORT 1",
                "to": "numedal"
            },
            {
                "from": "otta",
                "to": "DC_A"
            },
            {
                "from": "DC_A",
                "to": "numedal"
            }
        ]
    }
}