External data¶
Some datasets are large. To avoid having large JSON-documents with all the data for all scenarios, data can be sideloaded in an external data store. Currently CSV, HDF5, and SQLite are supported. The easiest way to create a dataset with any of these methods is to use the helper methods in create_csv_timeseries.py, create_hdf5_timeseries.py, or create_sqlite_timeseries.py. See the complete listings below.
CSV¶
The structure for CSV-files is a 2D-array, where each line is starting with one timestamp in milliseconds (UNIX epoch), and all values for all scenarios are comma-separated.
[timestamp_ms],[value_scenario_0],...,[value_scenario_n]
# timestamp_ms, scenario_0, scenario_1, scenario_2, scenario_3, scenario_4, scenario_5, scenario_6, scenario_7, scenario_8, scenario_9, scenario_10
1,1, 1, 101, 201, 301, 401, 501, 601, 701, 801, 901
2,2, 2, 102, 202, 302, 402, 502, 602, 702, 802, 902
3,3, 3, 103, 203, 303, 403, 503, 603, 703, 803, 903
Warning
Lines starting with ‘#’ are ignored. The parser is very bare-bones and will choke on regular CSV headers unless they start with ‘#’. It is advised not to use headers in CSV files.
Usage with txy:
"external_reference": {
"type": "csv",
"filename": "example_dataset.csv"
},
HDF5¶
"external_reference": {
"type": "hdf5",
"filename": "example_dataset.h5",
"path": "/global/calc_water_value_period"
},
name : optional, string¶
SQLite¶
Usage with txy:
"external_reference": {
"type": "sqlite",
"filename": "example_dataset.sqlite"
"path": "table_name"
},
CSV Example¶
Create CSV timeseries
import csv
import os
import numpy as np
def unix_epoch(dt: np.datetime64):
    """
    Convert *dt* (anything ``np.datetime64`` accepts, e.g. an ISO string)
    to a UNIX epoch timestamp with millisecond resolution.
    """
    as_ms = np.datetime64(dt, "ms")
    return as_ms.astype("int64")
def create_csv_timeseries(file: str, timestamps, scenarios):
    """
    Write a timeseries CSV file in the external-data format.

    Each data line is ``timestamp_ms,value_scenario_0,...,value_scenario_n``.
    A single header line starting with '#' is written first; the parser
    ignores lines starting with '#'.

    :param file: destination CSV filename
    :param timestamps: iterable of values ``np.datetime64`` accepts
    :param scenarios: 2D array-like, shape (num_scenarios, num_timestamps)
    """
    # Vectorized conversion to UNIX epoch in milliseconds.
    timestamps_unix = np.asarray(timestamps, dtype="datetime64[ms]").astype("int64")
    scenarios = np.asarray(scenarios)  # accept plain nested lists as well
    num_scenarios = len(scenarios)
    scenariolabels = ["Scenario_{}".format(num) for num in range(num_scenarios)]
    with open(file, "w", newline="") as csvfile:
        csvwriter = csv.writer(
            csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        # The leading '#' marks the header as a comment for the parser.
        csvwriter.writerow(["# Timestamps MS"] + scenariolabels)
        for n, ts in enumerate(timestamps_unix):
            # One row per timestamp: epoch ms followed by all scenario values.
            csvwriter.writerow([ts] + list(scenarios[:, n]))
if __name__ == "__main__":
create_csv_timeseries(
"water_value_period.csv",
["2024-01-01T00:00:00", "2026-12-28T00:00:00"],
np.array([[1, 0]]),
)
H5 Example¶
Create HDF5 timeseries
import os
import h5py
import numpy as np
from create_csv_timeseries import create_csv_timeseries
from create_sqlite_timeseries import create_sqlite_timeseries
def unix_epoch(dt: np.datetime64):
    """
    Return milliseconds since the UNIX epoch for the given datetime value.
    Accepts anything ``np.datetime64`` can parse (e.g. ISO date strings).
    """
    in_milliseconds = np.datetime64(dt).astype("datetime64[ms]")
    return in_milliseconds.astype("int64")
def create_hdf5_timeseries(file: str, group: str, timestamps, scenarios):
    """
    Create a timeseries-compatible HDF5 dataset and return the structured
    data compatible with the LTM API JSON external_reference schema.

    :param file: HDF5 filename; opened in append mode so several groups can
        be written to the same file
    :param group: HDF5 group path, e.g. "/global/calc_water_value_period"
    :param timestamps: iterable of values ``np.datetime64`` accepts
    :param scenarios: 2D array-like, shape (num_scenarios, num_timestamps)
    :return: dict with the "external_reference" entry, or None if the
        input was rejected
    """
    if len(timestamps) <= 0 or len(scenarios) <= 0:
        print(f"Not creating a empty dataset for {group}")
        return
    if len(timestamps) != len(scenarios[0]):
        print(f"Count timestamps != count values for {group}")
        return
    timestamps_unix = [unix_epoch(t) for t in timestamps]
    num_timestamps = len(timestamps_unix)
    num_scenarios = len(scenarios)
    with h5py.File(file, "a") as f:
        grp = f.require_group(group)
        # 1D dataset of UNIX epoch timestamps in milliseconds.
        # NOTE: shape must be a real tuple; `(None)` is just None and makes
        # require_dataset raise.
        grp.require_dataset(
            "times", shape=(num_timestamps,), dtype="int64", data=timestamps_unix
        )
        # Values are stored as (timestamp, scenario). The incoming
        # `scenarios` matrix is (scenario, timestamp), so transpose it to
        # match the declared shape instead of letting h5py flat-reshape it
        # (which scrambles multi-scenario data).
        grp.require_dataset(
            "vals",
            shape=(num_timestamps, num_scenarios),
            chunks=(min(num_timestamps, 10000), 1),
            maxshape=(num_timestamps, num_scenarios),
            dtype="float32",
            data=np.asarray(scenarios).T,
        )
        return {"external_reference": {"type": "hdf5", "filename": file, "path": group}}
if __name__ == "__main__":
file = "example_dataset.h5"
if os.path.isfile(file):
os.remove(file)
# 2D test
create_hdf5_timeseries(
file,
"/test/test_2d",
[
"2024-01-01T00:00:00",
"2024-01-15T00:00:00",
"2024-01-31T00:00:00",
"2027-01-01T00:00:00",
],
[
[1, 2, 3, 4],
[5, 6, 7, 8],
[9, 10, 11, 12],
],
)
# Global settings
create_hdf5_timeseries(
file,
"/global/calc_water_value_period",
*(["2024-01-01T00:00:00", "2026-12-28T00:00:00"], [[1, 0]]),
)
create_hdf5_timeseries(
file,
"/global/simulation_price_period",
*(
[
"2024-01-01T00:00:00",
"2024-01-02T00:00:00",
"2024-01-03T00:00:00",
"2024-01-04T00:00:00",
"2024-01-05T00:00:00",
],
[[1, 2, 3, 4, 5]],
),
)
# Defaults for spill cost / load penalty
spill_cost = (["1900-01-01T00:00:00"], np.array([[0.0001]]))
load_penalty = (["1900-01-01T00:00:00"], np.array([[900.0]]))
create_hdf5_timeseries(file, "/defaults/spill_cost", *spill_cost)
create_hdf5_timeseries(file, "/defaults/load_penalty", *load_penalty)
create_csv_timeseries("example_spill_cost.csv", *spill_cost)
create_csv_timeseries("example_load_penalty.csv", *load_penalty)
# DCLines
dcline_a_fwd_capacity = (
[
"2024-01-01T00:00:00",
"2025-01-01T00:00:00",
"2026-01-01T00:00:00",
"2027-01-01T00:00:00",
],
[[50, 75, 100, 95]],
)
dcline_a_bck_capacity = (
[
"2024-01-01T00:00:00",
"2025-01-01T00:00:00",
"2026-01-01T00:00:00",
"2027-01-01T00:00:00",
],
[[1, 2, 3, 7]],
)
create_hdf5_timeseries(file, "/dclines/dcline_a_fwd", *dcline_a_fwd_capacity)
create_hdf5_timeseries(file, "/dclines/dcline_a_bck", *dcline_a_bck_capacity)
# Market steps
create_hdf5_timeseries(
file,
"/market/numedal_import/price",
[
"2024-01-01T00:00:00",
"2024-01-15T00:00:00",
"2024-01-31T00:00:00",
"2027-01-01T00:00:00",
],
[[12, 24, 12, 423]],
)
create_sqlite_timeseries(
"example_dataset.sqlite",
"market_numedal_import_price",
[
"2024-01-01T00:00:00",
"2024-01-15T00:00:00",
"2024-01-31T00:00:00",
"2027-01-01T00:00:00",
],
np.array([[12, 24, 12, 423]]),
)
create_csv_timeseries(
"example_numedal_import_price.csv",
[
"2024-01-01T00:00:00",
"2024-01-15T00:00:00",
"2024-01-31T00:00:00",
"2027-01-01T00:00:00",
],
np.array([[12, 24, 12, 423]]),
)
create_hdf5_timeseries(
file,
"/market/numedal_import/capacity",
[
"2024-01-01T00:00:00",
"2024-01-15T00:00:00",
"2024-01-31T00:00:00",
"2027-01-01T00:00:00",
],
[[0.0, 6.2815, 0.000001, 0.0]],
)
create_hdf5_timeseries(
file,
"/market/numedal_export/price",
[
"2024-01-01T00:00:00",
"2024-01-15T00:00:00",
"2024-01-31T00:00:00",
"2027-01-01T00:00:00",
],
[[12, 24, 12, 423]],
)
create_hdf5_timeseries(
file,
"/market/numedal_export/capacity",
[
"2024-01-01T00:00:00",
"2024-01-15T00:00:00",
"2024-01-31T00:00:00",
"2027-01-01T00:00:00",
],
[[0.0, -6.2815, -0.000001, 0.0]],
)
SQLite Example¶
Create SQLite timeseries
import os
import sqlite3
import numpy as np
def unix_epoch(dt: np.datetime64):
    """
    Convert a numpy-parsable datetime (ISO string or ``np.datetime64``)
    to a UNIX epoch timestamp in millisecond resolution.
    """
    return np.datetime64(dt, "ms").astype("int64")
def create_sqlite_table(conn: sqlite3.Connection, table_name: str, num_scenarios: int):
    """
    (Re)create a timeseries table with a timestamp primary key and one
    REAL column per scenario.

    :param conn: open SQLite connection (committed before returning)
    :param table_name: target table; '/' (the HDF5 group separator) is
        replaced with '_' to form a valid table name
    :param num_scenarios: number of scenario_N REAL columns to create
    :raises RuntimeError: if the database cannot be switched to WAL mode
    """
    # Replace group separator from HDF5 with underscores
    table_name = table_name.replace("/", "_")
    scenario_names = ["scenario_{}".format(num) for num in range(num_scenarios)]
    scenario_columns = ", ".join(f"{name} REAL" for name in scenario_names)
    cursor = conn.cursor()
    # WAL allows concurrent readers while the writer fills the table.
    cursor.execute("PRAGMA journal_mode=WAL;")
    cursor.execute("PRAGMA journal_mode;")
    wal_mode = cursor.fetchone()
    if wal_mode[0] != "wal":
        # Explicit raise instead of assert: still enforced under `python -O`.
        raise RuntimeError(f"Could not enable WAL journal mode, got {wal_mode[0]!r}")
    # Up to the user to prevent SQL injection for table name. Named
    # parameters are not supported for names, only literal values.
    cursor.execute(
        f"""
        DROP TABLE IF EXISTS {table_name};
        """
    )
    cursor.execute(
        f"""
        CREATE TABLE {table_name} (
            timestamp INTEGER PRIMARY KEY,
            {scenario_columns}
        )
        """
    )
    conn.commit()
def create_insert_statement(table_name: str, num_scenarios: int):
    """
    Build the parameterized INSERT statement matching create_sqlite_table.

    :param table_name: table name; '/' is replaced with '_' exactly as in
        create_sqlite_table so the two stay in sync
    :param num_scenarios: number of scenario columns
    :return: "INSERT INTO <table> (timestamp, scenario_0, ...) VALUES (?, ...)"
    """
    table_name = table_name.replace("/", "_")
    scenario_names = ["scenario_{}".format(num) for num in range(num_scenarios)]
    # Plain joins: the previous f-string wrappers around bare names/`?`
    # were no-ops.
    scenario_columns = ", ".join(scenario_names)
    scenario_placeholders = ", ".join("?" for _ in scenario_names)
    return f"INSERT INTO {table_name} (timestamp, {scenario_columns}) VALUES (?, {scenario_placeholders})"
def create_sqlite_timeseries(file: str, group: str, timestamps, scenarios):
    """
    Write a timeseries into an SQLite database, one row per timestamp.

    :param file: SQLite database filename (created if missing)
    :param group: table name; '/' separators are mapped to '_'
    :param timestamps: iterable of values ``np.datetime64`` accepts
    :param scenarios: 2D array-like, shape (num_scenarios, num_timestamps)
    """
    timestamps_unix = [unix_epoch(t) for t in timestamps]
    scenarios = np.asarray(scenarios)  # accept plain nested lists as well
    num_scenarios = len(scenarios)
    conn = sqlite3.connect(file)
    try:
        create_sqlite_table(conn, group, num_scenarios)
        insert_sql = create_insert_statement(group, num_scenarios)
        cursor = conn.cursor()
        for n, ts in enumerate(timestamps_unix):
            # Timestamp is the UNIX epoch in milliseconds; float() converts
            # numpy scalars to values sqlite3 can bind (columns are REAL).
            row = [int(ts)] + [float(v) for v in scenarios[:, n]]
            cursor.execute(insert_sql, row)
        conn.commit()
    finally:
        # The connection was previously leaked; always close it.
        conn.close()
if __name__ == "__main__":
num_scenarios = 5
watervalue_years = 1
start_date = np.datetime64("2024-01-01")
end_date = start_date + np.timedelta64(watervalue_years * 52, "W")
timestamps = np.arange(
start_date,
end_date,
np.timedelta64(1, "h"),
)
num_values = len(timestamps)
# x = np.linspace(0, 2 * np.pi, num_values * scenarios)
# y = np.sin(x) * 2 + np.random.random(num_values * scenarios) * 0.2 + 1
x = np.linspace(0, num_values * num_scenarios, num_values * num_scenarios)
y = x
y.shape = (num_scenarios, num_values)
create_sqlite_timeseries("example", "/hdf5/group/name", timestamps, y)
pass
Example JSON file with external references:¶
JSON model
{
"$schema": "https://gitlab.sintef.no/energy/ltm/pyltmapi/-/raw/main/model.schema.json",
"model": {
"global_settings": {
"name": "global_settings",
"output_path": "testout_external/",
"calc_water_value_flag": {
"external_reference": {
"type": "csv",
"filename": "water_value_period.csv"
},
"unit": "ON/OFF"
},
"historical_period": {
"timestamps": [
"1980-01-01T00:00:00Z",
"2010-01-01T00:00:00Z"
],
"scenarios": [
[
1,
0
]
],
"unit": "ON/OFF"
},
"simulation_price_period": {
"#comment": "prisavsnitt -> PRISREKKE.DATA",
"external_reference": {
"type": "csv",
"filename": "water_value_period.csv"
}
}
},
"dclines": [
{
"#comment": "txy A->B and B->A",
"name": "DC_A",
"forward_capacity": {
"external_reference": {
"type": "hdf5",
"filename": "example_dataset.h5",
"path": "/dclines/dcline_a_fwd"
}
},
"backward_capacity": {
"external_reference": {
"type": "hdf5",
"filename": "example_dataset.h5",
"path": "/dclines/dcline_a_bck"
}
},
"loss_percentage": 1.0,
"forward_cost": 2.0,
"backward_cost": 3.0,
"metadata": {
"foo": "bar",
"baz": 42,
"object": {
"ting": "tang"
}
}
}
],
"market_steps": [
{
"name": "numedal - market 1",
"price": {
"external_reference": {
"type": "sqlite",
"filename": "example_dataset.sqlite",
"path": "market_numedal_import_price"
}
},
"capacity": {
"external_reference": {
"type": "hdf5",
"filename": "example_dataset.h5",
"path": "/market/numedal_import/capacity"
}
}
},
{
"name": "NUMEDAL-EKSPORT 1",
"price": {
"external_reference": {
"type": "hdf5",
"filename": "example_dataset.h5",
"path": "/market/numedal_export/price"
}
},
"capacity": {
"external_reference": {
"type": "hdf5",
"filename": "example_dataset.h5",
"path": "/market/numedal_export/capacity"
}
}
}
],
"main_price_model": {
"#comment": "hovedprisrekke"
},
"busbars": [
{
"name": "numedal"
},
{
"name": "otta"
},
{
"#comment": "water_value_auto_weights",
"name": "test_case_101"
},
{
"#comment": "water_value_manual_weights",
"name": "test_case_102",
"water_value_weights": {
"inflow_conditions": 1,
"weights": [
[
18,
[
1.0,
5.0,
9.0,
20.0,
9.0,
5.0,
1.0
]
],
[
40,
[
1.0,
5.0,
9.0,
20.0,
9.0,
5.0,
1.0
]
],
[
70,
[
1.0,
5.0,
9.0,
20.0,
9.0,
5.0,
1.0
]
]
]
}
}
],
"connections": [
{
"from": "numedal - market 1",
"to": "numedal"
},
{
"from": "NUMEDAL-EKSPORT 1",
"to": "numedal"
},
{
"from": "otta",
"to": "DC_A"
},
{
"from": "DC_A",
"to": "numedal"
}
]
}
}