Source code for aerosandbox.tools.webplotdigitizer_reader
"""
A series of utilities for working with CSV data extracted from WebPlotDigitizer.
https://automeris.io/WebPlotDigitizer/
https://github.com/ankitrohatgi/WebPlotDigitizer
"""
import numpy as np
from typing import Dict
from pathlib import Path
from typing import Union
[docs]def string_to_float(s: str) -> float:
"""Converts a string input to a float. If not possible, returns NaN."""
try:
return float(s)
except ValueError:
return np.nan
[docs]def remove_nan_rows(a: np.ndarray) -> np.ndarray:
"""Removes any rows in a 2D ndarray where any of the entries are NaN."""
nan_rows = np.any(np.isnan(a), axis=1)
return a[~nan_rows, :]
[docs]def read_webplotdigitizer_csv(
filename: Union[Path, str],
) -> Dict[str, np.ndarray]:
"""
Reads a CSV file produced by WebPlotDigitizer (https://automeris.io/WebPlotDigitizer/).
If there's only one data series, produces a Dict with key "data" and value 2D ndarray.
If there are multiple data series, produces a Dict with keys of the names and values of 2D ndarrays.
2D ndarrays are sorted by their X-values before being returned.
Args:
filename: Filename, as a string or pathlib Path, or equivalent.
Returns: A dictionary where keys are series names and values are data points.
"""
delimiter = ","
with open(filename, "r") as f:
lines = f.readlines()
has_titles = np.any([
np.isnan(string_to_float(s))
for s in lines[0].split(delimiter)
])
if has_titles:
titles = lines[0].split(delimiter)[::2]
first_data_row = 2
else:
titles = ["data"]
first_data_row = 0
all_data = np.array([
[string_to_float(item) for item in line.split(delimiter)]
for line in lines[first_data_row:]
], dtype=float)
output = {}
for i, title in enumerate(titles):
series = all_data[:, 2 * i: 2 * i + 2]
all_nan_rows = np.all(np.isnan(series), axis=1)
series = series[~all_nan_rows, :]
sort_order = np.argsort(series[:, 0])
output[title] = series[sort_order, :]
return output