import os
import csv, json
import logging
from .utils import setup_logger
[docs]class ExperimentSuitMetrics:
"""
Uses the given path to write and read experiment metrics
and meta information.
If the last segment of the path does not exist, it will be created.
Creating a new object pointing to an already existing metrics path will
reconstruct all metrics files that were written.
WARNING: The reconstructed files will be locked for appending and writing. They can be unlocked by using the unlock() method.
Parameters:
base_path (str): Where to save the experiments. Directories are not created recursively.
verbose (bool): Enable debug mode?
"""
def __init__(self, base_path, verbose=False):
    """
    Prepare the metrics directory and register the experiment files found in it.

    Parameters:
        base_path (str): Directory that holds the metrics files and ".meta.json".
        verbose (bool): Passed on to setup_logger.
    """
    self.logger = setup_logger(
        verbose,
        name="ExperimentSuitMetrics",
        default_log_level=logging.WARN,
    )

    self.BASE_PATH = base_path
    self.META_FILE_PATH = os.path.join(base_path, ".meta.json")
    self.__setup_dir(base_path)

    # Status per experiment name:
    # 0 = found on disk at start-up (locked), 1 = writable, 2 = marked for overwrite
    self.experiment_files = {}

    # Dialect settings shared by every csv reader/writer of this object
    self.delimiter, self.quotechar, self.quoting = " ", "\"", csv.QUOTE_MINIMAL

    self.__load_experiments()
def __setup_dir(self, path):
"""
Setup a directory for a suit of experiment metrics.
"""
# Try to create directory if non existen
if not os.path.exists(path):
os.mkdir(path)
# Create non-existent meta.json file
if not os.path.exists(self.META_FILE_PATH):
# base_content = {"models": [], "dataset": {}, "params": {}, "acquisition_function": [], "run": []}
base_content = {"experiments": []}
self.write_meta(base_content)
# ----------
# Read/Write files
# -------------------------
[docs] def write_line(self, experiment_name, values, filter_keys=None, filter_nan=True):
"""
Writes a new line into one of the experiment files.
Creating the experiment file if it not already exists.
Parameter:
experiment_name (str): The name of the experiment performed.
values (dict): A dictionary of values to write to the experiment file.
filter_keys (list(str)): A list of str keys to filter keys of given values dictionary.
"""
# Filter out empty values
if filter_nan and isinstance(values, dict):
values = dict(filter(lambda elem: elem[1] is not None, values.items()))
values = self._resolve_dict(values)
# Filter specific keys
if filter_keys is not None and isinstance(filter_keys, str):
values = {key: values[key] for key in filter_keys}
filename = self._add_extension(experiment_name, "csv")
file_path = os.path.join(self.BASE_PATH, filename)
# Was metrics file reconstructed and is locked?
if experiment_name in self.experiment_files and self.experiment_files[experiment_name] == 0:
error_msg = "File {} was reconstructed and is locked. Use .unlock(experiment_name) to open this file up for writing.".format(experiment_name)
raise ValueError(error_msg)
mode = self.__get_mode(experiment_name)
with open(file_path, mode) as csv_file:
fieldnames = list(values.keys())
csv_writer = self.__get_csv_writer(csv_file, fieldnames)
# Experiment file non-existent? Overwrite mode?
if (experiment_name not in self.experiment_files) or (self.experiment_files[experiment_name] == 2):
self.experiment_files[experiment_name] = 1
csv_writer.writeheader()
csv_writer.writerow(values)
[docs] def read(self, experiment_name):
"""
Read metrics from a specific experiment.
Parameters:
experiment_name (str): The experiment to read from.
Returns:
(list(dict)) of accumulated experiment metrics.
"""
# .csv extension in filename?
experiment_name = self._add_extension(experiment_name, "csv")
values = []
experiment_file_path = os.path.join(self.BASE_PATH, experiment_name)
with open(experiment_file_path, "r") as csv_file:
csv_reader = self.__get_csv_reader(csv_file)
for row in csv_reader:
values.append(row)
return values
[docs] def unlock(self, experiment_name):
"""
Unlocks a reconstructed file to be available to write it again.
Parameters:
experiment_name (str): Name of the expierment to unlock for appending.
"""
if not experiment_name in self.experiment_files:
return
self.experiment_files[experiment_name] = 1
[docs] def unlock_all(self):
"""
Unlocks all locked files, being able to append to files again.
"""
for key, value in self.experiment_files.items():
if value == 0:
self.experiment_files[key] = 1
[docs] def overwrite(self, experiment_name):
"""
Mark reconstructed experiment metrics to be overwriten.
Parameters:
experiment_name (str): Name of the experiment to mark for overwriting.
"""
if experiment_name not in self.experiment_files:
return
self.experiment_files[experiment_name] = 2
# ---------
# Utilities
# --------------------
def _resolve_dict(self, values, prefix=None):
"""
Resolves a dictionary into a flat pandas dataframe like structure.
Nested dictionaries are getting prefixed with parent key.
Parameters:
values (dict): A dictionary of keys. Can include dictionaries with single level nesting.
Returns:
(dict) a flattened dictionary.
"""
flattened_dict = {}
for key, value in values.items():
# Copy flat values into flattened dictionary
prefixed_key = key if prefix is None else (prefix + "_" + key)
if not isinstance(values[key], dict):
flattened_dict[prefixed_key] = value
continue
resolved = self._resolve_dict(values[key], prefix=prefixed_key)
flattened_dict.update(resolved)
return flattened_dict
def _add_extension(self, filename, ext):
"""
Adds an extension to a filename.
Parameters:
filename (str): The filename to check for the extension
ext (str): The file extension to add and check for
Returns:
(str) the file name with a file extension appended.
"""
if ext not in filename:
return filename + "." + ext
return filename
def __load_experiments(self):
"""
Reconstrcut metrics from files available files.
"""
if not os.path.exists(self.BASE_PATH):
return
dir_content = os.listdir(self.BASE_PATH)
for element in dir_content:
# Skip sub-directories
element_path = os.path.join(self.BASE_PATH, element)
if not os.path.isfile(element_path):
continue
# Skip meta file
if ".meta.json" in element:
continue
# Strip extension off of filename
name, ext = os.path.splitext(element)
self.experiment_files[name] = 0
# -----------
# Getter/-Setter
# ------------------
def __get_mode(self, experiment_name):
default_mode = "a"
if experiment_name not in self.experiment_files:
return default_mode
status = self.experiment_files[experiment_name]
if status == 2:
return "w"
return default_mode
def __get_csv_params(self):
return {
"delimiter": self.delimiter,
"quotechar": self.quotechar,
"quoting": self.quoting
}
def __get_csv_writer(self, file, fieldnames):
csv_params = self.__get_csv_params()
return csv.DictWriter(file, fieldnames, **csv_params)
def __get_csv_reader(self, file):
csv_params = self.__get_csv_params()
return csv.DictReader(file, **csv_params)
[docs] def get_dataset_info(self):
"""
Read
Returns:
(dict) containing meta information about the used dataset for the experiment
"""
meta = self.read_meta()
return meta.get("dataset", None)