model_data_builder
Module for ModelDataBuilder class
ModelDataBuilder
Builder that creates Model data objects by parsing the rules and datasets read from the input file into Rule and DatasetData objects
Source code in entities/model_data_builder.py
class ModelDataBuilder:
    """Builder for creating Model data objects.

    Parses the rules and datasets read from the input file into rule data
    and DatasetData objects and bundles them in a YamlModelData object.
    """

    def __init__(self, logger: ILogger) -> None:
        """Create ModelDataBuilder

        Args:
            logger (ILogger): logger used to report parsing problems; it is
                also passed on to the rule parsers
        """
        # Materialise the available parsers once, so every rule lookup can
        # reuse this list instead of asking for the parsers again.
        self._rule_parsers = list(rule_parsers())
        self._logger = logger

    def parse_yaml_data(self, contents: dict[Any, Any]) -> IModelData:
        """Parse the Yaml input file into a data object

        Args:
            contents (dict[Any, Any]): contents of the input file

        Returns:
            IModelData: model data (named "Model 1") holding the parsed
                version, input datasets, output path, output variables
                and rules

        Raises:
            AttributeError: when version is not available from the input file
        """
        input_version = self._parse_input_version(contents)
        if not input_version:
            # The text must be the positional argument: the `name` keyword
            # only fills the exception's `name` attribute (Python 3.10+) and
            # leaves the message empty; older versions reject the keyword.
            raise AttributeError("Version not available from input file")

        input_datasets = list(self._parse_input_datasets(contents))
        output_path = self._parse_output_dataset(contents)
        output_variables = self._parse_save_only_variables(contents)
        rules = list(self._parse_rules(contents))

        model_data = YamlModelData("Model 1", input_version)
        model_data.datasets = input_datasets
        model_data.output_path = output_path
        model_data.output_variables = list(output_variables)
        model_data.rules = rules
        return model_data

    def _parse_input_version(self, contents: dict[Any, Any]) -> Optional[List[int]]:
        """Read the "version" entry and convert it to a list of integers.

        Returns:
            Optional[List[int]]: the version numbers, or None (after logging
                an error) when the version is missing or cannot be converted
        """
        try:
            # read version string
            version_string: str = get_dict_element("version", contents)

            # check existence of version_string
            if version_string is None or len(str(version_string)) == 0:
                self._logger.log_error(
                    f"Version ('{version_string}')" + " in input yaml is missing"
                )
                return None

            # split into at most 3 items and convert str[] to int[]; a
            # non-string version or a non-numeric part ends up in the
            # except clause below
            return [int(part) for part in version_string.split(".", 2)]
        except (ValueError, AttributeError, TypeError) as exception:
            self._logger.log_error(f"Invalid version in input yaml: {exception}")
            return None

    def _parse_input_datasets(self, contents: dict[str, Any]) -> Iterable[IDatasetData]:
        """Yield a DatasetData for every "dataset" entry under "input-data"."""
        input_datasets: List[dict[str, Any]] = get_dict_element("input-data", contents)
        for input_dataset in input_datasets:
            yield DatasetData(get_dict_element("dataset", input_dataset))

    def _parse_output_dataset(self, contents: dict[str, Any]) -> Path:
        """Return the "filename" of the "output-data" entry as a Path."""
        output_data: dict[str, Any] = get_dict_element("output-data", contents)
        return Path(output_data["filename"])

    def _parse_save_only_variables(self, contents: dict[str, Any]) -> Iterable[str]:
        """Return the "save_only_variables" of the "output-data" entry.

        Returns:
            Iterable[str]: the configured variable names; an empty list when
                the key is absent or has no value, and a one-element list
                when a single name was given as a plain string
        """
        output_data: dict[str, Any] = get_dict_element("output-data", contents)
        save_only_variables = output_data.get("save_only_variables", [])

        if save_only_variables is None:
            # key present without a value: treat it like an absent key, so
            # the caller's list(...) conversion does not fail on None
            return []

        # Convert to list if not already one
        if isinstance(save_only_variables, str):
            return [save_only_variables]
        return save_only_variables

    def _parse_rules(self, contents: dict[str, Any]) -> Iterable[IRuleData]:
        """Yield the parsed rule data for every entry under "rules".

        The first key of each rule dictionary is used as the rule type name
        and its value is handed to the parser registered for that name.
        """
        rules: List[dict[str, Any]] = get_dict_element("rules", contents)
        for rule in rules:
            rule_type_name = list(rule.keys())[0]
            rule_dict = rule[rule_type_name]

            parser = self._get_rule_data_parser(rule_type_name)
            yield parser.parse_dict(rule_dict, self._logger)

    def _get_rule_data_parser(self, rule_name: str) -> IParserRuleBase:
        """Return the parser whose rule_type_name equals rule_name.

        Raises:
            KeyError: when no parser is available for rule_name
        """
        # use the parsers collected in __init__ instead of requesting the
        # parsers again for every single rule
        for parser in self._rule_parsers:
            if parser.rule_type_name == rule_name:
                return parser

        raise KeyError(f"No parser for {rule_name}")
__init__(self, logger)
special
Create ModelDataBuilder
Source code in entities/model_data_builder.py
def __init__(self, logger: ILogger) -> None:
    """Create ModelDataBuilder

    Args:
        logger (ILogger): logger stored on the builder
    """
    # take a snapshot of everything rule_parsers() provides
    available_parsers = rule_parsers()
    self._rule_parsers = [*available_parsers]
    self._logger = logger
parse_yaml_data(self, contents)
Parse the YAML input file into a data object
Exceptions:
Type | Description |
---|---|
AttributeError | when version is not available from the input file |
Source code in entities/model_data_builder.py
def parse_yaml_data(self, contents: dict[Any, Any]) -> IModelData:
    """Parse the Yaml input file into a data object

    Args:
        contents (dict[Any, Any]): contents of the input file

    Returns:
        IModelData: model data (named "Model 1") holding the parsed
            version, input datasets, output path, output variables
            and rules

    Raises:
        AttributeError: when version is not available from the input file
    """
    input_version = self._parse_input_version(contents)
    if not input_version:
        # The text must be the positional argument: the `name` keyword only
        # fills the exception's `name` attribute (Python 3.10+) and leaves
        # the message empty; older versions reject the keyword.
        raise AttributeError("Version not available from input file")

    input_datasets = list(self._parse_input_datasets(contents))
    output_path = self._parse_output_dataset(contents)
    output_variables = self._parse_save_only_variables(contents)
    rules = list(self._parse_rules(contents))

    model_data = YamlModelData("Model 1", input_version)
    model_data.datasets = input_datasets
    model_data.output_path = output_path
    model_data.output_variables = list(output_variables)
    model_data.rules = rules
    return model_data