Skip to content

model_data_builder

Module for ModelDataBuilder class

ModelDataBuilder

Builder for creating model data objects: it parses the rules and datasets read from the input file into Rule and DatasetData objects.

Source code in entities/model_data_builder.py
class ModelDataBuilder:
    """Builder for creating Model data objects (parsing rules and datasets
    read from the input file to Rule and DatasetData objects)"""

    def __init__(self, logger: ILogger) -> None:
        """Create ModelDataBuilder

        Args:
            logger (ILogger): logger used to report parsing problems
        """
        # Materialise the available rule parsers once; they are reused for
        # every rule lookup in _get_rule_data_parser.
        self._rule_parsers = list(rule_parsers())
        self._logger = logger

    def parse_yaml_data(self, contents: dict[Any, Any]) -> IModelData:
        """Parse the Yaml input file into a data object

        Args:
            contents (dict[Any, Any]): parsed content of the yaml input file

        Returns:
            IModelData: model data holding the version, input datasets,
            output path, output variables and rules

        Raises:
            AttributeError: when version is not available from the input file
        """
        input_version = self._parse_input_version(contents)
        if not input_version:
            # The message has to be passed positionally: the `name` keyword
            # of AttributeError is not the message (the error text would be
            # empty) and is rejected altogether before Python 3.10.
            raise AttributeError("Version not available from input file")

        input_datasets = list(self._parse_input_datasets(contents))
        output_path = self._parse_output_dataset(contents)
        output_variables = self._parse_save_only_variables(contents)
        rules = list(self._parse_rules(contents))

        model_data = YamlModelData("Model 1", input_version)
        model_data.datasets = input_datasets
        model_data.output_path = output_path
        model_data.output_variables = list(output_variables)
        model_data.rules = rules
        return model_data

    def _parse_input_version(self, contents: dict[Any, Any]) -> Optional[List[int]]:
        """Read the "version" element and convert it to a list of integers

        Problems are reported through the logger instead of being raised.

        Args:
            contents (dict[Any, Any]): parsed content of the yaml input file

        Returns:
            Optional[List[int]]: the version parts as integers, or None when
            the version is missing or cannot be converted
        """
        input_version = None
        try:
            # read version string
            version_string: str = get_dict_element("version", contents)

            # check existence of version_string
            if version_string is None or str(version_string) == "":
                self._logger.log_error(
                    f"Version ('{version_string}') in input yaml is missing"
                )
            else:
                # split on the first two dots only (at most 3 items); any
                # further dot stays in the last item and makes int() fail,
                # which is reported by the except clause below
                version_list = version_string.split(".", 2)

                # convert str[] to int[]
                input_version = [int(part) for part in version_list]

        except (ValueError, AttributeError, TypeError) as exception:
            # ValueError: non-numeric part, AttributeError: the value has no
            # split() (not a string)
            self._logger.log_error(f"Invalid version in input yaml: {exception}")
            return None

        return input_version

    def _parse_input_datasets(self, contents: dict[str, Any]) -> Iterable[IDatasetData]:
        """Lazily create a DatasetData for every entry under "input-data"

        Args:
            contents (dict[str, Any]): parsed content of the yaml input file

        Yields:
            IDatasetData: data object built from the "dataset" element of
            each input-data entry
        """
        input_datasets: List[dict[str, Any]] = get_dict_element("input-data", contents)

        for input_dataset in input_datasets:
            yield DatasetData(get_dict_element("dataset", input_dataset))

    def _parse_output_dataset(self, contents: dict[str, Any]) -> Path:
        """Read the output file location from the "output-data" element

        Args:
            contents (dict[str, Any]): parsed content of the yaml input file

        Returns:
            Path: path built from the "filename" entry of "output-data"
        """
        output_data: dict[str, Any] = get_dict_element("output-data", contents)

        return Path(output_data["filename"])

    def _parse_save_only_variables(self, contents: dict[str, Any]) -> Iterable[str]:
        """Read the optional "save_only_variables" entry of "output-data"

        Args:
            contents (dict[str, Any]): parsed content of the yaml input file

        Returns:
            Iterable[str]: the configured variable names; an empty list when
            the entry is absent
        """
        output_data: dict[str, Any] = get_dict_element("output-data", contents)
        save_only_variables = output_data.get("save_only_variables", [])

        # A single name may be given as a plain string: wrap it so callers
        # iterate over names instead of over characters
        if isinstance(save_only_variables, str):
            save_only_variables = [save_only_variables]

        return save_only_variables

    def _parse_rules(self, contents: dict[str, Any]) -> Iterable[IRuleData]:
        """Lazily parse every entry under "rules" with its matching parser

        Args:
            contents (dict[str, Any]): parsed content of the yaml input file

        Yields:
            IRuleData: the result of the matching parser's parse_dict

        Raises:
            KeyError: when no parser is registered for a rule type name
        """
        rules: List[dict[str, Any]] = get_dict_element("rules", contents)

        for rule in rules:
            # every rule is a mapping whose first key is the rule type name
            # and whose value holds the settings of that rule
            rule_type_name = list(rule.keys())[0]
            rule_dict = rule[rule_type_name]

            parser = self._get_rule_data_parser(rule_type_name)

            yield parser.parse_dict(rule_dict, self._logger)

    def _get_rule_data_parser(self, rule_name: str) -> IParserRuleBase:
        """Find the parser whose rule_type_name equals the given name

        Args:
            rule_name (str): rule type name as used in the input file

        Returns:
            IParserRuleBase: the first parser with a matching rule_type_name

        Raises:
            KeyError: when none of the parsers matches the name
        """
        # use the parsers collected in __init__ instead of recreating them
        # for every rule
        for parser in self._rule_parsers:
            if parser.rule_type_name == rule_name:
                return parser

        raise KeyError(f"No parser for {rule_name}")

__init__(self, logger) special

Create ModelDataBuilder

Source code in entities/model_data_builder.py
def __init__(self, logger: ILogger) -> None:
    """Create ModelDataBuilder

    Args:
        logger (ILogger): logger kept on the builder as ``_logger``
    """
    # keep the logger on the instance
    self._logger = logger
    # materialise whatever rule_parsers() provides into a list
    self._rule_parsers = [*rule_parsers()]

parse_yaml_data(self, contents)

Parse the Yaml input file into a data object

Exceptions:

Type Description
AttributeError

when version is not available from the input file

Source code in entities/model_data_builder.py
def parse_yaml_data(self, contents: dict[Any, Any]) -> IModelData:
    """Parse the Yaml input file into a data object

    Args:
        contents (dict[Any, Any]): parsed content of the yaml input file

    Returns:
        IModelData: model data holding the version, input datasets,
        output path, output variables and rules

    Raises:
        AttributeError: when version is not available from the input file
    """
    input_version = self._parse_input_version(contents)
    if not input_version:
        # The message has to be passed positionally: the `name` keyword of
        # AttributeError is not the message (the error text would be empty)
        # and is rejected altogether before Python 3.10.
        raise AttributeError("Version not available from input file")

    input_datasets = list(self._parse_input_datasets(contents))
    output_path = self._parse_output_dataset(contents)
    output_variables = self._parse_save_only_variables(contents)
    rules = list(self._parse_rules(contents))

    model_data = YamlModelData("Model 1", input_version)
    model_data.datasets = input_datasets
    model_data.output_path = output_path
    model_data.output_variables = list(output_variables)
    model_data.rules = rules
    return model_data