Source code for luna.common.config

'''
Created on October 17, 2019

@author: pashaa@mskcc.org
'''

import yaml
import yamale
import logging
from jsonpath_ng import parse
from luna.common.utils import get_absolute_path

logger = logging.getLogger(__name__)

[docs]class ConfigSet(): ''' This is a singleton class that can load a collection of configurations from yaml files. ConfigSet loads configurations from yaml files only once on first invocation of this class with the specified yaml file. The class then maintains the configuration in memory in a singleton instance. All new invocations of this class will serve up the same configuration. Each configuration in the collection is identified by a logical name. If a new invocation of this class is created with an existing logical name and a different yaml file, the singleton instance replaces the existing configuration with the newly specified yaml file for the given logical name. ''' __CONFIG_MAP = {} # maps logical name to yaml config file name __SCHEMA_MAP = {} # maps logical name to schema file of yaml config file __INSTANCE = None # singleton instance containing the collection of configs keyed by logical name def __new__(cls, name=None, config_file=None, schema_file=None): # assume one or more collections have already been loaded if name is None or config_file is None: return ConfigSet.__INSTANCE # initialize singleton if ConfigSet.__INSTANCE is None: ConfigSet.__INSTANCE = object.__new__(cls) ConfigSet.__INSTANCE.__config = {} # load or reload config into memory if name not in ConfigSet.__CONFIG_MAP.keys() or \ ConfigSet.__CONFIG_MAP[name] != config_file: ConfigSet.__CONFIG_MAP[name] = config_file ConfigSet.__INSTANCE.__config[name] = ConfigSet._load_config(cls, name) # add schema and validate config if schema_file is not None: ConfigSet.__SCHEMA_MAP[name] = schema_file ConfigSet._validate_config(cls, name) return ConfigSet.__INSTANCE def __init__(self, name=None, config_file=None, schema_file=None): ''' :param name logical name to be given for this configuration. This argument only needs to be provided on first invocation (optional). :param config_file the config file to load. This argument only needs to be provided on first invocation (optional). :param schema_file a schema file for the yaml configuration (optional) :raises yamale.yamale_error.YamaleError if config file is invalid when validated against the schema ''' pass # see __new__() method implementation def _validate_config(cls, name): config_file = ConfigSet.__CONFIG_MAP[name] schema_file = ConfigSet.__SCHEMA_MAP[name] logger.info("validating config " + config_file + " against schema " + schema_file + " for " + name) schema = yamale.make_schema(schema_file) data = yamale.make_data(config_file) yamale.validate(schema, data) def _load_config(cls, name): ''' :param name: logical name of the config to load :return: config generator object :raises: IOError if yaml config file for the specified logical name cannot be found ''' # read config file config_file = ConfigSet.__CONFIG_MAP[name] logger.info("loading config file "+config_file) try: stream = open(config_file, 'r') except IOError as err: logger.error("unable to find a config file with name "+config_file+ ". Please use config.yaml.template to make a "+config_file+". "+str(err)) raise err config = {} for items in yaml.load_all(stream, Loader=yaml.FullLoader): config.update(items) return config def _parse_path(self, path): path_segments = path.split('::', 1) # split just once if len(path_segments) != 2: err = 'Illegal config path: '+path+'. must be of form "name::jsonpath" ' \ 'where name is the logical name of the configuration and jsonpath is the ' \ 'jsonpath into the yaml configuration' logger.error(err) raise ValueError(err) return {'name': path_segments[0], 'jsonpath': path_segments[1]} def _get_match(self, name, jsonpath): jsonpath_expression = parse(jsonpath) return jsonpath_expression.find(ConfigSet.__INSTANCE.__config[name])
[docs] def has_value(self, path): ''' Args: path (str): path to a value in a configuration. The path must be of the form "name::jsonpath" where name is the logical name of the configuration and jsonpath is the jsonpath to value. see config.yaml to generate a jsonpath. See https://pypi.org/project/jsonpath-ng/ jsonpath expressions may be tested here - https://jsonpath.com/ Returns: boolean: true if value is not an empty string, else false. Raises: ValueError: if a configuration with the specified name was never loaded ''' parsed = self._parse_path(path) name= parsed['name'] jsonpath = parsed['jsonpath'] if ConfigSet.__INSTANCE is None or name not in ConfigSet.__INSTANCE.__config.keys(): raise ValueError('configuration with logical name '+name+' was never loaded') if len(self._get_match(name, jsonpath)) == 0: return False else: return True
[docs] def get_value(self, path): ''' Gets the value for the specified jsonpath from the specified configuration. Args: path (str): path to a value in a configuration. The path must be of the form "name::jsonpath" where name is the logical name of the configuration and jsonpath is the jsonpath to value. see config.yaml to generate a jsonpath. See https://pypi.org/project/jsonpath-ng/ jsonpath expressions may be tested here - https://jsonpath.com/ Returns: str: value from config file Raises: ValueError: if no match is found for the specified exception or a configuration with the specified name was never loaded ''' parsed = self._parse_path(path) name = parsed['name'] jsonpath = parsed['jsonpath'] if ConfigSet.__INSTANCE is None or name not in ConfigSet.__INSTANCE.__config.keys(): raise ValueError('configuration with logical name '+name+' was never loaded') match = self._get_match(name, jsonpath) if len(match) == 0: err = 'unable to find a config value for jsonpath: '+jsonpath logger.error(err) raise ValueError(err) return match[0].value
[docs] def get_names(self): ''' :return: a list of logical names of the configs stored in this instance. ''' if ConfigSet.__INSTANCE is not None: return list(ConfigSet.__INSTANCE.__config.keys()) else: return []
[docs] def get_keys(self, name): ''' :param name: logical name of the configuration :return: a list of top-level keys in the config stored in this instance. :raises: ValueError if a configuration with the specified name was never loaded ''' if ConfigSet.__INSTANCE is None or name not in ConfigSet.__INSTANCE.__config.keys(): raise ValueError('configuration with logical name '+name+' was never loaded') return list(ConfigSet.__INSTANCE.__config[name].keys())
[docs] def get_config_set(self, name): ''' :param name: logical name of the configuration :return: a dictonary of top-level keys in the config stored in this instance. :raises: ValueError if a configuration with the specified name was never loaded ''' if ConfigSet.__INSTANCE is None or name not in ConfigSet.__INSTANCE.__config.keys(): raise ValueError('configuration with logical name '+name+' was never loaded') return ConfigSet.__INSTANCE.__config[name]
[docs] def clear(self): ''' clear the entire collection of configurations ''' ConfigSet.__CONFIG_MAP = {} ConfigSet.__SCHEMA_MAP = {} ConfigSet.__INSTANCE = None
if __name__ == '__main__': c1 = ConfigSet('app_config', 'pyluna-pathology/tests/luna/common/test_config.yml') c2 = ConfigSet('app_config', 'pyluna-pathology/tests/luna/common/test_config.yml') c3 = ConfigSet('app_config', 'pyluna-pathology/tests/luna/common/test_config.yml') print(str(c1) + ' ' + str(c1.get_value('app_config::$.spark_application_config[:1]["spark.executor.cores"]'))) print(str(c2) + ' ' + str(c2.get_value('app_config::$.spark_application_config[:1]["spark.executor.cores"]'))) try: print(str(c3) + ' ' + str(c3.get_value('app_config::$.spark_application_config[:1]["doesnt_exist"]'))) except ValueError as ve: print("got expected value error: "+str(ve)) schema_file = get_absolute_path(__file__, '../data_ingestion_template_schema.yml') c4 = ConfigSet(name='data_config', config_file="pyluna-pathology/tests/luna/common/test_data_ingestion_template.yml", schema_file=schema_file) print(str(c4) + ' ' + str(c4.get_value('data_config::$.REQUESTOR')))