Source code for model_organization.config

import os
import os.path as osp
import six
import inspect
import sys
import logging
import logging.config
import yaml
import glob
import fasteners
import copy
from collections import OrderedDict, defaultdict
import model_organization.utils as utils


# Module-level alias for the docstring processor that lives in
# :mod:`model_organization.utils`. Its ``dedent`` and ``get_sectionsf``
# methods are used as decorators on the ``fix_paths`` / ``rel_paths`` methods
# further down in this module.
docstrings = utils.docstrings


def _get_home():
    """Return the user's home directory, or None if it cannot be determined.

    The expansion of ``~`` is tried first. If that is unavailable or does not
    point to an existing directory, the ``HOME``, ``USERPROFILE`` and ``TMP``
    environment variables are checked in this order and the first one that
    names an existing directory is returned.

    :see:  http://mail.python.org/pipermail/python-list/2005-February/325395.html

    This function is copied from matplotlib version 1.4.3, Jan 2016
    """
    home = None
    try:
        if six.PY2 and sys.platform == 'win32':
            # expand as bytes and decode with the file system encoding
            home = os.path.expanduser(b"~").decode(
                sys.getfilesystemencoding())
        else:
            home = os.path.expanduser("~")
    except ImportError:
        # This happens on Google App Engine (pwd module is not present).
        home = None
    if home is not None and os.path.isdir(home):
        return home

    # fall back to the usual environment variables
    for variable in ('HOME', 'USERPROFILE', 'TMP'):
        candidate = os.environ.get(variable)
        if candidate is None:
            continue
        if os.path.isdir(candidate):
            return candidate
    return None


def get_configdir(name):
    """
    Return the string representing the configuration directory.

    The directory is chosen as follows:

    1. If the ``name.upper() + 'CONFIGDIR'`` environment variable is
       supplied, its absolute path is returned (the directory is *not*
       created in this case).
    2a. On Linux and Mac OS, choose ``$HOME/.config/<name>``.
    2b. On other platforms, choose ``$HOME/.<name>``.
    3. The chosen directory is created if it does not exist yet.
    4. If no home directory could be found, return None.

    Parameters
    ----------
    name: str
        The name of the application. It determines the name of the
        environment variable and of the directory

    Returns
    -------
    str or None
        The path to the configuration directory or None if neither the
        environment variable is set nor a home directory could be found

    Notes
    -----
    This function is taken from the matplotlib [1] module

    References
    ----------
    [1]: http://matplotlib.org/api/"""
    configdir = os.environ.get('%sCONFIGDIR' % name.upper())
    if configdir is not None:
        return os.path.abspath(configdir)

    home = _get_home()
    if home is None:
        # Nothing sensible to choose from. Previously this ended in a
        # TypeError from ``os.path.exists(None)``.
        return None

    if sys.platform.startswith(('linux', 'darwin')):
        path = os.path.join(home, '.config', name)
    else:
        path = os.path.join(home, '.' + name)

    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError:
            # another process may have created the directory in the meantime
            if not os.path.isdir(path):
                raise
    return path
def setup_logging(default_path=None, default_level=logging.INFO,
                  env_key=None):
    """
    Setup logging configuration

    Parameters
    ----------
    default_path: str
        Default path of the yaml logging configuration file. If None, it
        defaults to the 'logging.yaml' file in the directory of this module
    default_level: int
        Default: :data:`logging.INFO`. Level that is passed to
        :func:`logging.basicConfig` if the configuration file does not exist
    env_key: str
        Name of an environment variable specifying a different logging file
        than `default_path`. If None (default), no environment variable is
        looked up

    Returns
    -------
    path: str
        Path to the logging configuration file that has been used or None if
        no such file exists

    Notes
    -----
    Function taken from
    http://victorlin.me/posts/2012/08/26/good-logging-practice-in-python"""
    path = default_path or os.path.join(
        os.path.dirname(__file__), 'logging.yaml')
    env_path = os.getenv(env_key) if env_key is not None else None
    if env_path:
        path = env_path

    if not os.path.exists(path):
        # no configuration file available, fall back to the basic setup
        logging.basicConfig(level=default_level)
        return None

    with open(path, 'rt') as f:
        # safe_load: the file only holds plain mappings for dictConfig and
        # yaml.load without an explicit Loader fails with recent PyYAML
        config = yaml.safe_load(f)

    # expand '~' in the file names of the handlers (only possible if we
    # know the home directory)
    home = _get_home()
    if home is not None:
        for handler in config.get('handlers', {}).values():
            filename = handler.get('filename', '')
            if '~' in filename:
                handler['filename'] = filename.replace('~', home)
    logging.config.dictConfig(config)
    return path
def ordered_yaml_load(stream, Loader=None, object_pairs_hook=OrderedDict):
    """Load a YAML stream and keep the order of its mappings.

    Every YAML mapping is constructed through `object_pairs_hook` (by default
    an :class:`~collections.OrderedDict`) instead of a plain dict.

    Taken from

        http://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts
    """
    # NOTE(review): the default base class is the full ``yaml.Loader`` which
    # can construct arbitrary python objects. Pass ``yaml.SafeLoader`` when
    # the stream is not trusted.
    base_loader = Loader or yaml.Loader

    class OrderedLoader(base_loader):
        """Private loader class so that the base class stays untouched"""

    def build_ordered_mapping(loader, node):
        # resolve merge keys ('<<') before collecting the pairs
        loader.flatten_mapping(node)
        pairs = loader.construct_pairs(node)
        return object_pairs_hook(pairs)

    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    OrderedLoader.add_constructor(mapping_tag, build_ordered_mapping)
    return yaml.load(stream, OrderedLoader)
def ordered_yaml_dump(data, stream=None, Dumper=None, **kwds):
    """Dump `data` as YAML and keep the order of OrderedDicts.

    :class:`~collections.OrderedDict` instances are written as ordinary YAML
    mappings in their insertion order. `stream` and ``**kwds`` are forwarded
    to :func:`yaml.dump`.

    Taken from

        http://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts
    """
    base_dumper = Dumper or yaml.Dumper

    class OrderedDumper(base_dumper):
        """Private dumper class so that the base class stays untouched"""

    def represent_ordered_mapping(dumper, mapping):
        mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
        return dumper.represent_mapping(mapping_tag, mapping.items())

    OrderedDumper.add_representer(OrderedDict, represent_ordered_mapping)
    return yaml.dump(data, stream, OrderedDumper, **kwds)
def safe_load(fname):
    """
    Load the file fname and make sure it can be done in parallel

    The file is read with :func:`ordered_yaml_load` while an inter-process
    lock on ``fname + '.lck'`` is held. The lock is released again when the
    file has been read or when reading fails.

    Parameters
    ----------
    fname: str
        The path name

    Returns
    -------
    object
        Whatever :func:`ordered_yaml_load` returns for the content of
        `fname`
    """
    # the context manager acquires the lock and always releases it again
    with fasteners.InterProcessLock(fname + '.lck'):
        with open(fname) as f:
            return ordered_yaml_load(f)
def safe_dump(d, fname, *args, **kwargs):
    """
    Safely dump `d` to `fname` using yaml

    If `fname` already exists, it is renamed to ``fname + '~'`` (replacing
    an older backup) before `d` is saved to `fname` using
    :func:`ordered_yaml_dump`. Both steps are done while an inter-process
    lock on ``fname + '.lck'`` is held.

    Parameters
    ----------
    d: object
        The object to dump
    fname: str
        The path where to dump `d`

    Other Parameters
    ----------------
    ``*args, **kwargs``
        Will be forwarded to the :func:`ordered_yaml_dump` function
    """
    backup = fname + '~'
    with fasteners.InterProcessLock(fname + '.lck'):
        # the backup is made inside the lock such that no other process can
        # read or write the file while it is moved away
        if osp.exists(fname):
            if osp.exists(backup):
                # os.rename does not overwrite existing files on Windows
                os.remove(backup)
            os.rename(fname, backup)
        with open(fname, 'w') as f:
            ordered_yaml_dump(d, f, *args, **kwargs)
class Archive(six.text_type):
    """
    Marker string type for archived experiments

    Instances behave like ordinary text strings. The class only exists such
    that archived experiments can be identified via ``isinstance``.
    """

    #: The name of the project inside this archive
    project = None

    #: The time when this project has been archived
    time = None
class ExperimentsConfig(OrderedDict):
    """
    The configuration of the experiments

    This class acts like a :class:`collections.OrderedDict` but loads the
    experiment configuration only when you access the specific item (i.e. via
    ``d['exp_id']``). Until then, the value of an experiment is the path to
    its configuration file (or an :class:`Archive` for archived experiments).
    """

    #: list of str. The keys describing paths for the model. Note that these
    #: keys here are replaced by the keys in the
    #: :attr:`~model_organization.ModelOrganizer.paths` attribute of the
    #: specific :class:`model_organization.ModelOrganizer` instance
    paths = ['expdir', 'src', 'data', 'input', 'outdata', 'outdir',
             'plot_output', 'project_output', 'forcing']

    # Class level default. It is shadowed by an instance attribute (False)
    # while ``__init__`` fills the dictionary, which disables the lazy
    # loading in ``__getitem__`` during that time.
    _initialized = True

    @property
    def exp_file(self):
        """The path to the file containing all experiments in the
        configuration"""
        return osp.join(self.projects.conf_dir, 'experiments.yml')

    @property
    def project_map(self):
        """A mapping from project name to experiments"""
        # first update with the experiments in the memory (the others should
        # already be loaded within the :attr:`exp_files` attribute)
        for key, val in self.items():
            if isinstance(val, dict):
                l = self._project_map[val['project']]
            elif isinstance(val, Archive):
                l = self._project_map[val.project]
            else:
                continue
            if key not in l:
                l.append(key)
        return self._project_map

    @property
    def exp_files(self):
        """A mapping from experiment to experiment configuration file

        Note that this attribute only contains experiments whose configuration
        has already dumped to the file!
        """
        ret = OrderedDict()
        # restore the order of the experiments
        exp_file = self.exp_file
        if osp.exists(exp_file):
            # an empty file is loaded as None
            for key, val in (safe_load(exp_file) or {}).items():
                ret[key] = val
        for project, d in self.projects.items():
            project_path = d['root']
            config_path = osp.join(project_path, '.project')
            if not osp.exists(config_path):
                continue
            for fname in glob.glob(osp.join(config_path, '*.yml')):
                # glob returns full paths, hence compare the base name to
                # skip the configuration file of the project itself
                if osp.basename(fname) == '.project.yml':
                    continue
                exp = osp.splitext(osp.basename(fname))[0]
                if not isinstance(ret.get(exp), Archive):
                    ret[exp] = osp.join(config_path, exp + '.yml')
                    if exp not in self._project_map[project]:
                        self._project_map[project].append(exp)
        return ret

    def __init__(self, projects, d=None, project_map=None):
        """
        Parameters
        ----------
        projects: ProjectConfig
            The project configuration
        d: dict
            An alternative dictionary to initialize from. If not given, the
            experiments are loaded on the fly from the :attr:`exp_files`
            attribute
        project_map: dict
            A mapping from project to experiments. If not given, it is
            created when accessing the :attr:`project_map` experiment
        """
        super(ExperimentsConfig, self).__init__()
        self.projects = projects
        self._project_map = project_map or defaultdict(list)
        # necessary switch for python 2 since the item is accessed when
        # setting it
        self._initialized = False
        try:
            if projects:
                if d is not None:
                    for key, val in d.items():
                        self[key] = val
                else:
                    # setup the paths for the experiments
                    for key, val in self.exp_files.items():
                        self[key] = val
        finally:
            # fall back to the class attribute (True), also on errors
            del self._initialized

    def __getitem__(self, attr):
        ret = super(ExperimentsConfig, self).__getitem__(attr)
        if self._initialized and not isinstance(ret, (dict, Archive)):
            # ``ret`` is the path to the configuration file of the
            # experiment. Load it and keep the loaded configuration.
            self[attr] = d = safe_load(ret)
            if isinstance(d, dict):
                self.fix_paths(d)
            return d
        return ret

    def __setitem__(self, key, val):
        if (isinstance(val, Archive) and
                key not in self._project_map[val.project]):
            # make sure the project_map is up-to-date
            self._project_map[val.project].append(key)
        super(ExperimentsConfig, self).__setitem__(key, val)

    def __reduce__(self):
        # in Python2 do not simply make an OrderedDict, because that
        # accesses the item itself
        return self.__class__, (self.projects, self.as_ordereddict(),
                                self._project_map)

    def _resolve_root(self, d, root, project):
        # Determine the root directory (and project name) that is used to
        # convert the paths in `d`. An explicitly given `root` wins, then the
        # root of `project`, then the root of ``d['project']`` and finally
        # ``d['root']``.
        if root is None:
            if project is None:
                project = d.get('project')
            if project is not None:
                root = self.projects[project]['root']
            else:
                root = d['root']
        return root, project

    @docstrings.get_sectionsf('ExperimentsConfig.fix_paths',
                              sections=['Parameters', 'Returns'])
    @docstrings.dedent
    def fix_paths(self, d, root=None, project=None):
        """
        Fix the paths in the given dictionary to get absolute paths

        Parameters
        ----------
        d: dict
            One experiment configuration dictionary
        root: str
            The root path of the project
        project: str
            The project name

        Returns
        -------
        dict
            The modified `d`

        Notes
        -----
        d is modified in place!"""
        root, project = self._resolve_root(d, root, project)
        paths = self.paths
        for key, val in d.items():
            if isinstance(val, dict):
                d[key] = self.fix_paths(val, root, project)
            elif key not in paths:
                continue
            elif isinstance(val, six.string_types):
                if not osp.isabs(val):
                    d[key] = osp.join(root, val)
            else:
                # presumably utils.safe_list wraps scalars into a list and
                # copies other sequences -- verify against utils
                items = utils.safe_list(val)
                if (items and isinstance(items[0], six.string_types) and
                        not osp.isabs(items[0])):
                    for i, item in enumerate(val):
                        val[i] = osp.join(root, item)
        return d

    @docstrings.get_sectionsf('ExperimentsConfig.rel_paths',
                              sections=['Parameters', 'Returns'])
    @docstrings.dedent
    def rel_paths(self, d, root=None, project=None):
        """
        Fix the paths in the given dictionary to get relative paths

        Parameters
        ----------
        %(ExperimentsConfig.fix_paths.parameters)s

        Returns
        -------
        %(ExperimentsConfig.fix_paths.returns)s

        Notes
        -----
        d is modified in place!"""
        root, project = self._resolve_root(d, root, project)
        paths = self.paths
        for key, val in d.items():
            if isinstance(val, dict):
                d[key] = self.rel_paths(val, root, project)
            elif key not in paths:
                continue
            elif isinstance(val, six.string_types):
                if osp.isabs(val):
                    d[key] = osp.relpath(val, root)
            else:
                # presumably utils.safe_list wraps scalars into a list and
                # copies other sequences -- verify against utils
                items = utils.safe_list(val)
                if (items and isinstance(items[0], six.string_types) and
                        osp.isabs(items[0])):
                    for i, item in enumerate(val):
                        val[i] = osp.relpath(item, root)
        return d

    def save(self):
        """Save the experiment configuration

        This method stores the configuration of each of the experiments in a
        file ``'<project-dir>/.project/<experiment>.yml'``, where
        ``'<project-dir>'`` corresponds to the project directory of the
        specific ``'<experiment>'``. Furthermore it dumps all experiments to
        the :attr:`exp_file` configuration file.
        """
        for exp, d in dict(self).items():
            if isinstance(d, dict):
                project_path = self.projects[d['project']]['root']
                # store relative paths but leave the configuration in the
                # memory untouched
                d = self.rel_paths(copy.deepcopy(d))
                fname = osp.join(project_path, '.project', exp + '.yml')
                if not osp.exists(osp.dirname(fname)):
                    os.makedirs(osp.dirname(fname))
                safe_dump(d, fname, default_flow_style=False)
        exp_file = self.exp_file
        # to be 100% sure we do not write to the file from multiple processes
        lock = fasteners.InterProcessLock(exp_file + '.lck')
        lock.acquire()
        try:
            # only archives are stored by value, all other experiments are
            # listed with None to keep their order
            safe_dump(
                OrderedDict((exp, val if isinstance(val, Archive) else None)
                            for exp, val in self.items()),
                exp_file, default_flow_style=False)
        finally:
            # do not keep the lock if dumping failed
            lock.release()

    def load(self):
        """Load all experiments in this dictionary into memory"""
        for key in self:
            self[key]
        return self

    def as_ordereddict(self):
        """Convenience method to convert this object into an OrderedDict"""
        if six.PY2:
            d = OrderedDict()
            copied = dict(self)
            for key in self:
                d[key] = copied[key]
        else:
            d = OrderedDict(self)
        return d

    def items(self):
        # Reimplemented to not load all experiments under python2.7
        if six.PY2:
            d = dict(self)
            return [(key, d[key]) for key in self]
        return super(ExperimentsConfig, self).items()

    def iteritems(self):
        # Reimplemented to not load all experiments under python2.7
        if six.PY2:
            d = dict(self)
            return iter((key, d[key]) for key in self)
        return iter(super(ExperimentsConfig, self).items())

    def values(self):
        # Reimplemented to not load all experiments under python2.7
        if six.PY2:
            d = dict(self)
            return [d[key] for key in self]
        return super(ExperimentsConfig, self).values()

    def itervalues(self):
        # Reimplemented to not load all experiments under python2.7
        if six.PY2:
            d = dict(self)
            return iter(d[key] for key in self)
        return iter(super(ExperimentsConfig, self).values())

    def remove(self, experiment):
        """Remove the configuration of an experiment

        The configuration file of the experiment, its backup and its lock
        file are deleted as well. Nothing happens if the experiment (or its
        project) is not known."""
        try:
            project_path = self.projects[self[experiment]['project']]['root']
        except KeyError:
            return
        config_path = osp.join(project_path, '.project', experiment + '.yml')
        for f in [config_path, config_path + '~', config_path + '.lck']:
            if os.path.exists(f):
                os.remove(f)
        del self[experiment]

    # extend the docstrings of the OrderedDict methods for the methods that
    # have been reimplemented above
    _note = ('\n\nNotes\n-----\n'
             'Reimplemented to not load all experiments under python2.7')
    for _m in ['items', 'iteritems', 'values', 'itervalues']:
        locals()[_m].__doc__ = (
            (inspect.getdoc(getattr(OrderedDict, _m, None)) or '') + _note)
    del _m, _note
class ProjectsConfig(OrderedDict):
    """The project configuration

    This class stores the configuration from the projects, where each key
    corresponds to the name of one project and the value to the corresponding
    configuration.

    Instances of this class are initialized by a file ``'projects.yml'`` in
    the configuration directory (see the :attr:`all_projects` attribute) that
    stores a mapping from project name to project directory path. The
    configuration for each individual project is then loaded from the
    ``'<project-dir>/.project/.project.yml'`` file

    Notes
    -----
    If one project has been moved to another directory, make sure to update
    the ``'projects.yml'`` file (the rest is updated when loading the
    configuration)
    """

    #: list of str. The keys describing paths for the model. Note that these
    #: keys here are replaced by the keys in the
    #: :attr:`~model_organization.ModelOrganizer.paths` attribute of the
    #: specific :class:`model_organization.ModelOrganizer` instance
    paths = ['expdir', 'src', 'data', 'input', 'outdata', 'outdir',
             'plot_output', 'project_output', 'forcing']

    #: The path to the configuration directory
    conf_dir = None

    @property
    def all_projects(self):
        """The path to the ``'projects.yml'`` file in the configuration
        directory"""
        return osp.join(self.conf_dir, 'projects.yml')

    def __init__(self, conf_dir, d=None):
        """
        Parameters
        ----------
        conf_dir: str
            The path to the configuration directory containing a file called
            ``'projects.yml'``
        d: dict
            A dictionary to use to setup this configuration instead of
            loading them from the disk
        """
        super(ProjectsConfig, self).__init__()
        self.conf_dir = conf_dir
        fname = self.all_projects
        if osp.exists(fname):
            # an empty file is loaded as None
            project_paths = safe_load(fname) or OrderedDict()
        else:
            project_paths = OrderedDict()
        self.project_paths = project_paths
        if d is not None:
            for key, val in d.items():
                self[key] = val
        else:
            for project, path in project_paths.items():
                config = safe_load(
                    osp.join(path, '.project', '.project.yml'))
                # The path in projects.yml is authoritative (the project
                # might have been moved). Hence set it before the relative
                # paths are resolved, not afterwards.
                config['root'] = path
                self[project] = self.fix_paths(config, root=path)

    def __reduce__(self):
        return self.__class__, (self.conf_dir, OrderedDict(self))

    def _resolve_root(self, d, root, project):
        # Determine the root directory (and project name) that is used to
        # convert the paths in `d`. An explicitly given `root` wins, then the
        # root of `project`, then the root of ``d['project']`` and finally
        # ``d['root']``.
        if root is None:
            if project is None:
                project = d.get('project')
            if project is not None:
                root = self[project]['root']
            else:
                root = d['root']
        return root, project

    @docstrings.dedent
    def fix_paths(self, d, root=None, project=None):
        """
        Fix the paths in the given dictionary to get absolute paths

        Parameters
        ----------
        %(ExperimentsConfig.fix_paths.parameters)s

        Returns
        -------
        %(ExperimentsConfig.fix_paths.returns)s

        Notes
        -----
        d is modified in place!"""
        root, project = self._resolve_root(d, root, project)
        paths = self.paths
        for key, val in d.items():
            if isinstance(val, dict):
                d[key] = self.fix_paths(val, root, project)
            elif key not in paths:
                continue
            elif isinstance(val, six.string_types):
                if not osp.isabs(val):
                    d[key] = osp.join(root, val)
            else:
                # presumably utils.safe_list wraps scalars into a list and
                # copies other sequences -- verify against utils
                items = utils.safe_list(val)
                if (items and isinstance(items[0], six.string_types) and
                        not osp.isabs(items[0])):
                    for i, item in enumerate(val):
                        val[i] = osp.join(root, item)
        return d

    @docstrings.get_sectionsf('ExperimentsConfig.rel_paths',
                              sections=['Parameters', 'Returns'])
    @docstrings.dedent
    def rel_paths(self, d, root=None, project=None):
        """
        Fix the paths in the given dictionary to get relative paths

        Parameters
        ----------
        %(ExperimentsConfig.fix_paths.parameters)s

        Returns
        -------
        %(ExperimentsConfig.fix_paths.returns)s

        Notes
        -----
        d is modified in place!"""
        root, project = self._resolve_root(d, root, project)
        paths = self.paths
        for key, val in d.items():
            if isinstance(val, dict):
                d[key] = self.rel_paths(val, root, project)
            elif key not in paths:
                continue
            elif isinstance(val, six.string_types):
                if osp.isabs(val):
                    d[key] = osp.relpath(val, root)
            else:
                # presumably utils.safe_list wraps scalars into a list and
                # copies other sequences -- verify against utils
                items = utils.safe_list(val)
                if (items and isinstance(items[0], six.string_types) and
                        osp.isabs(items[0])):
                    for i, item in enumerate(val):
                        val[i] = osp.relpath(item, root)
        return d

    def save(self):
        """
        Save the project configuration

        This method dumps the configuration for each project and the project
        paths (see the :attr:`all_projects` attribute) to the hard drive
        """
        project_paths = OrderedDict()
        for project, d in OrderedDict(self).items():
            if isinstance(d, dict):
                project_path = d['root']
                fname = osp.join(project_path, '.project', '.project.yml')
                if not osp.exists(osp.dirname(fname)):
                    os.makedirs(osp.dirname(fname))
                # store relative paths but leave the configuration in the
                # memory untouched. The backup of an existing file is made
                # by safe_dump
                d = self.rel_paths(copy.deepcopy(d))
                safe_dump(d, fname, default_flow_style=False)
                project_paths[project] = project_path
            else:
                # keep the known path of this project (and do not replace
                # the entire mapping by it)
                project_paths[project] = self.project_paths[project]
        self.project_paths = project_paths
        safe_dump(project_paths, self.all_projects, default_flow_style=False)
class Config(object):
    """Configuration class for one model organizer

    An instance bundles the project configuration, the experiment
    configuration and the global configuration that are stored in the
    configuration directory of the given name."""

    #: Boolean that is True when the experiments shall be synched with the
    #: files on the harddisk. Use the :meth:`save` method to store the
    #: configuration
    _store = False

    #: :class:`ExperimentConfig`. The configuration of the experiments
    experiments = OrderedDict()

    #: :class:`ProjectsConfig`. The configuration of the projects
    projects = OrderedDict()

    #: :class:`OrderedDict`. The global configuration that applies to all
    #: projects
    global_config = OrderedDict()

    def __init__(self, name):
        """
        Parameters
        ----------
        name: str
            The name that is used to find the configuration directory (see
            :func:`get_configdir`)
        """
        self.name = name
        self.conf_dir = get_configdir(name)
        # the experiments need the projects to locate their files
        self.projects = ProjectsConfig(self.conf_dir)
        self.experiments = ExperimentsConfig(self.projects)
        self._globals_file = osp.join(self.conf_dir, 'globals.yml')
        if not osp.exists(self._globals_file):
            self.global_config = OrderedDict()
        else:
            self.global_config = safe_load(self._globals_file)

    def remove_experiment(self, experiment):
        """Remove the configuration of the given experiment

        This is a shortcut for the ``remove`` method of the
        :attr:`experiments` attribute."""
        self.experiments.remove(experiment)

    def save(self):
        """
        Save the entire configuration files

        The projects are saved first, then the experiments and finally the
        global configuration
        """
        self.projects.save()
        self.experiments.save()
        safe_dump(self.global_config, self._globals_file,
                  default_flow_style=False)