Source code for climaf.classes

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
 Basic types and syntax for a CLIMAF Reference Syntax interpreter and driver
 This is a first protoype, where the interpreter is Python itself


"""
# Created : S.Sénési - 2014

from __future__ import print_function, division, unicode_literals, absolute_import

import re
import string
import copy
import os.path
from collections import defaultdict
from functools import reduce, partial
import six
import warnings
import json
import shutil
import glob
import xarray as xr
from datetime import timedelta

from env.environment import *
from climaf.utils import Climaf_Classes_Error, remove_keys_with_same_values
from climaf.dataloc import isLocal, getlocs, selectFiles, dataloc
from climaf.period import init_period, cperiod, merge_periods, intersect_periods_list,\
    lastyears, firstyears, group_periods, freq_to_minutes
from env.clogging import clogger
from climaf.netcdfbasics import fileHasVar, varsOfFile, attrOfFile, timeLimits, model_id

warnings.filterwarnings("ignore", category=DeprecationWarning)

# Should function ds() try to resolve for period=*
auto_resolve = False


[docs]def derive_cproject(name, parent_name, new_project_facets=list()): """ Create a new project named 'name' from the project 'parent_name' adding the facets listed in 'new_project_facets' if specified. Also derive the location list from the parent project. :param name: name of the new project :param parent_name: name of the source project :param new_project_facets: the list of the facets to add to the new project (could be already present in parent). :return: the new project """ if name in cprojects or any([elt.project == name for elt in locs]): raise Climaf_Classes_Error( "Could not derive a project from an existing one if it already exists: %s." % name) else: cprojects[parent_name].derive(name, new_project_facets) [elt.derive(name) for elt in locs if elt.project == parent_name]
[docs]class cproject(object): def __init__(self, name, *args, **kwargs): """ Declare a project and its facets/attributes in CliMAF (see below) Args: name (string) : project name; do not use the chosen separator in it (see below) args (strings) : attribute names; they are free; do not use the chosen separator in it (see below); **CliMAF anyway will add attributes : project, simulation, variable, period, and domain** kwargs (dict) : can only be used with keywords : - ``sep`` or ``separator`` for indicating the symbol separating facets in the dataset syntax. Defaults to ".". - ``ensemble`` for declaring a list of attribute names which are allowed for defining an ensemble in this project ('simulation' is automatically allowed) - ``use_frequency`` to declare that the frequency can not be derived from time bounds of the file. In this case the facet ``frequency`` is mandatory for the project and a default value must be defined. Returns : a cproject object, which string representation is the pattern later used in CliMAF Refreence Syntax for representing datasets in this project A 'cproject' is the definition of a set of attributes, or facets, which values will completely define a 'dataset' as managed by CliMAF. Its name is one of the possible keys for describing data locations (see :py:class:`~climaf.dataloc.dataloc`) For instance, cproject CMIP5, after its Data Reference Syntax, has attributes : model, simulation (used for rip), experiment, variable, frequency, realm, table, version **A number of projects are built-in**. See :py:mod:`~climaf.projects` A dataset in a cproject declared as :: >>> cproject('MINE','myfreq','myfacet',sep='_') will return :: ${project}_${simulation}_${variable}_${period}_${domain}_${myfreq}_${myfacet} and will have datasets represented as e.g.:: 'MINE_hist_tas_[1980-1999]_global_decadal_gabu' while an example for built-in cproject CMIP5 will be:: 'CMIP5.historical.pr.[1980].global.monthly.CNRM-CM5.r1i1p1.mon.Amon.atmos.last' The attributes list should include all facets which are useful for distinguishing datasets from each other, and for computing datafile pathnames in the 'generic' organization (see :py:class:`~climaf.dataloc.dataloc`) A default value for a given facet can be specified, by providing a tuple (facet_name,default_value) instead of the facet name. This default value is however of lower priority than the value set using :py:func:`~climaf.classes.cdef` A project can be declared as having non-standard variable names in datafiles, or variables that should undergo re-scaling; see :py:func:`~climaf.classes.calias` A project can be declared as having non-standard frequency names (this is used when accessing datafiles); see :py:func:`~climaf.classes.cfreqs`) """ if name in cprojects: clogger.warning("Redefining project %s" % name) self.project = name # self.facets = [] self.facet_defaults = dict() self.facet_authorized_values = dict() forced = ['project', 'simulation', 'variable', 'period', 'domain'] for f in forced: self.facets.append(f) for a in args: if isinstance(a, tuple): facet_name, facet_default = a self.facet_defaults[facet_name] = facet_default else: facet_name = a if facet_name not in forced: self.facets.append(facet_name) # self.separator = "." if "separator" in kwargs: self.separator = kwargs['separator'] if "sep" in kwargs: self.separator = kwargs['sep'] if self.separator == ",": raise Climaf_Classes_Error( "Character ',' is forbidden as a project separator") cprojects[name] = self self.crs = "" # Build the pattern for the datasets CRS for this cproject for f in self.facets: self.crs += "${%s}%s" % (f, self.separator) self.crs = self.crs[:-1] # Create an attribute hodling the list of facets which are allowed # for defining an ensemble, and put a first facet there self.attributes_for_ensemble = ['simulation'] if 'ensemble' in kwargs: self.attributes_for_ensemble.extend(kwargs["ensemble"]) self.use_frequency = kwargs.get("use_frequency", False) def derive(self, new_name, new_facets=list()): """ Derive a new project from this one with name 'new_name' and possibly new facets listed in 'new_facets' :param new_name: name of the newly created project :param new_facets: list of the new facets :return: the new project """ args = list() for a in self.facets: if a in self.facet_defaults: args.append((a, self.facet_defaults[a])) else: args.append(a) args.extend(new_facets) kwargs = dict() kwargs["separator"] = self.separator if len(self.attributes_for_ensemble) > 1: kwargs["ensemble"] = self.attributes_for_ensemble[1:] return cproject(new_name, *args, **kwargs) def __repr__(self): return self.crs def crs2ds(self, crs): """ Try to interpret string ``crs`` as the CRS of a dataset for the cproject. Return the dataset if OK """ fields = crs.split(self.separator) if len(fields) == len(self.facets): if fields[0] == self.project: kvp = dict() for i, f in enumerate(self.facets): kvp[f] = fields[i] return cdataset(**kvp) def build_cvalid_from_tree_of_files(self, project_name=None): if project_name is None: project_name = self.project # Find out the directory paths to be checked (other keys can be considered by hand) project_locs = [os.path.dirname(loc) for loc in locs if loc.project in [ project_name, ]] # Do not consider root project_locs = [loc.replace( "${root}", self.facet_defaults["root"]) for loc in project_locs] facets_regexp = re.compile(r"\$\{(?P<facet>[^\{^\}]+)\}") list_facets = list() for loc in project_locs: list_facets.append([m.groupdict()["facet"] for m in facets_regexp.finditer(loc)]) dict_facets = defaultdict(list) for (loc, facets) in zip(project_locs, list_facets): loc_list = [loc, ] tmp_loc_list = list() for facet in facets: facet_reg = r"\$\{%s\}" % facet facet_regexp = re.compile(facet_reg) for tmp_loc in loc_list: match = facet_regexp.match(tmp_loc) if match is not None: begin_tmp_loc = tmp_loc[:tmp_loc.find( os.sep, match.end())] begin_tmp_loc = begin_tmp_loc.replace(facet_reg, "*") list_values = glob.glob(begin_tmp_loc) list_values = [val.replace(tmp_loc[:match.start()], "")[:len(tmp_loc) - match.end()] for val in list_values] dict_facets[facet].extend(list_values) tmp_loc_list.extend( [tmp_loc.replace(facet_reg, val) for val in list_values]) else: tmp_loc_list.append(tmp_loc) loc_list, tmp_loc_list = tmp_loc_list, list() for key in dict_facets: dict_facets[key] = sorted(list(set(dict_facets[key]))) return dict_facets def build_cvalid_conf_file_name(self, project_name=None, choice="both"): """ Build cvalid conf file name from project name. :param project_name: name of the default project to be used :param choice: where to look for the conf file, either "user" (in $HOME/.climaf), "default" (in climaf/projects) or "both" :return: a list of possible conf file names """ if project_name is None: project_name = self.project cvalid_user_conf_file = os.sep.join( [os.environ["HOME"], ".climaf", "cvalid_{}.json".format(project_name)]) cvalid_default_conf_file = os.sep.join([os.path.dirname(os.path.abspath(__file__)), "project", "cvalid_{}.json".format(project_name)]) if choice in ["both", ]: return [cvalid_user_conf_file, cvalid_default_conf_file] elif choice in ["user", ]: return [cvalid_user_conf_file, ] elif choice in ["default", ]: return [cvalid_default_conf_file, ] else: raise ValueError("Unknown value for choice: %s" % choice) def initialize_cvalid_values(self, project_name=None): """ Initialize cvalid values for the current project with values defined in a json file, either in the CliMAF' project directory or in the climaf conf directory. :param project_name: name of the project to build the conf file name """ cvalid_conf_files = self.build_cvalid_conf_file_name( project_name=project_name, choice="both") cvalid_conf_files = [f for f in cvalid_conf_files if os.path.isfile(f)] if len(cvalid_conf_files) > 0: cvalid_conf_file = cvalid_conf_files[0] content = json.load(cvalid_conf_file) for key in content: self.cvalid(key, content[key]) def initialize_user_cvalid_values(self, project_name=None, from_tree_of_files=False, force=False): """ Initialize the user's configuration file for project project_name. If the configuration file already exists, do nothing except if force=True. If from_tree_of_file=True, read the tree of files to find out the possible values (not implemented yet). :param project_name: name of the default project :param from_tree_of_files: boolean, should the tree of file be read? :param force: boolean, should an existing user conf file be bypassed? """ cvalid_user_conf_file = self.build_cvalid_conf_file_name( project_name=project_name, choice="user")[0] cvalid_default_conf_file = self.build_cvalid_conf_file_name( project_name=project_name, choice="default")[0] if os.path.isfile(cvalid_user_conf_file): if force: clogger.warning("User's cvalid configuration file %s already exists and force=True, replace it" % cvalid_user_conf_file) os.remove(cvalid_user_conf_file) if from_tree_of_files: content = self.build_cvalid_from_tree_of_files( project_name) json.dump(content, cvalid_user_conf_file) elif os.path.isfile(cvalid_default_conf_file): if not os.path.isdir(os.path.dirname(cvalid_user_conf_file)): os.makedirs(os.path.dirname(cvalid_user_conf_file)) shutil.copyfile(cvalid_default_conf_file, cvalid_user_conf_file) else: clogger.error( "Default cvalid configuration file %s does not exist" % cvalid_default_conf_file) else: clogger.warning("User's cvalid configuration file %s already exists and force=False, do nothing." % cvalid_user_conf_file) elif cvalid_default_conf_file: if not os.path.isdir(os.path.dirname(cvalid_user_conf_file)): os.makedirs(os.path.dirname(cvalid_user_conf_file)) shutil.copyfile(cvalid_default_conf_file, cvalid_user_conf_file) else: clogger.error( "Default cvalid configuration file %s does not exist" % cvalid_default_conf_file) def cvalid(self, attribute, value=None): """Set or get the list of valid values for a CliMAF dataset attribute or facet (such as e.g. 'model', 'simulation' ...). Useful e.g. for constraining those data files which match a dataset definition Example:: >>> cvalid('grid' , [ "gr", "gn", "gr1", "gr2" ]) """ # if attribute not in self.facets: raise Climaf_Classes_Error( "project '%s' doesn't use facet '%s'" % (self.project, attribute)) if value is None: return self.facet_authorized_values.get(attribute, None) else: self.facet_authorized_values[attribute] = value
[docs]def cdef(attribute, value=None, project=None): """ Set or get the default value for a CliMAF dataset attribute or facet (such as e.g. 'model', 'simulation' ...), for use by next calls to :py:class:`~climaf.classes.cdataset()` or to :py:func:`~climaf.classes.ds` Argument 'project' allows to restrict the use/query of the default value to the context of the given 'project'. On can also set the (global) default value for attribute 'project' There is no actual check that 'attribute' is a valid keyword for a call to ``ds`` or ``cdataset`` Example:: >>> cdef('project','OCMPI5') >>> cdef('frequency','monthly',project='OCMPI5') """ if project not in cprojects: raise Climaf_Classes_Error( "project '%s' has not yet been declared" % project) if attribute == 'project': project = None # if project and attribute not in cprojects[project].facets: raise Climaf_Classes_Error( "project '%s' doesn't use facet '%s'" % (project, attribute)) if value is None: rep = cprojects[project].facet_defaults.get(attribute, None) if not rep: rep = cprojects[None].facet_defaults.get(attribute, "") return rep else: cprojects[project].facet_defaults[attribute] = value
cproject(None) cdef("domain", "global") # All Cobject instances are registered in this directory : cobjects = dict() class cobject(object): def __init__(self): # crs is the string expression defining the object # in the CLIMAF Reference Syntax self.crs = "void" def __str__(self): # return "Climaf object : "+self.crs return self.crs def __repr__(self): return self.crs def register(self): pass # cobjects[self.crs]=self # clogger.debug("Object Created ; crs = %s"%(self.crs)) def erase(self): pass # del(cobjects[self.crs]) # clogger.debug("Object deleted ; crs = %s"%(self.crs)) def buildcrs(self): raise NotImplementedError def __eq__(self, other): """ Check the equality of two CliMAF objects. :param other: CliMAF object to be compared :return: boolean indicating whether the CliMAF objects are the same or not """ return isinstance(other, type(self)) and self.crs == other.crs class cdummy(cobject): def __init__(self): """ cdummy class represents dummy arguments in the CRS """ self.crs = self.buildcrs() def buildcrs(self, period=None, crsrewrite=None): return 'ARG' def processDatasetArgs(**kwargs): """ Perfom basic checks on kwargs for functions cdataset and eds regarding the project where the dataset is defined Also complement with default values as handled by the project's definition and by cdef() """ if 'project' in kwargs: project = kwargs['project'] else: project = cdef("project") if project is None: raise Climaf_Classes_Error("Must provide a project (Can use cdef)") elif project not in cprojects: raise Climaf_Classes_Error( "Dataset's project '%s' has not " "been described by a call to cproject()" % project) attval = dict() attval["project"] = project sep = cprojects[project].separator # # Register facets values for facet in cprojects[project].facets: if facet in kwargs and kwargs[facet]: val = kwargs[facet] else: val = cdef(facet, project=project) attval[facet] = val if val: if isinstance(val, list): listval = val else: listval = [val] for lval in listval: if isinstance(lval, six.string_types) and lval.find(sep) >= 0: raise Climaf_Classes_Error( "You cannot use character '%s' when setting '%s=%s' because " "it is the declared separator for project '%s'. " "See help(cproject) for changing it, if needed" % (sep, facet, val, project)) # print "initalizing facet %s with value"%(facet,val) if attval['project'] == 'CMIP5': # Allow for a synonym for 'simulation' in CMIP5 : 'member' if 'member' in kwargs and kwargs['member'] not in [None, '']: attval['simulation'] = kwargs['member'] clogger.info( 'Attribute "member" in project CMIP5 has been translated to "simulation"') # Special processing for CMIP5 fixed fields : handling redundancy in facets if (attval['table'] == 'fx' or attval['period'] == 'fx' or attval['simulation'] == 'r0i0p0' or attval['frequency'] == 'fx'): attval['table'] = 'fx' attval['period'] = 'fx' attval['simulation'] = 'r0i0p0' attval['frequency'] = 'fx' # Special processing for CMIP6 : facet 'simulation' is forbidden (must use 'realization') if (attval['project'] == 'CMIP6') and 'simulation' in kwargs and len(kwargs['simulation']) > 0: raise Climaf_Classes_Error("You cannot use attribute 'simulation' in CMIP6; please use 'realization'. " "This if for kwargs=%s" % repr(kwargs)) errmsg = "" for facet in cprojects[project].facets: if attval[facet] is None: e = "Project '%s' needs facet '%s'. You may use cdef() for setting a default value" \ % (project, facet) errmsg += " " + e if errmsg != "": raise Climaf_Classes_Error(errmsg) # # print "kw="+`kwargs` for facet in attval: # print "checking facet %s"%facet # Facet specific processing if facet == 'period': if attval["period"] == "fx": attval["period"] = cperiod(attval["period"]) elif not isinstance(attval['period'], cperiod) and attval['period'] != "*": attval['period'] = init_period(attval['period']) # Check for typing or user's logic errors if facet not in cprojects[project].facets: e = "Project %s doesn't have facet %s" % (project, facet) errmsg += " " + e if errmsg != "": raise Climaf_Classes_Error(errmsg) if 'period' in attval and not isinstance(attval['period'], cperiod) and attval['period'] not in ["*", ]: Climaf_Classes_Error("at end of process.. : period is not a cperiod") return attval
[docs]class cdataset(cobject): # def __init__(self,project=None,model=None,simulation=None,period=None, # rip=None,frequency=None,domain=None,variable=None,version='last') : def __init__(self, **kwargs): """ Create a CLIMAF dataset. A CLIMAF dataset is a description of what the data (rather than the data itself or a file). It is basically a set of pairs attribute-value. The list of attributes actually used to describe a dataset is defined by the project it refers to. To display the attributes you may use for a given project, type e.g.: >>> cprojects["CMIP5"] For further details on projects , see :py:class:`~climaf.classes.cproject` None of the project's attributes are mandatory arguments, because all attributes defaults to the value set by :py:func:`~climaf.classes.cdef` (which also applies if providing a None value for an attribute) Some attributes have a special format or processing : - period : see :py:func:`~climaf.period.init_period`. See also function :py:func:`climaf.classes.ds` for added flexibility in defining periods as last of first set of years among available data - domain : allowed values are either 'global' or a list for latlon corners ordered as in : [ latmin, latmax, lonmin, lonmax ] - variable : name of the geophysical variable ; this should be : - either a variable actually included in the datafiles, - or a 'derived' variable (see :py:func:`~climaf.operators_derive.derive` ), - or, an aliased variable name (see :py:func:`~climaf.classes.alias` ) - in project CMIP5 , for triplets (frequency, simulation, period, table ) : if any is 'fx' (or 'r0i0p0 for simulation), the others are forced to 'fx' (resp. 'r0i0p0') too. Example, using no default value, and adressing some CMIP5 data :: >>> cdataset(project='CMIP5', model='CNRM-CM5', experiment='historical', frequency='monthly', >>> simulation='r2i3p9', domain=[40,60,-10,20], variable='tas', period='1980-1989', version='last') You may use wildcard ('*') in attribute values, and use :py:meth:`~climaf.classes.cdataset.explore` for having CliMAF doing something sensible matching such attributes with available data """ # attval = processDatasetArgs(**kwargs) # # TBD : Next lines for backward compatibility, but should re-engineer self.project = attval["project"] self.simulation = attval['simulation'] self.variable = attval['variable'] # alias is a n-plet : filevar, scale, offset, filenameVar, missing self.period = attval['period'] self.domain = attval['domain'] # self.model = attval.get('model', "*") self.frequency = attval.get('frequency', "*") # Normalized name is annual_cycle, but allow also for 'seasonal' for the time being if self.frequency in ['seasonal', 'annual_cycle']: self.period.fx = True freqs_dic = frequencies.get(self.project, None) # print freqs_dic if freqs_dic: for k in freqs_dic: if freqs_dic[k] == self.frequency and k == 'annual_cycle': self.period.fx = True # self.kvp = attval self.alias = varIsAliased(self.project, self.variable) # if "," in self.variable and self.alias: filevar, scale, offset, units, filenameVar, missing, conditions = self.alias if filevar != self.variable or scale != 1. or offset != 0 or missing: raise Climaf_Classes_Error( "Cannot alias/scale/setmiss on group variable") # Build CliMAF Ref Syntax for the dataset self.crs = self.buildcrs() # self.files = None self.local_copies_of_remote_files = None self.register() def __eq__(self, other): res = super(cdataset, self).__eq__(other) if res: self_kvp = copy.deepcopy(self.kvp) self_kvp["model"] = self.model self_kvp["frequency"] = self.frequency self_kvp["alias"] = self.alias other_kvp = copy.deepcopy(other.kvp) other_kvp["model"] = other.model other_kvp["frequency"] = other.frequency other_kvp["alias"] = other.alias res = res and all([self_kvp[p] == other_kvp[p] for p in self_kvp]) return res def setperiod(self, period): if isinstance(period, six.string_types): period = init_period(period) self.erase() self.period = period self.kvp['period'] = period self.crs = self.buildcrs() self.register() def buildcrs(self, period=None, crsrewrite=None): crs_template = string.Template(cprojects[self.project].crs) dic = self.kvp.copy() if period is not None: dic['period'] = period if isinstance(dic['domain'], list): dic['domain'] = repr(dic['domain']) rep = "ds('%s')" % crs_template.safe_substitute(dic) return rep def errata(self): if self.project == "CMIP6": service = "https://errata.es-doc.org/1/resolve/simple-pid?datasets=" browser = "firefox" try: res = self.explore('resolve') except: raise Climaf_Classes_Error( "Cannot proceed with errata: Cannot resolve ambiguities on %s" % repr(self)) # CMIP6.CMIP.CNRM-CERFACS.CNRM-ESM2-1.1pctCO2.r1i1p1f2.Emon.expfe.gn.v20181018 ref = ".".join(["CMIP6", res.kvp["mip"], res.kvp["institute"], res.kvp["model"], res.kvp["experiment"], res.kvp["realization"], res.kvp["table"], res.kvp["variable"], res.kvp["grid"], "v" + res.kvp["version"]]) clogger.warning("Querying errata service %s using %s" % (service, browser)) os.system("%s %s%s &" % (browser, service, ref)) # voir le fichier api_errata_Atef.py pour faire mieux else: clogger.warning( "No errata service is yet defined for project %s" % self.project) def isLocal(self): # return self.baseFiles().find(":")<0 model = getattr(self, "model", "*") return isLocal(project=self.project, model=model, simulation=self.simulation, frequency=self.frequency, realm=self.kvp.get("realm", "*"), table=self.kvp.get("table", "*")) def isCached(self): """ TBD : analyze if a remote dataset is locally cached """ # clogger.error("TBD - remote datasets are not yet cached") rep = False return rep def oneVarPerFile(self): llocs = getlocs(project=self.project, model=self.model, simulation=self.simulation, frequency=self.frequency, realm=self.kvp.get("realm", "*"), table=self.kvp.get("table", "*")) return all([org for org, freq, url in llocs]) def periodIsFine(self): clogger.debug("always returns False, yet - TBD") return False def domainIsFine(self): clogger.debug("a bit too simple yet (domain=='global')- TBD") return self.domain == 'global' def periodHasOneFile(self): return len(self.baseFiles().split(" ")) < 2 # clogger.debug("always returns False, yet - TBD") # return(False) def hasOneMember(self): clogger.debug("always returns True, yet - TBD") return True def hasExactVariable(self): # Assume that group variable do not need aliasing if "," in self.variable: return True clogger.debug("always returns False, yet - TBD") return False def missingIsOK(self): if self.alias is None: return True else: _, _, _, _, _, missing, _ = self.alias return missing is None def matches_conditions(self, conditions): """ Return True if, for all keys in dict conditions, the kvp value of object for same key is among condition's values (which can be a list) Example : with conditions={ "model":"CanESM5" , "version": ["20180103", "20190112"] } the method will return True for both versions of that model """ if conditions is None: return True for key in conditions: values = conditions[key] if not isinstance(values, list): values = [values, ] if self.kvp[key] not in values: return False return True def check_if_dict_ambiguous(self, input_dict): ambiguous_dict = dict() non_ambigous_dict = dict() for (kw, val) in input_dict.items(): if isinstance(val, list): if len(val) > 1: ambiguous_dict[kw] = val else: non_ambigous_dict[kw] = val[0] elif kw in ['variable', ]: # Should take care of aliasing to fileVar matching_vars = set() paliases = aliases.get(self.project, []) for variable in paliases: if val == paliases[variable][0]: matching_vars.add(variable) if len(matching_vars) == 0: # No filename variable in aliases matches actual filename non_ambigous_dict[kw] = val elif len(matching_vars) == 1: # One variable has a filename variable which matches the retrieved filename non_ambigous_dict[kw] = matching_vars[0] else: ambiguous_dict[kw] = (val, matching_vars) else: non_ambigous_dict[kw] = val return non_ambigous_dict, ambiguous_dict
[docs] def glob(self, what=None, periods=None, split=None, use_frequency=False): """Datafile exploration for a dataset which possibly has wildcards (* and ?) in attributes/facets. Returns info regarding matching datafile or directories: - if WHAT = 'files' , returns a string of all data filenames - otherwise, returns a list of facet/value dictionnaries for matching data (or a pair, see below) In last case, data file periods are not returned if arg PERIODS is None and data search is optimized for the project. In that case, the globbing is done on data directories and not on data files, which is much faster. If PERIODS is not None, individual data files periods are merged among cases with same facets values if SPLIT is not None, a pair is returned intead of the dicts list : - first element is a dict with facets which values are the same among all cases - second element is the dicts list as above, but in which facets with common values are discarded Example : >>> tos_data = ds(project='CMIP6', variable='tos', period='*', table='Omon', model='CNRM*', realization='r1i1p1f*' ) >>> common_keys, varied_keys = tos_data.glob(periods=True, split=True) >>> common_keys {'mip': 'CMIP', 'institute': 'CNRM-CERFACS', 'experiment': 'historical', 'realization': 'r1i1p1f2', 'table': 'Omon', 'variable': 'tos', 'version': 'latest', 'period': [1850-2014], 'root': '/bdd'} >>> varied_keys [{'model': 'CNRM-ESM2-1' , 'grid': 'gn' }, {'model': 'CNRM-ESM2-1' , 'grid': 'gr1'}, {'model': 'CNRM-CM6-1' , 'grid': 'gn' }, {'model': 'CNRM-CM6-1' , 'grid': 'gr1'}, {'model': 'CNRM-CM6-1-HR', 'grid': 'gn' } ] """ dic = self.kvp.copy() if self.alias: filevar, _, _, _, filenameVar, _, conditions = self.alias req_var = dic["variable"] dic["variable"] = string.Template(filevar).safe_substitute(dic) if filenameVar: dic["filenameVar"] = filenameVar clogger.debug("glob() with dic=%s" % repr(dic)) cases = list() files = selectFiles(with_periods=(periods is not None or what in ['files', ]), return_combinations=cases, use_frequency=use_frequency, **dic) if what in ['files', ]: return files else: if periods is not None: cases = group_periods(cases) else: # For non-optimized cases, select_files returns periods, # but we want an even behaviour for case in cases: case.pop('period', None) if split is not None: keys = remove_keys_with_same_values(cases) return keys, cases else: return cases
[docs] def explore(self, option='check_and_store', group_periods_on=None, operation='intersection', first=None): """ Versatile datafile exploration for a dataset which possibly has wildcards (* and ? ) in attributes. ``option`` can be : - 'choices' for returning a dict which keys are wildcard attributes and entries are values list - 'resolve' for returning a NEW DATASET with instanciated attributes (if uniquely) - 'ensemble' for returning AN ENSEMBLE based on multiple possible values of one or more attributes (tell which one is first in labels by using arg 'first') - 'check_and_store' (or missing) for just identifying and storing dataset files list (while ensuring non-ambiguity check for wildcard attributes) This feature works only for projects which organization is of type 'generic' **See further below, after the first examples, what can done with wildcard on 'period'** Toy example :: >>> rst=ds(project="example", simulation="*", variable="rst", period="1980-1981") >>> rst ds('example|*|rst|1980-1981|global|monthly') >>> rst.explore('choices') {'simulation': ['AMIPV6ALB2G']} >>> instanciated_dataset=rst.explore('resolve') >>> instanciated_dataset ds('example|AMIPV6ALB2G|rst|1980-1981|global|monthly') >>> my_ensemble=rst.explore('ensemble') error : "Creating an ensemble does not make sense because all wildcard attributes have a single possible value ({'simulation': ['AMIPV6ALB2G']})" Real life example for options ``choices`` and ``ensemble`` :: >>> rst=ds(project="CMIP6", model='*', experiment="*ontrol*", realization="r1i1p1f*", table="Amon", ... variable="rsut", period="1980-1981") >>> clog('info') >>> rst.explore('choices') info : Attribute institute has matching value CNRM-CERFACS info : Attribute experiment has multiple values : set(['piClim-control', 'piControl']) info : Attribute grid has matching value gr info : Attribute realization has matching value r1i1p1f2 info : Attribute mip has multiple values : set(['CMIP', 'RFMIP']) info : Attribute model has multiple values : set(['CNRM-ESM2-1', 'CNRM-CM6-1']) {'institute': ['CNRM-CERFACS'], 'experiment': ['piClim-control', 'piControl'], 'grid': ['gr'], 'realization': ['r1i1p1f2'], 'mip': ['CMIP', 'RFMIP'], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1']} >>> # Let us further select by setting experiment=piControl >>> mrst=ds(project="CMIP6", model='*', experiment="piControl", realization="r1i1p1f*", table="Amon", ... variable="rsut", period="1980-1981") >>> mrst.explore('choices') {'institute': ['CNRM-CERFACS'], 'mip': ['CMIP'], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'], 'grid': ['gr'], 'realization': ['r1i1p1f2']} >>> small_ensemble=mrst.explore('ensemble') >>> small_ensemble cens({ 'CNRM-ESM2-1':ds('CMIP6%%rsut%1980-1981%global%/cnrm/cmip%CNRM-ESM2-1%CNRM-CERFACS%CMIP%Amon%piControl%' 'r1i1p1f2%gr%latest'), 'CNRM-CM6-1' :ds('CMIP6%%rsut%1980-1981%global%/cnrm/cmip%CNRM-CM6-1%CNRM-CERFACS%CMIP%Amon%piControl%' 'r1i1p1f2%gr%latest') }) When option='choices' and period= '*', the period of all matching files will be either : - aggregated among all instances of all attributes with wildcards (default) - or, if argument ``group_periods_on`` provides an attribute name, aggregated after being sorted on that attribute and merged The aggregation is governed by argument ``operation``, which can be either : - 'intersection' : which is the most useful case, and hence is the default - 'union' : which has not much sense except to know which periods are definitely not covered by any data - None : no aggregation occurs, and you get a dict of the merged periods, which keys are the value of the grouping attribute Attribute 'period' cannot use a * without being == * ; Examples without grouping periods over any attribute :: >>> # Let us use a kind of dataset which data files are temporally splitted, >>> # and allow for various models, and use a wildcard for period >>> so=ds(project="CMIP6", model='CNRM*', experiment="piControl", realization="r1i1p1f2", ... table="Omon", variable="so", period="*") >>> # What is the overall period covered by the union of all datafiles >>> # (but not necessarily by a single model!) >>> so.explore('choices', operation='union') { 'period': [1850-2349], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'] .....} >>> # What is the intersection of periods covered by each datafile >>> so.explore('choices') { 'period': [None], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'] .....} >>> # What is the list of periods covered by datafiles >>> so.explore('choices', operation=None) { 'period': {None: [1850-1899, 1900-1949, 1950-1999, 2000-2049, 2050-2099, 2100-2149, 2150-2199, 2200-2249, 2250-2299, 2300-2349]}, 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'] .....} Examples using periods grouping over an attribute :: >>> # What is the intersection of available periods after grouping them on the various values of 'model' >>> so.explore('choices',group_periods_on='model') { 'period': [1850-2349], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'], ....} >>> # Same, but explicit the default value >>> so.explore('choices',group_periods_on='model',operation='intersection') { 'period': [1850-2349], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'], ....} >>> # What are the aggregated periods for each value of 'model' >>> so.explore('choices',group_periods_on='model',operation=None) { 'period': {'CNRM-ESM2-1': [1850-2349], 'CNRM-CM6-1' : [1850-2349] }, 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'], ...} """ use_frequency = cprojects[self.project].use_frequency if use_frequency: if "frequency" in self.kvp: use_frequency = self.kvp["frequency"] else: use_frequency = cdef("frequency", project=self.project) if not use_frequency: use_frequency = False dic = self.kvp.copy() if self.alias: filevar, _, _, _, filenameVar, _, conditions = self.alias req_var = dic["variable"] dic["variable"] = string.Template(filevar).safe_substitute(dic) if filenameVar: dic["filenameVar"] = filenameVar clogger.debug("Looking with dic=%s" % repr(dic)) # if option != 'check_and_store' : wildcards = dict() files = selectFiles(return_wildcards=wildcards, merge_periods_on=group_periods_on, use_frequency=use_frequency, **dic) # -- Use the requested variable instead of the aliased if self.alias: dic["variable"] = req_var # if option != 'check_and_store' : periods = wildcards.get('period', None) # else : periods=None if periods: # print "periods=",periods if option not in ['choices', ]: if group_periods_on: raise Climaf_Classes_Error( "Can use 'group_periods_on' only with option='choices'") if operation != 'intersection': raise Climaf_Classes_Error( "Can use operation %s only with option='choices'" % operation) if operation in ['intersection', ]: if group_periods_on: # print "periods=",periods merged_periods = [merge_periods( p) for p in list(periods.values())] inter = merged_periods.pop(0) for p in merged_periods: inter = intersect_periods_list(inter, p) else: inter = merge_periods(periods[None]) wildcards['period'] = inter elif operation in ['union', ]: to_merge = [] for plist in list(periods.values()): to_merge.extend(plist) wildcards['period'] = merge_periods(to_merge) elif operation is None: # Merge periods for each facet value separately if group_periods_on: for key in periods: periods[key] = merge_periods(periods[key]) wildcards['period'] = periods else: raise Climaf_Classes_Error( "Operation %s is not known " % operation) # wildcard_attributes_list = [k for k in dic if isinstance( dic[k], six.string_types) and "*" in dic[k]] if option in ['resolve', ]: clogger.debug("Trying to resolve on attributes %s" % wildcard_attributes_list) non_ambiguous_dict, ambiguous_dict = self.check_if_dict_ambiguous( wildcards) if len(ambiguous_dict) != 0: error_msg = list() for kw in sorted(list(ambiguous_dict)): if kw in ["variable", ]: error_msg.append("Filename variable %s is matched by multiple variables %s" % (ambiguous_dict[kw][0], repr(ambiguous_dict[kw][1]))) elif kw in ["period", ]: error_msg.append( "Periods with holes are not handled: %s" % str(ambiguous_dict[kw])) else: error_msg.append("Wildcard attribute %s is ambiguous %s for dataset %s" % (kw, str(ambiguous_dict[kw]), self)) raise Climaf_Classes_Error(" ".join(error_msg)) else: dic.update(**non_ambiguous_dict) return ds(**dic) elif option in ['choices', ]: clogger.debug( "Listing possible values for these wildcard attributes %s" % wildcard_attributes_list) self.files = files return wildcards elif option in ['ensemble', ]: clogger.debug("Trying to create an ensemble on attributes %s" % wildcard_attributes_list) is_ensemble = False for kw in wildcards: entry = wildcards[kw] # print "entry=",entry, 'type=',type(entry), 'ensemble_kw=',ensemble_kw if kw in ['period', ] and isinstance(entry, list): if len(wildcards['period']) > 1: raise Climaf_Classes_Error("Cannot create an ensemble with holes in period (%s)" % wildcards['period']) entry = entry[0] if isinstance(entry, list): is_ensemble = (len(entry) > 1) dic[kw] = entry if is_ensemble is False: # raise Climaf_Classes_Error("Creating an ensemble does not make sense because all wildcard "+\ # "attributes have a single possible value (%s)"%wildcards) clogger.warning("Creating an ensemble with a single member") self.files = files return eds(first=first, **dic) elif option in ['check_and_store', ]: for kw in wildcards: entry = wildcards[kw] if isinstance(entry, list) and len(entry) > 1: raise Climaf_Classes_Error("This dataset is ambiguous on attribute %s='%s'; please choose among :" " %s or use either 'ensure_dataset=False' (with method baseFiles or " "listfiles) or 'option=\'choices\' (with method explore). " "Context is %s" % (kw, dic[kw], entry, self.kvp)) self.files = files else: raise Climaf_Classes_Error("Unknown option %s" % option)
def baseFiles(self, force=False, ensure_dataset=True): """ Returns the list of (local or remote) files which include the data for the dataset Use cached value (i.e. attribute 'files') unless called with arg force=True If ensure_dataset is True, forbid ambiguous datasets """ if (force and self.project != 'file') or self.files is None: if ensure_dataset: self.explore() else: cases = self.explore(option='choices') list_keys = [k for k in cases if type( cases[k]) is list and k != 'period'] if len(list_keys) > 0: clogger.error( "The dataset is ambiguous on %s; its CRS is %s" % (cases, self)) return None return self.files
[docs] def listfiles(self, force=False, ensure_dataset=True): """ Returns the list of (local or remote) files which include the data for the dataset Use cached value unless called with arg force=True If ensure_dataset is True, forbid ambiguous datasets """ return self.baseFiles(force=force, ensure_dataset=ensure_dataset)
def hasRawVariable(self): """ Test local data files to tell if a dataset variable is actually included in files (rather than being a derived, virtual variable) For the time being, returns False, which leads to always consider that variables declared as 'derived' actually are derived """ clogger.debug("TBD: actually test variables in files, rather than assuming that variable %s is virtual for " "dataset %s" % (self.variable, self.crs)) return False
[docs] def check(self, frequency=True, gap=True, period=True): """ Check time consistency of first variable of a dataset or ensemble members: - if frequency is True : check if data frequency is consistent with dataset frequency - if gap is True : check if file data have a gap - if period is True : check if period covered by data actually includes the whole of dataset period Returns: True if every check is OK, False if one fails, None if analysis is not yet possible """ if gap: frequency = True # files = self.baseFiles() if not files: return False files = files.split() clogger.debug("List of selected files: %s" % files) # rep = True dsets = [xr.open_dataset(f, use_cftime=True) for f in files] all_dsets = xr.combine_by_coords(dsets, combine_attrs='override') # if self.frequency == 'fx' or self.frequency == 'annual_cycle': clogger.info("No check for fixed data for %s", self) return True if self.frequency == "monthly" and frequency: clogger.error("Check cannot yet process monthly data due to" + "to a shortcoming in analyzing monthly data frequency") return None if not getattr(dsets[0], "frequency", False) and frequency: clogger.warning("No frequency in file(s) for %s", self) return False if "time" not in all_dsets: clogger.warning("Cannot yet check a dataset which time dimension" + "is not named 'time' (%s)" % self) return False # times = all_dsets.time clogger.debug('Time data of selected files: %s' % times) # if frequency: # Check if data time interval is consistent with dataset frequency data_freq = xr.infer_freq(times) if data_freq is None: clogger.error( "Time interval detected by xr.infer_freq is None %s" % str(times)) return False table = {"monthly": "MS", "daily": "D", "day": "D", "6h": "6H", "3h": "3H", "1h": "1H", "6Hourly": "6H", "3Hourly": "3H"} if self.frequency not in table: clogger.error("Check cannot yet handle frequency %s" % self.frequency) return None if data_freq != table[self.frequency]: message = 'Data time interval %s is not consistent with dataset frequency %s' clogger.warning(message % (data_freq, self.frequency)) rep = False if gap: # Check if file data have a gap time_values = times.values.flatten() delta = freq_to_minutes(data_freq) cpt = 0 for ptim, tim in zip(time_values[:-1], time_values[1:]): if ptim + timedelta(minutes=delta) != tim: rep = False cpt += 1 if cpt > 3: break clogger.error("File data time issue between %s and %s, interval inconsistent with %s" % (ptim, tim, delta)) if period: # Compare period covered by data files with dataset's period cell_methods = getattr(dsets[0][varOf(self)], "cell_methods", None) file_period = timeLimits(times, use_frequency=True, cell_methods=cell_methods, strict_on_time_dim_name=False) clogger.debug('Period covered by selected files: %s' % file_period) consist = "" if not file_period.includes(self.period): consist = "not " rep = False clogger.info("Datafile time period (%s) includes dataset time period (%s)" % (file_period, self.period) + "=> time periods are %sconsistent." % consist) return rep
[docs]class cens(cobject, dict): def __init__(self, dic={}, order=None, sortfunc=None): """Function cens creates a CliMAF object of class ``cens`` , i.e. a dict of objects, which keys are member labels, and which members are ordered, using method ``set_order`` In some cases, ensembles of datasets from the same project can also be built easily using :py:func:`~climaf.classes.eds()` When applying an operator to an ensemble, CliMAF will know, from operator's declaration (see :py:func:`~climaf.operators.cscript()`), whether the operator 'wishes' to get the ensemble or, on the reverse, is not 'ensemble-capable' : - if the operator is ensemble-capable it will deliver it : - if it is a script : with a string composed by concatenating the corresponding input files; it will also provide the labels list to the script if its declaration calls for it with keyword ${labels} (see :py:func:`~climaf.operators.cscript()`) - if it is a Python function : with the dict of corresponding objects - if the operator is 'ensemble-dumb', CliMAF will loop applying it on each member, and will form a new ensemble with the results. The dict keys must be label strings, which describe what is basically different among members. They are usually used by plot scripts to provide a caption allowing to identify each dataset/object e.g using various colors. Examples (see also :download:`../examples/ensemble.py`) : >>> cdef('project','example'); cdef('simulation',"AMIPV6ALB2G") >>> cdef('variable','tas');cdef('frequency','monthly') >>> # >>> ds1980=ds(period="1980") >>> ds1981=ds(period="1981") >>> # >>> myens=cens({'1980':ds1980 , '1981':ds1981 }) >>> ncview(myens) # will launch ncview once per member >>> >>> myens=cens({'1980':ds1980 , '1981':ds1981 }, order=['1981','1980']) >>> myens.set_order(['1981','1980']) >>> >>> # Add a member >>> myens['abcd']=ds(period="1982") Limitations : Even if an ensemble is a dict, some dict methods are not properly implemented (popitem, fromkeys) and function iteritems does not use member order You can write an ensemble to a file using function :py:func:`~climaf.cache.efile` """ if not all(map(lambda x: isinstance(x, six.string_types), list(dic))): raise Climaf_Classes_Error("Ensemble keys/labels must be strings") if not all(map(lambda x: isinstance(x, cobject), list(dic.values()))): raise Climaf_Classes_Error( "Ensemble members must be CliMAF objects") self.sortfunc = sortfunc # dict.update(self, dic) # keylist = list(self) try: from natsort.natsort import natsorted keylist = natsorted(keylist) except: keylist.sort() if order: self.set_order(order, None) elif sortfunc: self.order = sortfunc(keylist) else: self.order = keylist # self.crs = self.buildcrs() self.register() def __eq__(self, other): res = super(cens, self).__eq__(other) if res: res = res and self.order == other.order and all( [self.__dict__[m] == other.__dict[m] for m in self.order]) return res def set_order(self, order, ordered_keylist=None): ordered_list = [o for o in order] ordered_list.sort() if ordered_keylist is None: ordered_keylist = list(self) ordered_keylist.sort() if sorted(ordered_list) != sorted(ordered_keylist): raise Climaf_Classes_Error( "Order list does not match dict keys list : %s and %s" % (repr(ordered_list), repr(ordered_keylist))) self.order = order def __setitem__(self, k, v): if not isinstance(k, six.string_types): raise Climaf_Classes_Error("Ensemble keys/labels must be strings") if not isinstance(v, cobject): raise Climaf_Classes_Error( "Ensemble members must be CliMAF objects") dict.__setitem__(self, k, v) if k not in self.order: self.order.append(k) if self.sortfunc: self.order = self.sortfunc(list(self)) self.crs = self.buildcrs() self.register() def items(self): return [(elt, self[elt]) for elt in self.order] def copy(self): e = cens(self, order=[m for m in self.order], sortfunc=self.sortfunc) return e def pop(self, key, default=None): if key in self: self.order.remove(key) return dict.pop(self, key, default) else: return default def clear(self): dict.clear(self) self.order = [] def update(self, it): dict.update(self, it) if isinstance(it, dict): for el, val in list(it.items()): self.order.append(el) else: for el, val in it: self.order.append(el) if self.sortfunc: self.order = self.sortfunc(list(self)) def buildcrs(self, crsrewrite=None, period=None): if crsrewrite is None and period is None: # A useful optimization, for multi-model studies rep = "cens({%s})" % ",".join( ["'%s':%s" % (m, self[m].crs) for m in self.order]) else: rep = "cens({%s})" % ",".join(["'%s':%s" % (m, self[m].buildcrs(crsrewrite=crsrewrite, period=period)) for m in self.order]) return rep def check(self): """ Check time consistency of first variable for each member of the ensemble : - check if first data time interval is consistent with dataset frequency - check if file data have a gap - check if period covered by data files actually includes the whole of dataset period Returns: True if period of data files included dataset period, False otherwise. Example: >>> # Ensemble with monthly frequency >>> j0=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1980') >>> j1=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1981') >>> ens=cens({'1980':j0, '1981':j1}) >>> res=ens.check() """ # Call 'check' method of 'cdataset' for each member of the ensemble rep = True for memb in self: # clogger.info('Member: %s'%memb) rep = self[memb].check() and rep return rep
[docs]def eds(first=None, **kwargs): """ Create a dataset ensemble using the same calling sequence as :py:func:`~climaf.classes.cdataset`, except that some facets are lists, which defines the ensemble members; these facets must be among the facets authorized for ensemble in the (single) project involved Example:: >>> cdef("frequency","monthly") ; cdef("project","CMIP5"); cdef("model","CNRM-CM5") >>> cdef("variable","tas"); cdef("period","1860") >>> ens=eds(experiment="historical", simulation=["r1i1p1","r2i1p1"]) Argument 'first' is used when multiple attributes are of list type, and tells which of these attributes appears first in member labels """ attval = processDatasetArgs(**kwargs) # Check that any facet/attribute of type 'list' (for defining an # ensemble) is OK for the project, and that there is at most one nlist = 0 listattr = [] for attr in attval: clogger.debug("Looking at attr %s for ensemble" % attr) if isinstance(attval[attr], list) and attr != "domain": if attr not in cprojects[attval["project"]].attributes_for_ensemble: raise Climaf_Classes_Error( "Attribute %s cannot be used for ensemble" % attr) clogger.debug("Attr %s is used for an ensemble" % attr) nlist += 1 listattr.append(attr) if len(listattr) < 1: raise Climaf_Classes_Error( "For building an ensemble, must have at least one attribute which is a list") # Create an ensemble of datasets if applicable d = dict() if len(listattr) == 1: # Simple case : only one attribute has multiple values (-> members) attr = listattr[0] for member in attval[attr]: attval2 = attval.copy() attval2[attr] = member d[member] = cdataset(**attval2) return cens(d, order=attval[attr]) else: # Must construct the cartesian product of all list-type attributes listattr2 = [att for att in listattr] if first is not None: listattr2.remove(first) att = first else: # Use the first attributes declared as ensemble-prone for the project for a in cprojects[attval["project"]].attributes_for_ensemble: print("Checkin listattribute", a, "against", listattr2) if a in listattr2: listattr2.remove(a) att = a break comb = [[(att, val)] for val in attval[att]] while len(listattr2) > 0: att = listattr2.pop(0) newcomb = [] for c in comb: for v in attval[att]: lst = [e for e in c] lst.append((att, v)) newcomb.append(lst) comb = newcomb orderl = list() for c in comb: attval2 = attval.copy() label = "" for att, val in c: attval2[att] = val label += val + "_" label = label[:-1] orderl.append(label) d[label] = cdataset(**attval2) return cens(d, order=orderl)
[docs]def fds(filename, simulation=None, variable=None, period=None, model=None): """ fds stands for FileDataSet; it allows to create a dataset simply by providing a filename and optionally a simulation name , a variable name, a period and a model name. For dataset attributes which are not provided, these defaults apply : - simulation : the filename basename (without suffix '.nc') - variable : the set of variables in the data file - period : the period actually covered by the data file (if it has time_bnds) - model : the 'model_id' attribute if it exists, otherwise : 'no_model' - project : 'file' (with separator = '|') - frequency : the value of global attribute fequency in datafile, if it exists The following restriction apply to such datasets : - functions :py:func:`~climaf.classes.calias` and :py:func:`~climaf.operators_derive.derive` cannot be used for project 'file' Results are unforeseen if all variables do not have the same time axis Examples : See :download:`data_file.py <../examples/data_file.py>` """ filename = os.path.expanduser(filename) if not os.path.exists(filename): raise Climaf_Classes_Error("File %s does no exist" % filename) # if model is None: model = model_id(filename) if simulation is None: simulation = os.path.basename(filename)[0:-3] # if variable is None: lvars = varsOfFile(filename) if len(lvars) == 0: raise Climaf_Classes_Error("No variable in file %s" % filename) variable = lvars.pop() for v in lvars: variable += "," + v else: lvars = variable.split(',') for v in lvars: if not fileHasVar(filename, v): raise Climaf_Classes_Error( "No variable %s in file %s" % (v, filename)) # try: fperiod = timeLimits(filename) except: fperiod = None if period is None: if fperiod is None: period = "fx" # raise Climaf_Classes_Error("Must provide a period for file %s " % filename) else: period = repr(fperiod) elif period != 'fx': if fperiod and not fperiod.includes(init_period(period)): raise Climaf_Classes_Error( "Max period from file %s is %s" % (filename, repr(fperiod))) # d = ds(project='file', model=model, simulation=simulation, variable=variable, period=period, path=filename) d.files = filename d.frequency = attrOfFile(filename, "frequency", "*") if period == 'fx': d.frequency = 'fx' return d
class ctree(cobject): def __init__(self, climaf_operator, script, *operands, **parameters): """ Builds the tree of a composed object, including a dict for outputs. """ if len(operands) == 0: raise Climaf_Classes_Error( "Cannot apply an operator to no operand") self.operator = climaf_operator self.script = script import copy if script is None: self.flags = False else: self.flags = copy.copy(script.flags) self.operands = operands if "period" in parameters: p = parameters["period"] if isinstance(p, cperiod): parameters["period"] = repr(p) if "variable" in parameters: self.variable = parameters["variable"] else: self.variable = None self.parameters = parameters for o in operands: if o and not isinstance(o, cobject): raise Climaf_Classes_Error( "operand " + repr(o) + " is not a CliMAF object") self.crs = self.buildcrs() self.outputs = dict() self.register() def buildcrs(self, crsrewrite=None, period=None): """ Builds the CRS expression representing applying OPERATOR on OPERANDS with PARAMETERS. Forces period downtree if provided A function for rewriting operand's CRS may be provided Special case : if operator is 'select' and sole operand is a dataset and there is no parameters, then return dataset's crs. This is the way to avoid repetitive data selection, when a data selection has been explictly cached """ first_op = self.operands[0] if self.operator in ['select', ] and len(self.operands) == 1 and isinstance(first_op, cdataset) and \ len(list(self.parameters)) == 0 and first_op.alias is None: if crsrewrite is None and period is None: return first_op.crs else: return first_op.buildcrs(crsrewrite=crsrewrite, period=period) # # General case # Operators are listed in alphabetical order; parameters too rep = list() # for op in [o for o in self.operands if o]: if crsrewrite is None and period is None and "crs" in dir(op): opcrs = op.crs else: opcrs = op.buildcrs(crsrewrite=crsrewrite, period=period) if crsrewrite: opcrs = crsrewrite(opcrs) rep.append(opcrs) # for par in [p for p in sorted(list(self.parameters)) if p not in ["member_label", ]]: value = self.parameters[par] if isinstance(value, six.string_types): value = str(value) rep.append("{}={}".format(par, repr(value))) rep = "%s(%s)" % (self.operator, ",".join(rep)) # clogger.debug("Create crs for ctree: %s" % rep) return rep def setperiod(self, period): """ modifies the period for all datasets of a tree""" self.erase() if isinstance(period, six.string_types): period = init_period(period) for op in self.operands: op.setperiod(period) self.crs = self.buildcrs(period=period) self.register() class scriptChild(cobject): def __init__(self, cobject, varname): """ Builds one of the child of a script call, which represents one output """ self.father = cobject self.varname = varname self.variable = varname self.crs = self.buildcrs() self.file = None self.register() def setperiod(self, period): self.erase() self.crs = self.buildcrs(period=period) self.register() def buildcrs(self, period=None, crsrewrite=None): if period is None: tmp = self.father.crs else: tmp = self.father.buildcrs(period=period) if crsrewrite: tmp = crsrewrite(tmp) return ".".join([tmp, self.varname]) def compare_trees(tree1, tree2, func, filter_on_operator=None): """ Recursively compares TREE1 and TREE2. For the nodes : compares operator and parameters; ensures that FILTER_ON_OPERATOR(operator) is not true For the leaves (datasets) : ensure that string representations of applying function FUNC to the pair of datasets returns the same value for all datasets pairs in the (parallel) trees Returns that common value : func(leave1,leave2)) or None FUNC cannot return None as a valid value """ if isinstance(tree1, cdataset) and isinstance(tree2, cdataset): rep = func(tree1, tree2) clogger.debug("Comparison of two datasets...") clogger.debug("... %s" % str(rep)) return rep elif isinstance(tree1, ctree) and isinstance(tree2, ctree): clogger.debug("Comparison of two trees...") if tree1.operator == tree2.operator: if filter_on_operator: if filter_on_operator(tree1.operator): clogger.debug("Operator filtered: %s" % tree1.operator) return None if tree1.parameters == tree2.parameters: clogger.debug("Parameters are coherent: %s" % tree1.parameters) rep = (reduce(lambda a, b: a if repr(a) == repr(b) else None, [compare_trees(op1, op2, func, filter_on_operator) for op1, op2 in zip(tree1.operands, tree2.operands)])) clogger.debug("... %s" % str(rep)) return rep else: clogger.debug("Parameters are not coherent: %s/%s" % (tree1.parameters, tree2.parameters)) return None elif isinstance(tree1, scriptChild) and isinstance(tree2, scriptChild): clogger.debug("Comparison of two scriptChild...") if tree1.varname == tree2.varname: clogger.debug("... varnames are coherent: %s" % tree1.varname) rep = compare_trees(tree1.father, tree2.father, func, filter_on_operator) clogger.debug("... %s" % str(rep)) return rep else: clogger.debug("... varnames are not coherent: %s/%s" % (tree1.varname, tree2.varname)) return None allow_errors_on_ds_call = True # False def allow_error_on_ds(allow=True): global allow_errors_on_ds_call allow_errors_on_ds_call = allow # print ('allow_errors_on_ds_call='+`allow_errors_on_ds_call`) def select_projects(**kwargs): """ If kwargs['project'] is a list (has multiple values), select_projects loops on the projects until it finds a file containing the aliased variable name. """ if 'project' not in kwargs: return kwargs else: p_list = kwargs['project'] if not isinstance(p_list, list): # p_list = [p_list] return kwargs for project in p_list: wkwargs = kwargs.copy() wkwargs.update(dict(project=project)) dat = cdataset(**wkwargs) files = dat.baseFiles() if files: clogger.info('-- File found for project ' + project + ' and ' + repr(wkwargs)) try: tmpVarInFile = varIsAliased(project, wkwargs['variable'])[0] except: tmpVarInFile = wkwargs['variable'] if fileHasVar(files.split(" ")[0], tmpVarInFile): clogger.info('-- Variable ' + tmpVarInFile + ' (aliased to variable ' + wkwargs['variable'] + ') found in ' + files.split(" ")[0]) return wkwargs else: clogger.info('-- Variable ' + tmpVarInFile + ' (aliased to variable ' + wkwargs['variable'] + ') was not found in ' + files.split(" ")[ 0]) # clogger.info('--> Try with another project than '+project+' or another variable name') else: clogger.info('-- No file found for project ' + project + ' and ' + repr(wkwargs)) return kwargs
[docs]def ds(*args, **kwargs): """Returns a dataset from its full Climate Reference Syntax string. Example :: >>> ds('CMIP5.historical.pr.[1980].global.monthly.CNRM-CM5.r1i1p1.mon.Amon.atmos.last') Also a shortcut for :py:meth:`~climaf.classes.cdataset`, when used with with only keywords arguments. Example :: >>> cdataset(project='CMIP5', model='CNRM-CM5', experiment='historical', frequency='monthly',\ simulation='r2i3p9', domain=[40,60,-10,20], variable='tas', period='1980-1989', version='last') In that latter case, you may use e.g. period='last_50y' to get the last 50 years (or less) of data; but this will work only if no dataset's attribute is ambiguous. 'first_50y' also works, similarly; and also period='*'. You must refer to doc at : :py:meth:`~climaf.classes.cdataset` """ if len(args) > 1: raise Climaf_Classes_Error( "Must provide either only a string or only keyword arguments") # clogger.debug("Entering , with args=%s, kwargs=%s"%(`args`,`kwargs`)) if len(args) == 0: if 'period' in kwargs and isinstance(kwargs['period'], six.string_types): if kwargs['period'] == '*' and auto_resolve: clogger.info('Trying to solve for period for %s' % kwargs) if resolve_star_period(kwargs): # Case where there is a '*' only for period. kwargs has been modified clogger.info('Solved period = %s' % kwargs['period']) return cdataset(**select_projects(**kwargs)) else: match = re.match( "(?P<option>last|LAST|first|FIRST)_(?P<duration>[0-9]*)([yY])$", kwargs['period']) if match is not None: return resolve_first_or_last_years(copy.deepcopy(kwargs), match.group('duration'), option=match.group('option').lower()) return cdataset(**select_projects(**kwargs)) crs = args[0] results = [] for cproj in cprojects: try: dataset = cprojects[cproj].crs2ds(crs) except Climaf_Classes_Error: dataset = None if dataset: results.append(dataset) if len(results) > 1: e = "CRS expression %s is ambiguous among projects %s" % ( crs, repr(list(cprojects))) if allow_errors_on_ds_call: clogger.info(e) else: raise Climaf_Classes_Error(e) elif len(results) == 0: e = "CRS expression %s is not valid for any project in %s" % ( crs, repr(list(cprojects))) if allow_errors_on_ds_call: clogger.debug(e) else: raise Climaf_Classes_Error(e) else: rep = results[0] if rep.project == 'file': rep.files = rep.kvp["path"] return rep
[docs]def cfreqs(project, dic): """ Allow to declare a dictionary specific to ``project`` for matching ``normalized`` frequency values to project-specific frequency values Normalized frequency values are : decadal, yearly, monthly, daily, 6h, 3h, fx and annual_cycle When defining a dataset, any reference to a non-standard frequency will be left unchanged both in the datset's CRS and when trying to access corresponding datafiles Examples:: >>> cfreqs('CMIP5',{'monthly':'mon' , 'daily':'day' }) """ # frequencies[project] = dic
[docs]def crealms(project, dic): """ Allow to declare a dictionary specific to ``project`` for matching ``normalized`` realm names to project-specific realm names Normalized realm names are : atmos, ocean, land, seaice When defining a dataset, any reference to a non-standard realm will be left unchanged both in the datset's CRS and when trying to access corresponding datafiles Examples:: >>> crealms('CMIP5',{'atmos':'ATM' , 'ocean':'OCE' }) """ # realms[project] = dic
[docs]def calias(project, variable, fileVariable=None, scale=1., offset=0., units=None, missing=None, filenameVar=None, conditions=None): """ Declare that in ``project``, ``variable`` is to be computed by reading ``filevariable``, and applying ``scale`` and ``offset``; (see first example erai below) Arg ``conditions`` allows to restrict the effect, based on the value of some facets. It is a dictionary of applicable values or values'list, which keys are the facets (see example CMIP6 below) Arg ``filenameVar`` allows to tell which fake variable name should be used when computing the filename for this variable in this project (for optimisation purpose); (see seconf example erai below) Can tell that a given constant must be interpreted as a missing value (see 4th example, EM, below) ``variable`` may be a list. In that case, ``fileVariable`` and ``filenameVar``, if provided, should be parallel lists `` variable`` can be a comma separated list of variables, in which case this tells how variables are grouped in files (it make sense to use filenameVar in that case, as this is a way to provide the label which is unique to this grouping of variable; scale, offset and missing args must be the same for all variables in that case Example :: >>> calias('erai','tas_degC','t2m',scale=1., offset=-273.15) # scale and offset may be provided >>> calias('CMIP6','evspsbl',scale=-1., conditions={ 'model':'CanESM5' , 'version': ['20180103', '20190112'] }) >>> calias('erai','tas','t2m',filenameVar='2T') >>> calias('EM',[ 'sic', 'sit', 'sim', 'snd', 'ialb', 'tsice'], missing=1.e+20) >>> calias('data_CNRM','so,thetao',filenameVar='grid_T_table2.2') NB: A wrapper with same name of this function is defined in :py:func:`climaf.driver.calias` and it is the one which is exported by module climaf.api. It allows to use a list of variable. """ if not fileVariable: fileVariable = variable if not filenameVar: filenameVar = None if project not in cprojects: raise Climaf_Classes_Error("project %s is not known" % project) if project not in aliases: aliases[project] = dict() if not isinstance(variable, list): variable = [variable] if not isinstance(filenameVar, list): filenameVar = [filenameVar] if not isinstance(fileVariable, list): fileVariable = [fileVariable] if not isinstance(units, list): units = [units] if conditions is not None: for kw in conditions: if kw not in cprojects[project].facets: raise Climaf_Classes_Error( "Keyword \"%s\" is not allowed for project %s" % (kw, project)) for v, u, fv, fnv in zip(variable, units, fileVariable, filenameVar): aliases[project][v] = (fv, scale, offset, u, fnv, missing, conditions)
def varIsAliased(project, variable): """ Return a n-uplet (fileVariable, scale, offset, filevarName, missing,conditions) defining how to compute a 'variable' which is not in files, for the 'project' """ if project in aliases and variable in aliases[project]: return aliases[project][variable] def cmissing(project, missing, *kwargs): """ Declare that in 'project', a given constant must be interpreted as a missing value, for a given set of project's attributes values Such a declaration must follow all ``calias`` declarations for the same project """ pass # raise NotImplementedError() class cpage_all(cobject): def __init__(self, fig_lines=None, orientation=None, page_width=1000., page_height=1500., title="", x=0, y=2): """ Common tools for classes cpage and cpage_pdf. """ if fig_lines is None: raise Climaf_Classes_Error("fig_lines must be provided") if orientation is not None: if orientation in ['portrait', ]: page_width = 1000. page_height = 1500. elif orientation in ['landscape', ]: page_width = 1500. page_height = 1000. else: raise Climaf_Classes_Error("if set, orientation must be 'portrait' or 'landscape' (not %s)" % orientation) self.page_width = page_width self.page_height = page_height self.title = title self.x = x self.y = y def check_figs_list(self, fig_lines, widths, heights): if not widths: widths = [round(1. / len(fig_lines[0]), 2)] * len(fig_lines[0]) self.widths = widths if not heights: heights = [round(1. / len(fig_lines), 2)] * len(fig_lines) self.heights = heights if not all(isinstance(fig_line, list) for fig_line in fig_lines): raise Climaf_Classes_Error( "each element in fig_lines must be a list of figures") if not all([len(fig_lines[i]) == len(self.widths) for i in range(1, len(fig_lines))]): raise Climaf_Classes_Error("each line in fig_lines must have same dimension as widths %d" % len(self.widths)) if len(fig_lines) != len(self.heights): raise Climaf_Classes_Error( "fig_lines must have same size than heights") self.fig_lines = fig_lines def check_figs_cens(self, fig_lines, widths, heights): figs = [fig_lines[fig] for fig in fig_lines.order] if not widths: widths = [1., ] self.widths = widths if not heights: heights = [round(1. / len(figs), 2)] * len(figs) self.heights = heights if len(figs) < len(heights) * len(widths): figs.extend([None] * (len(heights) * len(widths) - len(figs) + 1)) self.fig_lines = [ figs[x: x + len(widths)] for x in range(0, len(heights) * len(widths), len(widths))] def buildcrs(self, crsrewrite=None, period=None): rep = list() for line in self.fig_lines: if crsrewrite is not None: rep.append("[%s]" % ",".join([f.buildcrs(crsrewrite=crsrewrite) if f is not None else repr(f) for f in line])) else: rep.append("[%s]" % ",".join( [f.crs if f is not None else repr(f) for f in line])) return rep
[docs]class cpage(cpage_all): def __init__(self, fig_lines=None, widths=None, heights=None, fig_trim=True, page_trim=True, format="png", orientation=None, page_width=1000., page_height=1500., title="", x=0, y=26, ybox=50, pt=24, font="Times-New-Roman", gravity="North", background="white", insert="", insert_width=200): """ Builds a CliMAF cpage object, which represents an array of figures (output: 'png' or 'pdf' figure) Args: fig_lines (a list of lists of figure objects or an ensemble of figure objects): each sublist of 'fig_lines' represents a line of figures widths (list, optional): the list of figure widths, i.e. the width of each column. By default, if fig_lines is: - a list of lists: spacing is even - an ensemble: one column is used heights (list, optional): the list of figure heights, i.e. the height of each line. By default spacing is even fig_trim (logical, optional): to turn on/off triming for all figures. It removes all the surrounding extra space of figures in the page, either True (default) or False page_trim (logical, optional): to turn on/off triming for the page. It removes all the surrounding extra space of the page, either True (default) or False format (str, optional) : graphic output format, either 'png' (default) or 'pdf'(not recommended) page_width (float, optional) : width resolution of resultant image; CLiMAF default: 1000. page_height (float, optional) : height resolution of resultant image; CLiMAF default: 1500. orientation (str,optional): if set, it supersedes page_width and page_height with values 1000*1500 (for portrait) or 1500*1000 (for landscape) title (str, optional) : append a label below or above (depending on optional argument 'gravity') figures in the page. insert(str,optional) : the filename for an image to insert (centered at the bottom) insert_width(int,optional) : the width at which the inserted image will be scaled (in pixels) If title is activated: - x, y (int, optional): annotate the page with text. x is the offset towards the right from the upper left corner of the page, while y is the offset upward or the bottom according to the optional argument 'gravity' (i.e. 'South' or 'North' respectively); CLiMAF default: x=0, y=26. For more details, see: http://www.imagemagick.org/script/command-line-options.php?#annotate ; where x and y correspond respectively to tx and ty in ``-annotate {+-}tx{+-}ty text`` - ybox (int, optional): width of the assigned box for title; CLiMAF default: 50. For more details, see: http://www.imagemagick.org/script/command-line-options.php?#splice - pt (int, optional): font size of the title; CLiMAF default: 24 - font (str, optional): set the font to use when creating title; CLiMAF default: 'Times-New-Roman'. To print a complete list of fonts, use: 'convert -list font' - gravity (str, optional): the choosen direction specifies where to position title; CLiMAF default: 'North'. For more details, see: http://www.imagemagick.org/script/command-line-options.php?#gravity - background (str, optional): background color of the assigned box for title; default: 'white'. To print a complete list of color names, use: 'convert -list color' Example: Using no default value, to create a page with 2 columns and 3 lines:: >>> tas_ds=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', period='1980-1981') >>> tas_avg=time_average(tas_ds) >>> fig=plot(tas_avg,title='title') >>> my_page=cpage([[None, fig],[fig, fig],[fig,fig]], widths=[0.2,0.8], ... heights=[0.33,0.33,0.33], fig_trim=False, page_trim=False, ... format='pdf', title='Page title', x=10, y=20, ybox=45, ... pt=20, font='Utopia', gravity='South', background='grey90', ... page_width=1600., page_height=2400.) """ super(cpage, self).__init__(fig_lines=fig_lines, orientation=orientation, page_width=page_width, page_height=page_height, title=title, x=x, y=y) self.fig_trim = fig_trim self.page_trim = page_trim self.format = format self.ybox = ybox self.pt = pt self.font = font self.gravity = gravity self.background = background self.insert = insert self.insert_width = insert_width if self.ybox < (self.y + self.pt): raise Climaf_Classes_Error( "Title exceeds the assigned box: ybox<y+pt") if not isinstance(fig_lines, (list, cens)): raise Climaf_Classes_Error("fig_lines must be a CliMAF ensemble or a list " "of lists (each representing a line of figures)") elif isinstance(fig_lines, list): self.check_figs_list(fig_lines=fig_lines, widths=widths, heights=heights) # case of an ensemble (cens) if heights and widths are not provided elif not widths and not heights: self.scatter_on_page([fig_lines[label] for label in fig_lines.order]) else: # case of an ensemble (cens) with heights or widths provided self.check_figs_cens(fig_lines=fig_lines, widths=widths, heights=heights) # self.crs = self.buildcrs() def scatter_on_page(self, figs): """ Try to optimize nb of columns and lines, based on figs list length """ n = len(figs) if n in range(1, 4): nx, ny = 1, n elif n == 4: nx, ny = 2, 2 elif n in range(5, 7): nx, ny = 2, 3 elif n in range(7, 9): nx, ny = 2, 4 elif n in range(9, 13): nx, ny = 3, 4 elif n in range(13, 16): nx, ny = 3, 5 elif n in range(16, 21): nx, ny = 4, 5 elif n in range(21, 25): nx, ny = 4, 6 elif n in range(25, 36): nx, ny = 5, 7 elif n in range(36, 49): nx, ny = 6, 8 else: raise Climaf_Classes_Error("Too many figures in page") figs.extend([None] * (nx * ny - len(figs) + 1)) lines = [figs[x: x + nx] for x in range(0, nx * ny, nx)] self.fig_lines = lines self.widths = [round(1. / nx, 2)] * nx self.heights = [round(1. / ny, 2)] * ny def buildcrs(self, crsrewrite=None, period=None): rep = super(cpage, self).buildcrs(crsrewrite=crsrewrite, period=period) param = "%s,%s, fig_trim='%s', page_trim='%s', format='%s', page_width=%d, page_height=%d" % \ (repr(self.widths), repr(self.heights), self.fig_trim, self.page_trim, self.format, self.page_width, self.page_height) if isinstance(self.title, six.string_types) and len(self.title) != 0: param = "%s, title='%s', x=%d, y=%d, ybox=%d, pt=%d, font='%s', gravity='%s', backgroud='%s', " \ "insert='%s', insert_width=%d" % (param, self.title, self.x, self.y, self.ybox, self.pt, self.font, self.gravity, self.background, self.insert, self.insert_width) rep = "cpage([%s],%s)" % (",".join(rep), param) return rep
[docs]class cpage_pdf(cpage_all): def __init__(self, fig_lines=None, widths=None, heights=None, orientation=None, page_width=1000., page_height=1500., scale=1., openright=False, title="", x=0, y=2, titlebox=False, pt="Huge", font="\\familydefault", background="white"): """ Builds a CliMAF cpage_pdf object, which represents an array of figures (output: 'pdf' figure). Figures are automatically centered in the page using 'pdfjam' tool; see http://www2.warwick.ac.uk/fac/sci/statistics/staff/academic-research/firth/software/pdfjam Args: fig_lines (a list of lists of figure objects or an ensemble of figure objects): each sublist of 'fig_lines' represents a line of figures widths (list, optional): the list of figure widths, i.e. the width of each column. By default, if fig_lines is: - a list of lists: spacing is even - an ensemble: one column is used heights (list, optional): the list of figure heights, i.e. the height of each line. By default spacing is even page_width (float, optional): width resolution of resultant image; CLiMAF default: 1000. page_height (float, optional): height resolution of resultant image; CLiMAF default: 1500. orientation (str,optional): if set, it supersedes page_width and page_height with values 1000*1500 (for portrait) or 1500*1000 (for landscape) scale (float, optional): to scale all input pages; default:1. openright (logical, optional): this option puts an empty figure before the first figure; default: False. For more details, see: http://ftp.oleane.net/pub/CTAN/macros/latex/contrib/pdfpages/pdfpages.pdf title (str, optional): append a label in the page. If title is activated, it is by default horizontally centered: - x (int, optional): title horizontal shift (in cm). - y (int, optional): vertical shift from the top of the page (in cm); only positive (down) values have an effect, default=2 cm - titlebox (logical, optional): set it to True to frame the text in a box, frame color is 'black' - pt (str, optional): title font size; CLiMAF default: 'Huge' (corresponding to 24 pt). You can set or not a backslash before this argument. - font (str, optional): font abbreviation among available LaTex fonts; default: '\\\\\\\\familydefault'. - background (str, optional): frame fill background color; among LaTex 'fcolorbox' colors; default: 'white'. Left and right margins are set to 2cm. Example: Using no default value, to create a PDF page with 2 columns and 3 lines:: >>> tas_ds=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', period='1980-1981') >>> tas_avg=time_average(tas_ds) >>> fig=plot(tas_avg,title='title',format='pdf') >>> crop_fig=cpdfcrop(fig) >>> my_pdfpage=cpage_pdf([[crop_fig,crop_fig],[crop_fig, crop_fig],[crop_fig,crop_fig]], ... widths=[0.2,0.8], heights=[0.33,0.33,0.33], page_width=800., page_height=1200., ... scale=0.95, openright=True, title='Page title', x=-5, y=10, titlebox=True, ... pt='huge', font='ptm', background='yellow') # Font name is 'Times' """ super(cpage_pdf, self).__init__(fig_lines=fig_lines, orientation=orientation, page_width=page_width, page_height=page_height, title=title, x=x, y=y) self.scale = scale self.openright = openright self.titlebox = titlebox self.pt = pt self.font = font self.background = background if not isinstance(fig_lines, (list, cens)): raise Climaf_Classes_Error("fig_lines must be a CliMAF ensemble or a list " "of lists (each representing a line of figures)") elif isinstance(fig_lines, list): self.check_figs_list(fig_lines=fig_lines, widths=widths, heights=heights) else: # case of an ensemble (cens) self.check_figs_cens(fig_lines=fig_lines, widths=widths, heights=heights) # self.crs = self.buildcrs() def buildcrs(self, crsrewrite=None, period=None): rep = super(cpage_pdf, self).buildcrs( crsrewrite=crsrewrite, period=period) param = "%s,%s, page_width=%d, page_height=%d, scale=%.2f, openright='%s'" % \ (repr(self.widths), repr(self.heights), self.page_width, self.page_height, self.scale, self.openright) if isinstance(self.title, six.string_types) and len(self.title) != 0: param = "%s, title='%s', x=%d, y=%d, titlebox='%s', pt='%s', font='%s', backgroud='%s'" % \ (param, self.title, self.x, self.y, self.titlebox, self.pt, self.font, self.background) rep = "cpage_pdf([%s],%s)" % (",".join(rep), param) return rep
def guess_projects(crs): """ Return the list of projects involved in the datasets involved in a CRS expression. """ def guess_project(crs): """ Guess which is the project name for a dataset's crs, with minimum assumption on the separator used in the project """ separators = [r'.', r'_', r'£', r'$', r'@', r'_', r'|', r'&', r"-", r"=", r"^", r";", r":", r"!", r'§', r'/', r'.', r'ø', r'+', r'°'] counts = dict() for sep in separators: counts[sep] = crs.count(sep) # Assume that the highest count gives the right separator max = 0 for key in counts: if counts[key] >= max: max = counts[key] sep = key return crs[1:crs.find(sep)] return list(map(guess_project, re.findall(r"ds\(([^)]*)", crs))) def browse_tree(cobj, func, results): """ Browse a CliMAF object's tree, accumulating in 'results' the values returned by 'func' on each tree node or leave (if they are not None) """ if isinstance(cobj, cdataset) or isinstance(cobj, cdummy): res = func(cobj) if res: partial.append(res) elif isinstance(cobj, ctree): res = func(cobj.operator) if res: partial.append(res) for op in cobj.operands: browse_tree(op, func, partial) elif isinstance(cobj, scriptChild): browse_tree(cobj.father, func, partial) elif isinstance(cobj, cpage): for line in cobj.fig_lines: list(map(lambda x: browse_tree(x, func, partial), line)) elif cobj is None: return else: clogger.error("Cannot yet handle object :%s", repr(cobj)) return def domainOf(cobject): """ Returns a domain for a CliMAF object : if object is a dataset, returns its domain, otherwise returns domain of first operand """ if isinstance(cobject, cdataset): if isinstance(cobject.domain, list): rep = "" for coord in cobject.domain[0:-1]: rep = r"%s%d," % (rep, coord) rep = "%s%d" % (rep, cobject.domain[-1]) return rep else: if cobject.domain == "global": return "" else: return cobject.domain elif isinstance(cobject, ctree): clogger.debug( "For now, domainOf logic for scripts output is basic (1st operand) - TBD") return domainOf(cobject.operands[0]) elif isinstance(cobject, scriptChild): clogger.debug( "For now, domainOf logic for scriptChilds is basic - TBD") return domainOf(cobject.father) elif isinstance(cobject, cens): clogger.debug( "for now, domainOf logic for 'cens' objet is basic (1st member)- TBD") return domainOf(list(cobject.values())[0]) elif cobject is None: return "none" else: if cobject != "": clogger.error("Unkown class for argument " + repr(cobject)) def varOf(cobject): return attributeOf(cobject, "variable") def modelOf(cobject): return attributeOf(cobject, "model") def simulationOf(cobject): return attributeOf(cobject, "simulation") def experimentOf(cobject): return attributeOf(cobject, "experiment") def realizationOf(cobject): return attributeOf(cobject, "realization") def projectOf(cobject): return attributeOf(cobject, "project") def realmOf(cobject): return attributeOf(cobject, "realm") def gridOf(cobject): return attributeOf(cobject, "grid") def attributeOf(cobject, attrib): """ Returns the attribute for a CliMAF object : if object is a dataset, returns its attribute property, otherwise returns attribute of first operand """ if isinstance(cobject, cdataset): val = getattr(cobject, attrib, None) if val is not None: return val else: return cobject.kvp.get(attrib) elif isinstance(cobject, cens): return attributeOf(list(cobject.values())[0], attrib) elif getattr(cobject, attrib, None): value = getattr(cobject, attrib) clogger.debug("Find value for object's %s... %s" % (attrib, value)) return value elif isinstance(cobject, ctree): clogger.debug("for now, varOf logic is basic (1st operand) - TBD") # TODO: Check which operands in the correct one value = getattr(cobject, attrib, None) if value is None: value = attributeOf(cobject.operands[0], attrib) clogger.debug("Find value for current first operand... %s" % value) return value else: clogger.debug("Find value for current object... %s" % value) return value elif isinstance(cobject, cdummy): return "dummy" elif isinstance(cobject, cpage) or isinstance(cobject, cpage_pdf): return None elif cobject is None: return '' else: raise Climaf_Classes_Error( "Unknown class for argument " + repr(cobject)) def timePeriod(cobject): """ Returns a time period for a CliMAF object : if object is a dataset, returns its time period, otherwise analyze complex case and reurns something sensible """ if isinstance(cobject, cdataset): return cobject.period elif isinstance(cobject, ctree): clogger.debug("timePeriod : processing %s,operands=%s" % (cobject.script, repr(cobject.operands))) if cobject.script.flags.doCatTime and len(cobject.operands) > 1: clogger.debug( "Building composite period for results of %s" % cobject.operator) periods = [timePeriod(op) for op in cobject.operands] merged_period = merge_periods(periods) if len(merged_period) > 1: raise Climaf_Driver_Error("Issue when time assembling with %s, periods are not consecutive : %s" % (cobject.operator, merged_period)) return merged_period[0] else: clogger.debug( "timePeriod logic for script is 'choose 1st operand' %s" % cobject.script) return timePeriod(cobject.operands[0]) elif isinstance(cobject, scriptChild): clogger.debug( "for now, timePeriod logic for scriptChilds is basic - TBD") return timePeriod(cobject.father) elif isinstance(cobject, cens): clogger.debug( "for now, timePeriod logic for 'cens' objet is basic (1st member)- TBD") return timePeriod(list(cobject.values())[0]) else: return None # clogger.error("unkown class for argument "+`cobject`) def resolve_star_period(kwargs): # If dict 'kwargs' has only kw 'period' with value '*', resolve # corresponding dataset on period, and sets kwargs['period'] # accordingly (if dataset has only one corresponding period) if 'period' in kwargs and kwargs['period'] == '*' and \ not any(["*" in kwargs[k] or "?" in kwargs[k] for k in kwargs if k != 'period']): explorer = cdataset(** select_projects(** kwargs)) attributes = explorer.explore(option='choices') if 'period' in attributes: periods = attributes['period'] if len(periods) == 1: kwargs['period'] = str(periods[0]) return True return False def resolve_first_or_last_years(kwargs, duration, option="last"): # Returns a dataset after translation of period like 'last_50y' kwargs['period'] = '*' explorer = ds(**kwargs) attributes = explorer.explore(option='choices') if 'period' in attributes: periods = attributes['period'] if option == 'last': period = periods[-1] kwargs['period'] = lastyears(period, int(duration)) if option == 'first': period = periods[0] kwargs['period'] = firstyears(period, int(duration)) else: kwargs['period'] = '*' explorer = ds(**kwargs) return explorer.explore('resolve') def test(): # clogger.basicConfig(level=clogger.DEBUG) # clogger.basicConfig(format='"%(asctime)s [%(funcName)s: %(filename)s,%(lineno)d] %(message)s : %(levelname)s', # level=clogger.DEBUG) cdef("project", "CMIP5") # cdef("project","PR6") cdef("model", "CNRM-CM5") cdef("experiment", "historical") cdef("simulation", "r1i1p1") cdef("period", "197901-198012") cdef("domain", "global") # tos = cdataset(experiment="rcp85", variable="tos", period="19790101-19790102") tr = ctree("operator", tos, para1="val1", para2="val2") print(tr) # tos.pr() # # ds1=Dataset(period="1850-2012") # genericDataSets(ds1.crs) # ds2=Dataset(project="CMIP3") # ex="toto("+ ds1.crs + "," + ds2.crs # print genericDataSets(ex) # print firstGenericDataSet(ex) def t2(): p = period("1984-1984") if __name__ == "__main__": test()