Source code for climaf.classes

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
 Basic types and syntax for a CLIMAF Reference Syntax interpreter and driver
 This is a first prototype, where the interpreter is Python itself


"""
# Created : S.Sénési - 2014

from __future__ import print_function, division, unicode_literals, absolute_import

import re
import string
import copy
import os.path
from collections import defaultdict
from functools import reduce, partial
import six
import warnings
import json
import shutil
import glob
import xarray as xr
from datetime import timedelta

from env.environment import *
from climaf.utils import Climaf_Classes_Error, remove_keys_with_same_values
from climaf.dataloc import isLocal, getlocs, selectFiles, dataloc
from climaf.period import init_period, cperiod, merge_periods, intersect_periods_list,\
    lastyears, firstyears, group_periods, freq_to_minutes
from env.clogging import clogger
from climaf.netcdfbasics import fileHasVar, varsOfFile, attrOfFile, timeLimits, model_id

# Ignore every DeprecationWarning from the moment this module is imported
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Flag: should function ds() try to resolve datasets declared with period='*' ?
# NOTE(review): the code reading this flag is not visible in this part of the file
auto_resolve = False


def derive_cproject(name, parent_name, new_project_facets=None):
    """
    Create a new project named 'name' from the project 'parent_name' adding the facets listed in 'new_project_facets'
    if specified. Also derive the location list from the parent project.

    :param name: name of the new project
    :param parent_name: name of the source project
    :param new_project_facets: the list of the facets to add to the new project (could be already present in parent).
                               None (the default) means no additional facet.
    :return: the new project
    :raises Climaf_Classes_Error: if a project, or a data location, already uses 'name'
    """
    if name in cprojects or any(elt.project == name for elt in locs):
        raise Climaf_Classes_Error(
            "Could not derive a project from an existing one if it already exists: %s." % name)
    if new_project_facets is None:
        new_project_facets = []
    new_project = cprojects[parent_name].derive(name, new_project_facets)
    # Work on a snapshot of the parent's locations, in case elt.derive() adds entries
    # to 'locs' while we loop (presumably it does - to be confirmed in climaf.dataloc)
    parent_locs = [elt for elt in locs if elt.project == parent_name]
    for elt in parent_locs:
        elt.derive(name)
    return new_project


class cproject(object):
    def __init__(self, name, *args, **kwargs):
        """
        Declare a project and its facets/attributes in CliMAF (see below)

        Args:
          name (string) : project name;
           do not use the chosen separator in it (see below)
          args (strings) : attribute names;
           they are free; do not use the chosen separator in it (see below); **CliMAF
           anyway will add attributes :
           project, simulation, variable, period, and domain**
          kwargs (dict) :
           can only be used with keywords :

            - ``sep`` or ``separator`` for indicating the symbol separating
              facets in the dataset syntax. Defaults to ".".
            - ``ensemble`` for declaring a list of attribute
              names which are allowed for defining an ensemble in
              this project ('simulation' is automatically allowed)
            - ``use_frequency`` to declare that the frequency can not be derived from time bounds of the file.
              In this case the facet ``frequency`` is mandatory for the project and a default value must be defined.

        Returns : a cproject object, which string representation is
        the pattern later used in CliMAF Reference Syntax for
        representing datasets in this project

        A 'cproject' is the definition of a set of attributes, or
        facets, which values will completely define a 'dataset' as
        managed by CliMAF. Its name is one of the possible keys
        for describing data locations (see
        :py:class:`~climaf.dataloc.dataloc`)

        For instance, cproject CMIP5, after its Data Reference Syntax,
        has attributes :
        model, simulation (used for rip), experiment, variable, frequency, realm, table, version


        **A number of projects are built-in**. See :py:mod:`~climaf.projects`

        A dataset in a cproject declared as ::

        >>> cproject('MINE','myfreq','myfacet',sep='_')

        will return ::

          ${project}_${simulation}_${variable}_${period}_${domain}_${myfreq}_${myfacet}

        and will have datasets represented as  e.g.::

          'MINE_hist_tas_[1980-1999]_global_decadal_gabu'

        while an example for built-in cproject CMIP5 will be::

          'CMIP5.historical.pr.[1980].global.monthly.CNRM-CM5.r1i1p1.mon.Amon.atmos.last'

        The attributes list should include all facets which are useful
        for distinguishing datasets from each other, and for computing
        datafile pathnames in the 'generic' organization (see
        :py:class:`~climaf.dataloc.dataloc`)

        A default value for a given facet can be specified, by providing a tuple
        (facet_name,default_value) instead of the facet name. This default value is
        however of lower priority than the value set using :py:func:`~climaf.classes.cdef`

        A project can be declared as having non-standard variable
        names in datafiles, or variables that should undergo re-scaling; see
        :py:func:`~climaf.classes.calias`

        A project can be declared as having non-standard frequency names (this is
        used when accessing datafiles); see :py:func:`~climaf.classes.cfreqs`)

        Raises Climaf_Classes_Error if the separator is ','
        """
        if name in cprojects:
            clogger.warning("Redefining project %s" % name)
        self.project = name
        #
        # Facets that every project has, always first and in that order
        forced = ['project', 'simulation', 'variable', 'period', 'domain']
        self.facets = list(forced)
        self.facet_defaults = dict()
        self.facet_authorized_values = dict()
        for a in args:
            if isinstance(a, tuple):
                # A (facet_name, default_value) pair
                facet_name, facet_default = a
                self.facet_defaults[facet_name] = facet_default
            else:
                facet_name = a
            # Skip the forced facets and any facet declared more than once (which
            # happens when deriving a project with a facet the parent already has),
            # otherwise the facet would show up twice in the CRS pattern
            if facet_name not in self.facets:
                self.facets.append(facet_name)
        #
        # 'sep' takes precedence over 'separator' when both are provided
        self.separator = kwargs.get("sep", kwargs.get("separator", "."))
        if self.separator == ",":
            raise Climaf_Classes_Error(
                "Character ',' is forbidden as a project separator")
        cprojects[name] = self
        # Build the pattern for the datasets CRS for this cproject. Joining (rather
        # than trimming one trailing character) is right for separators of any length
        self.crs = self.separator.join("${%s}" % f for f in self.facets)
        # Create an attribute holding the list of facets which are allowed
        # for defining an ensemble, and put a first facet there
        self.attributes_for_ensemble = ['simulation']
        if 'ensemble' in kwargs:
            self.attributes_for_ensemble.extend(kwargs["ensemble"])
        self.use_frequency = kwargs.get("use_frequency", False)

    def derive(self, new_name, new_facets=None):
        """
        Derive a new project from this one with name 'new_name' and possibly new facets listed in 'new_facets'.
        The separator, the facet default values, the ensemble attributes and the 'use_frequency' flag of
        this project are passed on to the new one.
        :param new_name: name of the newly created project
        :param new_facets: list of the new facets (None, the default, means no new facet)
        :return: the new project
        """
        # Facets with a default value are passed as (name, default) pairs
        args = [(a, self.facet_defaults[a]) if a in self.facet_defaults else a
                for a in self.facets]
        if new_facets:
            args.extend(new_facets)
        kwargs = dict()
        kwargs["separator"] = self.separator
        # First ensemble attribute is always 'simulation', which __init__ adds by itself
        if len(self.attributes_for_ensemble) > 1:
            kwargs["ensemble"] = self.attributes_for_ensemble[1:]
        kwargs["use_frequency"] = self.use_frequency
        return cproject(new_name, *args, **kwargs)

    def __repr__(self):
        return self.crs

    def crs2ds(self, crs):
        """
        Try to interpret string ``crs`` as the CRS of a dataset for
        the cproject. Return the dataset if OK, and None otherwise
        """
        fields = crs.split(self.separator)
        if len(fields) != len(self.facets) or fields[0] != self.project:
            return None
        return cdataset(**dict(zip(self.facets, fields)))

    def build_cvalid_from_tree_of_files(self, project_name=None):
        """
        Build a dict of the values found on disk for the facets appearing in the
        data locations patterns of a project, by globbing the directory tree.
        :param project_name: name of the project which locations are scanned; defaults to this project's name
        :return: a dict which keys are facet names and values are sorted lists of distinct values

        Needs a default value for facet 'root' (a KeyError is raised otherwise).
        """
        # NOTE(review): the code below is kept as is, but looks unfinished and should be checked
        # against real data locations before being relied upon:
        #  - os.path.dirname() is applied to the entries of 'locs', which are also read
        #    for their attribute 'project'
        #  - 'facet_regexp.match' only matches at the very beginning of the pattern
        #  - 'str.replace(facet_reg, ...)' looks for the regexp source text (with its
        #    backslashes), not for the '${facet}' text itself
        if project_name is None:
            project_name = self.project
        # Find out the directory paths to be checked (other keys can be considered by hand)
        project_locs = [os.path.dirname(loc) for loc in locs if loc.project in [
            project_name, ]]
        # Do not consider root
        project_locs = [loc.replace(
            "${root}", self.facet_defaults["root"]) for loc in project_locs]
        facets_regexp = re.compile(r"\$\{(?P<facet>[^\{^\}]+)\}")
        # For each location pattern, the list of facet names it uses, in order of appearance
        list_facets = list()
        for loc in project_locs:
            list_facets.append([m.groupdict()["facet"]
                               for m in facets_regexp.finditer(loc)])
        dict_facets = defaultdict(list)
        for (loc, facets) in zip(project_locs, list_facets):
            # Patterns still to be expanded for the current facet
            loc_list = [loc, ]
            tmp_loc_list = list()
            for facet in facets:
                facet_reg = r"\$\{%s\}" % facet
                facet_regexp = re.compile(facet_reg)
                for tmp_loc in loc_list:
                    match = facet_regexp.match(tmp_loc)
                    if match is not None:
                        begin_tmp_loc = tmp_loc[:tmp_loc.find(
                            os.sep, match.end())]
                        begin_tmp_loc = begin_tmp_loc.replace(facet_reg, "*")
                        list_values = glob.glob(begin_tmp_loc)
                        list_values = [val.replace(tmp_loc[:match.start()], "")[:len(tmp_loc) - match.end()]
                                       for val in list_values]
                        dict_facets[facet].extend(list_values)
                        tmp_loc_list.extend(
                            [tmp_loc.replace(facet_reg, val) for val in list_values])
                    else:
                        tmp_loc_list.append(tmp_loc)
                loc_list, tmp_loc_list = tmp_loc_list, list()
        # Keep each value once, in sorted order
        for key in dict_facets:
            dict_facets[key] = sorted(list(set(dict_facets[key])))
        return dict_facets

    def build_cvalid_conf_file_name(self, project_name=None, choice="both"):
        """
        Build cvalid conf file name from project name.
        :param project_name: name of the default project to be used; defaults to this project's name
        :param choice: where to look for the conf file, either "user" (in $HOME/.climaf), "default" (in a
                       sub-directory of the directory of this module) or "both"
        :return: a list of possible conf file names (user's one first when choice is "both")
        :raises ValueError: if choice has another value
        """
        if project_name is None:
            project_name = self.project
        conf_file_name = "cvalid_{}.json".format(project_name)
        cvalid_user_conf_file = os.sep.join(
            [os.environ["HOME"], ".climaf", conf_file_name])
        # NOTE(review): the sub-directory used here is 'project', while the documentation
        # of this module refers to 'climaf.projects' - to be confirmed
        cvalid_default_conf_file = os.sep.join(
            [os.path.dirname(os.path.abspath(__file__)), "project", conf_file_name])
        if choice == "both":
            return [cvalid_user_conf_file, cvalid_default_conf_file]
        elif choice == "user":
            return [cvalid_user_conf_file, ]
        elif choice == "default":
            return [cvalid_default_conf_file, ]
        else:
            raise ValueError("Unknown value for choice: %s" % choice)

    def initialize_cvalid_values(self, project_name=None):
        """
        Initialize cvalid values for the current project with values defined in a json file, either in the
        user's climaf conf directory or, failing that, in the CliMAF default one. Do nothing if none exists.
        :param project_name: name of the project to build the conf file name
        """
        cvalid_conf_files = self.build_cvalid_conf_file_name(
            project_name=project_name, choice="both")
        cvalid_conf_files = [f for f in cvalid_conf_files if os.path.isfile(f)]
        if cvalid_conf_files:
            # User's file comes first in the list, hence takes precedence
            cvalid_conf_file = cvalid_conf_files[0]
            # json.load needs an open file, not a file name
            with open(cvalid_conf_file) as conf:
                content = json.load(conf)
            for key in content:
                self.cvalid(key, content[key])

    def initialize_user_cvalid_values(self, project_name=None, from_tree_of_files=False, force=False):
        """
        Initialize the user's configuration file for project project_name.
        If the configuration file already exists, do nothing except if force=True.
        If from_tree_of_file=True, read the tree of files to find out the possible values; this is
        only taken into account when replacing an existing file (i.e. with force=True).
        Otherwise, the default configuration file is copied, if it exists.
        :param project_name: name of the default project
        :param from_tree_of_files: boolean, should the tree of file be read?
        :param force: boolean, should an existing user conf file be bypassed?
        """
        cvalid_user_conf_file = self.build_cvalid_conf_file_name(
            project_name=project_name, choice="user")[0]
        cvalid_default_conf_file = self.build_cvalid_conf_file_name(
            project_name=project_name, choice="default")[0]
        if os.path.isfile(cvalid_user_conf_file):
            if not force:
                clogger.warning("User's cvalid configuration file %s already exists and force=False, do nothing." %
                                cvalid_user_conf_file)
                return
            clogger.warning("User's cvalid configuration file %s already exists and force=True, replace it" %
                            cvalid_user_conf_file)
            if from_tree_of_files:
                # Scan the files first, so that the existing file survives a failure of the scan
                content = self.build_cvalid_from_tree_of_files(project_name)
                # json.dump needs an open file, not a file name; mode "w" replaces the former content
                with open(cvalid_user_conf_file, "w") as conf:
                    json.dump(content, conf)
                return
            os.remove(cvalid_user_conf_file)
        # Use the default configuration file only if it actually exists (its name is never empty)
        if os.path.isfile(cvalid_default_conf_file):
            user_conf_dir = os.path.dirname(cvalid_user_conf_file)
            if not os.path.isdir(user_conf_dir):
                os.makedirs(user_conf_dir)
            shutil.copyfile(cvalid_default_conf_file, cvalid_user_conf_file)
        else:
            clogger.error(
                "Default cvalid configuration file %s does not exist" % cvalid_default_conf_file)

    def cvalid(self, attribute, value=None):
        """Set or get the list of valid values for a CliMAF dataset attribute
        or facet (such as e.g. 'model', 'simulation' ...). Useful
        e.g. for constraining those data files which match a dataset
        definition

        Returns the current list (or None if not set) when 'value' is None, and
        records 'value' otherwise. Raises Climaf_Classes_Error if the project
        does not use facet 'attribute'

        Example::

        >>> cvalid('grid' , [ "gr", "gn", "gr1", "gr2" ])

        """
        #
        if attribute not in self.facets:
            raise Climaf_Classes_Error(
                "project '%s' doesn't use facet '%s'" % (self.project, attribute))
        if value is None:
            return self.facet_authorized_values.get(attribute, None)
        else:
            self.facet_authorized_values[attribute] = value

def cdef(attribute, value=None, project=None):
    """
    Set or get the default value for a CliMAF dataset attribute
    or facet (such as e.g. 'model', 'simulation' ...), for use by
    next calls to :py:class:`~climaf.classes.cdataset()` or to
    :py:func:`~climaf.classes.ds`

    With 'value' set, the default is recorded and nothing is returned. Without
    it, the current default is returned : the one of 'project' if it has a
    non-empty one, otherwise the global one, otherwise an empty string.

    Argument 'project' restricts the use/query of the default value to the
    context of the given project, which must have been declared. The default
    value for attribute 'project' itself is always a global one.

    When a project is given, 'attribute' must be one of its facets; there is
    no such check for global defaults.

    Example::

    >>> cdef('project','OCMPI5')
    >>> cdef('frequency','monthly',project='OCMPI5')
    """
    if project not in cprojects:
        raise Climaf_Classes_Error(
            "project '%s' has not yet been declared" % project)
    # Key None of cprojects holds the global defaults
    scope = None if attribute == 'project' else project
    #
    if scope and attribute not in cprojects[scope].facets:
        raise Climaf_Classes_Error(
            "project '%s' doesn't use facet '%s'" % (scope, attribute))
    defaults = cprojects[scope].facet_defaults
    if value is not None:
        defaults[attribute] = value
        return None
    own_default = defaults.get(attribute, None)
    if own_default:
        return own_default
    return cprojects[None].facet_defaults.get(attribute, "")


# Declare the project registered under key None : cdef() stores the global
# defaults there and falls back on them when a project has no default of its own
cproject(None)
# Global default value for facet 'domain'
cdef("domain", "global")

# All cobject instances are meant to be registered in this dictionary, keyed by their CRS.
# NOTE(review): cobject.register() is a no-op in this file, so nothing visible here fills it
cobjects = dict()


class cobject(object):
    """
    Base class for CliMAF objects. Such an object is identified by attribute
    'crs', its string expression in the CLIMAF Reference Syntax.
    """

    def __init__(self):
        # Placeholder CRS, to be replaced by sub-classes
        self.crs = "void"

    def __repr__(self):
        return self.crs

    def __str__(self):
        # Same as __repr__ : the bare CRS, without any prefix
        return self.crs

    def buildcrs(self):
        """To be implemented by sub-classes : return the CRS of the object."""
        raise NotImplementedError

    def register(self):
        """Do nothing : recording the object in 'cobjects' is currently disabled."""
        # Was : cobjects[self.crs]=self
        #       clogger.debug("Object Created ; crs = %s"%(self.crs))
        return None

    def erase(self):
        """Do nothing : un-recording the object from 'cobjects' is currently disabled."""
        # Was : del(cobjects[self.crs])
        #       clogger.debug("Object deleted ; crs = %s"%(self.crs))
        return None

    def __eq__(self, other):
        """
        Tell if two CliMAF objects are the same one.
        :param other: CliMAF object to be compared
        :return: True if 'other' is an instance of the class of this object and has the same CRS,
                 False otherwise
        """
        if not isinstance(other, type(self)):
            return False
        return self.crs == other.crs


class cdummy(cobject):
    """Stand-in object for a dummy argument in a CRS expression."""

    def __init__(self):
        """
        Create a dummy argument; its CRS is whatever buildcrs() returns
        """
        dummy_crs = self.buildcrs()
        self.crs = dummy_crs

    def buildcrs(self, period=None, crsrewrite=None):
        """Return the constant CRS of dummy arguments; both arguments are ignored."""
        return 'ARG'


def processDatasetArgs(**kwargs):
    """
    Perform basic checks on kwargs for functions cdataset and eds
    regarding the project where the dataset is defined
    Also complement with default values as handled by the
    project's definition and by cdef()

    Returns a dict of the values of all facets of the project, in which
    the period is a cperiod object, except if it is the wildcard "*".

    Raises Climaf_Classes_Error if :

      - no project is provided, neither in kwargs nor through cdef(), or the
        project has not been declared
      - a string value includes the separator of the project
      - facet 'simulation' is used for project CMIP6
      - a facet of the project has value None
      - the period could not be turned into a cperiod object
    """
    project = kwargs['project'] if 'project' in kwargs else cdef("project")
    # cdef() returns an empty string, not None, when no default project is set
    if not project:
        raise Climaf_Classes_Error("Must provide a project (Can use cdef)")
    elif project not in cprojects:
        raise Climaf_Classes_Error(
            "Dataset's project '%s' has not "
            "been described by a call to cproject()" % project)
    project_facets = cprojects[project].facets
    sep = cprojects[project].separator
    attval = dict()
    attval["project"] = project
    #
    # Register facets values : the one in kwargs if it is set (and not empty),
    # otherwise the default one
    for facet in project_facets:
        if facet in kwargs and kwargs[facet]:
            val = kwargs[facet]
        else:
            val = cdef(facet, project=project)
        attval[facet] = val
        if val:
            listval = val if isinstance(val, list) else [val]
            for lval in listval:
                # A separator in a value would make the dataset CRS impossible to parse
                if isinstance(lval, six.string_types) and lval.find(sep) >= 0:
                    raise Climaf_Classes_Error(
                        "You cannot use character '%s' when setting '%s=%s' because "
                        "it is the declared separator for project '%s'. "
                        "See help(cproject) for changing it, if needed" % (sep, facet, val, project))
    if attval['project'] == 'CMIP5':
        # Allow for a synonym for 'simulation' in CMIP5 : 'member'
        if 'member' in kwargs and kwargs['member'] not in [None, '']:
            attval['simulation'] = kwargs['member']
            clogger.info(
                'Attribute "member" in project CMIP5 has been translated to "simulation"')
        # Special processing for CMIP5 fixed fields : handling redundancy in facets
        if (attval['table'] == 'fx' or attval['period'] == 'fx' or
                attval['simulation'] == 'r0i0p0' or attval['frequency'] == 'fx'):
            attval['table'] = 'fx'
            attval['period'] = 'fx'
            attval['simulation'] = 'r0i0p0'
            attval['frequency'] = 'fx'
    # Special processing for CMIP6  : facet 'simulation' is forbidden (must use 'realization')
    # A None or empty value is the same as not providing the facet
    if attval['project'] == 'CMIP6' and kwargs.get('simulation'):
        raise Climaf_Classes_Error("You cannot use attribute 'simulation' in CMIP6; please use 'realization'. "
                                   "This if for kwargs=%s" % repr(kwargs))

    # Report all facets without a value in a single error
    missing = ["Project '%s' needs facet '%s'. You may use cdef() for setting a default value"
               % (project, facet)
               for facet in project_facets if attval[facet] is None]
    if missing:
        raise Climaf_Classes_Error("".join(" " + e for e in missing))
    #
    # Facet specific processing : turn period into a cperiod object, except for wildcard
    if 'period' in attval:
        period = attval['period']
        if period == "fx":
            attval['period'] = cperiod(period)
        elif not isinstance(period, cperiod) and period != "*":
            attval['period'] = init_period(period)
    # Check for typing or user's logic errors
    # NOTE(review): only the keys of attval are checked, and these are built from the project
    # facets; keys of kwargs which are not facets of the project are not reported
    unknown = ["Project %s doesn't have facet %s" % (project, facet)
               for facet in attval if facet not in project_facets]
    if unknown:
        raise Climaf_Classes_Error("".join(" " + e for e in unknown))
    if 'period' in attval and not isinstance(attval['period'], cperiod) and attval['period'] not in ["*", ]:
        # The error used to be built without being raised
        raise Climaf_Classes_Error(
            "at end of  process.. : period is not a cperiod")
    return attval


[docs]class cdataset(cobject):
    # def __init__(self,project=None,model=None,simulation=None,period=None,
    #             rip=None,frequency=None,domain=None,variable=None,version='last') :
    def __init__(self, **kwargs):
        """
        Create a CLIMAF dataset.

        A CLIMAF dataset is a description of what the data is (rather than
        the data itself or a file).  It is basically a set of pairs
        attribute-value. The list of attributes actually used to
        describe a dataset is defined by the project it refers
        to.

        To display the attributes you may use for a given project, type e.g.:

        >>> cprojects["CMIP5"]

        For further details on projects , see
        :py:class:`~climaf.classes.cproject`

        None of the project's attributes are mandatory arguments, because
        all attributes defaults to the value set by
        :py:func:`~climaf.classes.cdef` (which also applies if
        providing a None value for an attribute)

        Some attributes have a special format or processing :

        - period : see :py:func:`~climaf.period.init_period`. See also
          function :py:func:`climaf.classes.ds` for added
          flexibility in defining periods as last or first set of years
          among available data

        - domain : allowed values are either 'global' or a list for
          latlon corners ordered as in : [ latmin, latmax, lonmin,
          lonmax ]

        - variable :  name of the geophysical variable ; this should be :

           - either a variable actually included in the datafiles,

           - or a 'derived' variable (see  :py:func:`~climaf.operators_derive.derive` ),

           - or, an aliased variable name (see :py:func:`~climaf.classes.calias` )

        - in project CMIP5 , for facets (frequency, simulation, period, table )  :
          if any is 'fx' (or 'r0i0p0 for simulation), the others are forced to
          'fx' (resp. 'r0i0p0') too.

        Example, using no default value, and addressing some CMIP5 data ::

          >>>  cdataset(project='CMIP5', model='CNRM-CM5', experiment='historical', frequency='monthly',
          >>>           simulation='r2i3p9', domain=[40,60,-10,20], variable='tas', period='1980-1989', version='last')

        You may use wildcard ('*') in attribute values, and use  :py:meth:`~climaf.classes.cdataset.explore`
        for having CliMAF doing something sensible matching such attributes with available data

        Raises Climaf_Classes_Error if the variable is a group of variables (i.e. includes a ',')
        and has an alias which changes its name, scale, offset or missing value
        """
        #
        attval = processDatasetArgs(**kwargs)
        #
        # TBD : Next lines for backward compatibility, but should re-engineer
        self.project = attval["project"]
        self.simulation = attval['simulation']
        self.variable = attval['variable']
        self.period = attval['period']
        self.domain = attval['domain']
        #
        self.model = attval.get('model', "*")
        self.frequency = attval.get('frequency', "*")
        # Normalized name is annual_cycle, but allow also for 'seasonal' for the time being
        is_annual_cycle = self.frequency in ['seasonal', 'annual_cycle']
        # The project may also have its own name for frequency 'annual_cycle'
        freqs_dic = frequencies.get(self.project, None)
        if freqs_dic and 'annual_cycle' in freqs_dic and freqs_dic['annual_cycle'] == self.frequency:
            is_annual_cycle = True
        # The period may be the wildcard string "*", which cannot be flagged
        if is_annual_cycle and isinstance(self.period, cperiod):
            self.period.fx = True
        #
        self.kvp = attval
        # alias is a n-plet : filevar, scale, offset, units, filenameVar, missing, conditions
        self.alias = varIsAliased(self.project, self.variable)
        #
        if "," in self.variable and self.alias:
            filevar, scale, offset, units, filenameVar, missing, conditions = self.alias
            if filevar != self.variable or scale != 1. or offset != 0 or missing:
                raise Climaf_Classes_Error(
                    "Cannot alias/scale/setmiss on group variable")
        # Build CliMAF Ref Syntax for the dataset
        self.crs = self.buildcrs()
        #
        self.files = None
        self.local_copies_of_remote_files = None
        self.register()

    def __eq__(self, other):
        """
        Tell if two datasets are the same : on top of the check done by the parent
        class, all facets values, model, frequency and alias must be equal.
        """
        same_as_cobject = super(cdataset, self).__eq__(other)
        if not same_as_cobject:
            return same_as_cobject

        def described_by(dataset):
            # Copy of the facets dict, complemented with the attributes which may not be facets
            description = copy.deepcopy(dataset.kvp)
            description["model"] = dataset.model
            description["frequency"] = dataset.frequency
            description["alias"] = dataset.alias
            return description

        mine = described_by(self)
        theirs = described_by(other)
        # Only the keys of this dataset are compared
        return all([mine[key] == theirs[key] for key in mine])

    def setperiod(self, period):
        """
        Change the period of the dataset, and update its CRS accordingly.
        A period provided as a string is first processed by init_period()
        """
        new_period = init_period(period) if isinstance(period, six.string_types) else period
        # Un-register under the former CRS, then register again under the new one
        self.erase()
        self.kvp['period'] = new_period
        self.period = new_period
        self.crs = self.buildcrs()
        self.register()

    def buildcrs(self, period=None, crsrewrite=None):
        """
        Return the CRS of the dataset, i.e. the CRS pattern of its project filled with
        the facets values, wrapped in "ds('...')". If 'period' is provided, it is used
        instead of the dataset's one. Argument 'crsrewrite' is not used.
        """
        pattern = string.Template(cprojects[self.project].crs)
        facets = self.kvp.copy()
        if period is not None:
            facets['period'] = period
        domain = facets['domain']
        if isinstance(domain, list):
            # A domain provided as a list of corners shows as the list's representation
            facets['domain'] = repr(domain)
        # Facets without a value are left as is in the pattern
        return "ds('%s')" % pattern.safe_substitute(facets)

    def errata(self):
        """
        For a CMIP6 dataset, open the errata service page for the dataset in a web browser
        (firefox), after resolving its possible wildcards. For other projects, just log a warning.

        Raises Climaf_Classes_Error if the dataset cannot be resolved.
        """
        if self.project == "CMIP6":
            service = "https://errata.es-doc.org/1/resolve/simple-pid?datasets="
            browser = "firefox"
            try:
                res = self.explore('resolve')
            except Exception:
                # Not a bare 'except', which would also catch KeyboardInterrupt and SystemExit
                raise Climaf_Classes_Error(
                    "Cannot proceed with errata: Cannot resolve ambiguities on %s" % repr(self))
            # CMIP6.CMIP.CNRM-CERFACS.CNRM-ESM2-1.1pctCO2.r1i1p1f2.Emon.expfe.gn.v20181018
            ref = ".".join(["CMIP6", res.kvp["mip"], res.kvp["institute"], res.kvp["model"], res.kvp["experiment"],
                            res.kvp["realization"], res.kvp["table"], res.kvp["variable"], res.kvp["grid"],
                            "v" + res.kvp["version"]])
            clogger.warning("Querying errata service %s using %s" %
                            (service, browser))
            # The browser is launched in the background by the shell
            os.system("%s %s%s &" % (browser, service, ref))
            # see file api_errata_Atef.py for doing better
        else:
            clogger.warning(
                "No errata service is yet defined for project %s" % self.project)

    def isLocal(self):
        """
        Return what module-level function isLocal() tells for the project, model, simulation,
        frequency, realm and table of the dataset ("*" standing for a missing value)
        """
        criteria = dict(project=self.project,
                        model=getattr(self, "model", "*"),
                        simulation=self.simulation,
                        frequency=self.frequency,
                        realm=self.kvp.get("realm", "*"),
                        table=self.kvp.get("table", "*"))
        # Inside the method, name isLocal refers to the module-level function
        return isLocal(**criteria)

    def isCached(self):
        """ TBD : analyze if a remote dataset is locally cached

        For the time being, the answer is always False
        """
        # Remote datasets are not yet cached
        return False

    def oneVarPerFile(self):
        """
        Return True if the first item of each data location triplet returned by getlocs()
        for this dataset is true (which is also the case when there is no location)
        """
        realm = self.kvp.get("realm", "*")
        table = self.kvp.get("table", "*")
        llocs = getlocs(project=self.project, model=self.model, simulation=self.simulation,
                        frequency=self.frequency, realm=realm, table=table)
        organizations = [org for org, _freq, _url in llocs]
        return all(organizations)

    def periodIsFine(self):
        """TBD : not implemented yet. Log a debug message and return False."""
        answer = False
        clogger.debug("always returns False, yet - TBD")
        return answer

    def domainIsFine(self):
        """Return True if the domain of the dataset is 'global' (TBD : too simple)."""
        clogger.debug("a bit too simple yet (domain=='global')- TBD")
        is_global = (self.domain == 'global')
        return is_global

    def periodHasOneFile(self):
        """Return True if the string returned by baseFiles() does not list more than one file."""
        # File names are separated by a single whitespace
        files = self.baseFiles().split(" ")
        return len(files) <= 1

    def hasOneMember(self):
        """TBD : not implemented yet. Log a debug message and return True."""
        answer = True
        clogger.debug("always returns True, yet - TBD")
        return answer

    def hasExactVariable(self):
        # Assume that group variable do not need aliasing
        if "," in self.variable:
            return True
        clogger.debug("always returns False, yet - TBD")
        return False

    def missingIsOK(self):
        if self.alias is None:
            return True
        else:
            _, _, _, _, _, missing, _ = self.alias
            return missing is None

    def matches_conditions(self, conditions):
        """
        Return True if, for all keys in dict conditions, the kvp
        value of object for same key is among condition's values (which can be a list)
        Example :
          with conditions={ "model":"CanESM5" , "version": ["20180103", "20190112"] }
          the method will return True for both versions of that model
        """
        if conditions is None:
            return True
        for key in conditions:
            values = conditions[key]
            if not isinstance(values, list):
                values = [values, ]
            if self.kvp[key] not in values:
                return False
        return True

    def check_if_dict_ambiguous(self, input_dict):
        """
        Split a dict of candidate facet values into a resolved part and an
        ambiguous part.

        :param input_dict: dict which keys are facet names and which values are
          either a list of candidate values or a single value
        :return: a pair (non_ambiguous_dict, ambiguous_dict), built as follows :

          - a list with more than one element is ambiguous, and is kept as is
          - a list with a single element is resolved to that element
          - a non-list value for key 'variable' is compared to the first element
            of each alias declared for the project of the dataset : it is
            replaced by the aliased variable if exactly one alias matches, kept
            as is if none matches, and reported as ambiguous otherwise, as a
            pair (value, set of matching variables)
          - any other value is resolved to itself
        """
        ambiguous_dict = dict()
        non_ambiguous_dict = dict()
        for (kw, val) in input_dict.items():
            if isinstance(val, list):
                if len(val) > 1:
                    ambiguous_dict[kw] = val
                else:
                    non_ambiguous_dict[kw] = val[0]
            elif kw == 'variable':
                # Should take care of aliasing to fileVar
                paliases = aliases.get(self.project, dict())
                matching_vars = set(variable for variable in paliases
                                    if val == paliases[variable][0])
                if len(matching_vars) == 0:
                    # No filename variable in aliases matches actual filename
                    non_ambiguous_dict[kw] = val
                elif len(matching_vars) == 1:
                    # One variable has a filename variable which matches the retrieved filename.
                    # A set cannot be indexed, hence the use of an iterator
                    non_ambiguous_dict[kw] = next(iter(matching_vars))
                else:
                    ambiguous_dict[kw] = (val, matching_vars)
            else:
                non_ambiguous_dict[kw] = val
        return non_ambiguous_dict, ambiguous_dict

[docs]    def glob(self, what=None, periods=None, split=None, use_frequency=False):
        """Datafile exploration for a dataset which possibly has
        wildcards (* and ?) in attributes/facets.

        Returns info regarding matching datafile or directories:

          - if WHAT = 'files' , returns a string of all data filenames

          - otherwise, returns a list of facet/value dictionnaries for
            matching data (or a pair, see below)

        In last case, data file periods are not returned if arg
        PERIODS is None and data search is optimized for the project.
        In that case, the globbing is done on data directories and not
        on data files, which is much faster.

        If PERIODS is not None, individual data files periods are
        merged among cases with same facets values

        if SPLIT is not None, a pair is returned intead of the dicts list :

           - first element is a dict with facets which values are the
             same among all cases

           - second element is the dicts list as above, but in which
             facets with common values are discarded

        Example :

        >>> tos_data = ds(project='CMIP6', variable='tos', period='*',
               table='Omon', model='CNRM*', realization='r1i1p1f*' )

        >>> common_keys, varied_keys = tos_data.glob(periods=True, split=True)

        >>> common_keys
        {'mip': 'CMIP', 'institute': 'CNRM-CERFACS', 'experiment': 'historical',
        'realization': 'r1i1p1f2', 'table': 'Omon', 'variable': 'tos',
        'version': 'latest', 'period': [1850-2014], 'root': '/bdd'}

        >>> varied_keys
        [{'model': 'CNRM-ESM2-1'  , 'grid': 'gn' },
         {'model': 'CNRM-ESM2-1'  , 'grid': 'gr1'},
         {'model': 'CNRM-CM6-1'   , 'grid': 'gn' },
         {'model': 'CNRM-CM6-1'   , 'grid': 'gr1'},
         {'model': 'CNRM-CM6-1-HR', 'grid': 'gn' } ]

        """
        dic = self.kvp.copy()
        if self.alias:
            filevar, _, _, _, filenameVar, _, conditions = self.alias
            req_var = dic["variable"]
            dic["variable"] = string.Template(filevar).safe_substitute(dic)
            if filenameVar:
                dic["filenameVar"] = filenameVar
        clogger.debug("glob() with dic=%s" % repr(dic))
        cases = list()
        files = selectFiles(with_periods=(periods is not None or what in ['files', ]),
                            return_combinations=cases, use_frequency=use_frequency, **dic)
        if what in ['files', ]:
            return files
        else:
            if periods is not None:
                cases = group_periods(cases)
            else:
                # For non-optimized cases, select_files returns periods,
                # but we want an even behaviour
                for case in cases:
                    case.pop('period', None)
            if split is not None:
                keys = remove_keys_with_same_values(cases)
                return keys, cases
            else:
                return cases

[docs]    def explore(self, option='check_and_store', group_periods_on=None, operation='intersection', first=None):
        """
        Versatile datafile exploration for a dataset which possibly has wildcards (* and ? ) in
        attributes.

        ``option`` can be :

          - 'choices' for returning a dict which keys are wildcard attributes and entries
            are values list
          - 'resolve' for returning a NEW DATASET with instanciated attributes (if uniquely)
          - 'ensemble' for returning AN ENSEMBLE based on multiple possible values of one
            or more attributes (tell which one is first in labels by using arg 'first')
          - 'check_and_store' (or missing) for just identifying and storing dataset files list
            (while ensuring non-ambiguity check for wildcard attributes)

        This feature works only for projects which organization is of type 'generic'

        **See further below, after the first examples, what can done with wildcard on 'period'**

        Toy example ::

          >>> rst=ds(project="example", simulation="*", variable="rst", period="1980-1981")
          >>> rst
          ds('example|*|rst|1980-1981|global|monthly')

          >>> rst.explore('choices')
          {'simulation': ['AMIPV6ALB2G']}

          >>> instanciated_dataset=rst.explore('resolve')
          >>> instanciated_dataset
          ds('example|AMIPV6ALB2G|rst|1980-1981|global|monthly')

          >>> my_ensemble=rst.explore('ensemble')
          error    : "Creating an ensemble does not make sense because all wildcard attributes have a single possible
                      value ({'simulation': ['AMIPV6ALB2G']})"

        Real life example for options ``choices`` and ``ensemble`` ::

          >>> rst=ds(project="CMIP6", model='*', experiment="*ontrol*", realization="r1i1p1f*", table="Amon",
          ...        variable="rsut", period="1980-1981")
          >>> clog('info')
          >>> rst.explore('choices')
          info     : Attribute institute has matching value CNRM-CERFACS
          info     : Attribute experiment has multiple values : set(['piClim-control', 'piControl'])
          info     : Attribute grid has matching value gr
          info     : Attribute realization has matching value r1i1p1f2
          info     : Attribute mip has multiple values : set(['CMIP', 'RFMIP'])
          info     : Attribute model has multiple values : set(['CNRM-ESM2-1', 'CNRM-CM6-1'])
          {'institute': ['CNRM-CERFACS'], 'experiment': ['piClim-control', 'piControl'], 'grid': ['gr'],
          'realization': ['r1i1p1f2'], 'mip': ['CMIP', 'RFMIP'], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1']}

          >>> # Let us further select by setting experiment=piControl
          >>> mrst=ds(project="CMIP6", model='*', experiment="piControl", realization="r1i1p1f*", table="Amon",
          ...         variable="rsut", period="1980-1981")
          >>> mrst.explore('choices')
          {'institute': ['CNRM-CERFACS'], 'mip': ['CMIP'], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'], 'grid': ['gr'],
           'realization': ['r1i1p1f2']}
          >>> small_ensemble=mrst.explore('ensemble')
          >>> small_ensemble
          cens({
                'CNRM-ESM2-1':ds('CMIP6%%rsut%1980-1981%global%/cnrm/cmip%CNRM-ESM2-1%CNRM-CERFACS%CMIP%Amon%piControl%'
                                 'r1i1p1f2%gr%latest'),
                'CNRM-CM6-1' :ds('CMIP6%%rsut%1980-1981%global%/cnrm/cmip%CNRM-CM6-1%CNRM-CERFACS%CMIP%Amon%piControl%'
                                 'r1i1p1f2%gr%latest')
               })

        When option='choices' and period= '*', the period of all matching files will be either :

          - aggregated among all instances of all attributes with wildcards (default)
          - or, if argument ``group_periods_on`` provides an attribute name, aggregated after
            being sorted on that attribute and merged

        The aggregation is governed by argument ``operation``, which can be either :

          - 'intersection' : which is the most useful case, and hence is the default
          - 'union' : which has not much sense except to know which periods are definitely
            not covered by any data
          - None : no aggregation occurs, and you get a dict of the merged periods, which
            keys are the value of the grouping attribute

        Attribute 'period' cannot use a * without being  == * ;


        Examples without grouping periods over any attribute ::

          >>> # Let us use a kind of dataset which data files are temporally splitted,
          >>> # and allow for various models, and use a wildcard for period
          >>> so=ds(project="CMIP6", model='CNRM*', experiment="piControl", realization="r1i1p1f2",
          ... table="Omon", variable="so", period="*")

          >>> # What is the overall period covered by the union of all datafiles
          >>> # (but not necessarily by a single model!)
          >>> so.explore('choices', operation='union')
          { 'period': [1850-2349], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'] .....}

          >>> # What is the intersection of periods covered by each datafile
          >>> so.explore('choices')
          { 'period': [None], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'] .....}

          >>> # What is the list of periods covered by datafiles
          >>> so.explore('choices', operation=None)
          { 'period': {None: [1850-1899, 1900-1949, 1950-1999, 2000-2049, 2050-2099,
                              2100-2149, 2150-2199, 2200-2249, 2250-2299, 2300-2349]},
             'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'] .....}

        Examples using periods grouping over an attribute ::

          >>> # What is the intersection of available periods after grouping them on the various values of 'model'
          >>> so.explore('choices',group_periods_on='model')
          { 'period': [1850-2349], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'], ....}

          >>> # Same, but explicit the default value
          >>> so.explore('choices',group_periods_on='model',operation='intersection')
          { 'period': [1850-2349], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'], ....}

          >>> # What are the aggregated periods for each value of 'model'
          >>> so.explore('choices',group_periods_on='model',operation=None)
          { 'period':
              {'CNRM-ESM2-1': [1850-2349],
               'CNRM-CM6-1' : [1850-2349] },
            'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'], ...}

        """
        use_frequency = cprojects[self.project].use_frequency
        if use_frequency:
            if "frequency" in self.kvp:
                use_frequency = self.kvp["frequency"]
            else:
                use_frequency = cdef("frequency", project=self.project)
                if not use_frequency:
                    use_frequency = False
        dic = self.kvp.copy()
        if self.alias:
            filevar, _, _, _, filenameVar, _, conditions = self.alias
            req_var = dic["variable"]
            dic["variable"] = string.Template(filevar).safe_substitute(dic)
            if filenameVar:
                dic["filenameVar"] = filenameVar
        clogger.debug("Looking with dic=%s" % repr(dic))
        # if option != 'check_and_store' :
        wildcards = dict()
        files = selectFiles(return_wildcards=wildcards, merge_periods_on=group_periods_on, use_frequency=use_frequency,
                            **dic)
        # -- Use the requested variable instead of the aliased
        if self.alias:
            dic["variable"] = req_var
        # if option != 'check_and_store' :
        periods = wildcards.get('period', None)
        # else : periods=None
        if periods:
            # print "periods=",periods
            if option not in ['choices', ]:
                if group_periods_on:
                    raise Climaf_Classes_Error(
                        "Can use 'group_periods_on' only with option='choices'")
                if operation != 'intersection':
                    raise Climaf_Classes_Error(
                        "Can use operation %s only with option='choices'" % operation)
            if operation in ['intersection', ]:
                if group_periods_on:
                    # print "periods=",periods
                    merged_periods = [merge_periods(
                        p) for p in list(periods.values())]
                    inter = merged_periods.pop(0)
                    for p in merged_periods:
                        inter = intersect_periods_list(inter, p)
                else:
                    inter = merge_periods(periods[None])
                wildcards['period'] = inter
            elif operation in ['union', ]:
                to_merge = []
                for plist in list(periods.values()):
                    to_merge.extend(plist)
                wildcards['period'] = merge_periods(to_merge)
            elif operation is None:
                # Merge periods for each facet value separately
                if group_periods_on:
                    for key in periods:
                        periods[key] = merge_periods(periods[key])
                wildcards['period'] = periods
            else:
                raise Climaf_Classes_Error(
                    "Operation %s is not known " % operation)
        #
        wildcard_attributes_list = [k for k in dic if isinstance(
            dic[k], six.string_types) and "*" in dic[k]]
        if option in ['resolve', ]:
            clogger.debug("Trying to resolve on attributes %s" %
                          wildcard_attributes_list)
            non_ambiguous_dict, ambiguous_dict = self.check_if_dict_ambiguous(
                wildcards)
            if len(ambiguous_dict) != 0:
                error_msg = list()
                for kw in sorted(list(ambiguous_dict)):
                    if kw in ["variable", ]:
                        error_msg.append("Filename variable %s is matched by multiple variables %s" %
                                         (ambiguous_dict[kw][0], repr(ambiguous_dict[kw][1])))
                    elif kw in ["period", ]:
                        error_msg.append(
                            "Periods with holes are not handled: %s" % str(ambiguous_dict[kw]))
                    else:
                        error_msg.append("Wildcard attribute %s is ambiguous %s for dataset %s" %
                                         (kw, str(ambiguous_dict[kw]), self))
                raise Climaf_Classes_Error(" ".join(error_msg))
            else:
                dic.update(**non_ambiguous_dict)
                return ds(**dic)
        elif option in ['choices', ]:
            clogger.debug(
                "Listing possible values for these wildcard attributes %s" % wildcard_attributes_list)
            self.files = files
            return wildcards
        elif option in ['ensemble', ]:
            clogger.debug("Trying to create an ensemble on attributes %s" %
                          wildcard_attributes_list)
            is_ensemble = False
            for kw in wildcards:
                entry = wildcards[kw]
                # print "entry=",entry, 'type=',type(entry), 'ensemble_kw=',ensemble_kw
                if kw in ['period', ] and isinstance(entry, list):
                    if len(wildcards['period']) > 1:
                        raise Climaf_Classes_Error("Cannot create an ensemble with holes in period (%s)" %
                                                   wildcards['period'])
                    entry = entry[0]
                if isinstance(entry, list):
                    is_ensemble = (len(entry) > 1)
                dic[kw] = entry
            if is_ensemble is False:
                # raise Climaf_Classes_Error("Creating an ensemble does not make sense because all wildcard "+\
                #                           "attributes have a single possible value (%s)"%wildcards)
                clogger.warning("Creating an ensemble with a single member")
            self.files = files
            return eds(first=first, **dic)
        elif option in ['check_and_store', ]:
            for kw in wildcards:
                entry = wildcards[kw]
                if isinstance(entry, list) and len(entry) > 1:
                    raise Climaf_Classes_Error("This dataset is ambiguous on attribute %s='%s'; please choose among :"
                                               " %s or use either 'ensure_dataset=False' (with method baseFiles or "
                                               "listfiles) or 'option=\'choices\' (with method explore). "
                                               "Context is %s" % (kw, dic[kw], entry, self.kvp))
            self.files = files
        else:
            raise Climaf_Classes_Error("Unknown option %s" % option)

    def baseFiles(self, force=False, ensure_dataset=True):
        """ Provide the list of (local or remote) files which hold the data
        of the dataset

        The cached value (i.e. attribute 'files') is returned as is, except
        when there is none yet or when arg force is True (which has no effect
        for project 'file'); in such cases, files are searched using method
        explore, which stores the result in the cache

        If ensure_dataset is True, ambiguous datasets are forbidden; otherwise,
        an ambiguous dataset leads to logging an error and returning None
        """
        search_needed = self.files is None or (force and self.project != 'file')
        if not search_needed:
            return self.files
        if ensure_dataset:
            self.explore()
            return self.files
        cases = self.explore(option='choices')
        # Any attribute but period which has a list of possible values is an ambiguity
        list_keys = [k for k in cases if k != 'period' and type(cases[k]) is list]
        if len(list_keys) > 0:
            clogger.error(
                "The dataset is ambiguous on %s; its CRS is %s" % (cases, self))
            return None
        return self.files

[docs]    def listfiles(self, force=False, ensure_dataset=True):
        """ Returns the list of (local or remote) files which include the data
        for the dataset

        Use cached value unless called with arg force=True

        If ensure_dataset is True, forbid ambiguous datasets
        """
        return self.baseFiles(force=force, ensure_dataset=ensure_dataset)

    def hasRawVariable(self):
        """ Tell if the dataset variable is actually included in data files
        (rather than being a derived, virtual variable)

        Testing data files is not yet implemented : the method only logs a
        debug message and returns False, which leads to always consider that
        variables declared as 'derived' actually are derived """
        message = "TBD: actually test variables in files, rather than assuming that variable %s " \
                  "is virtual for dataset %s"
        clogger.debug(message % (self.variable, self.crs))
        return False

[docs]    def check(self, frequency=True, gap=True, period=True):
        """
        Check time consistency of first variable of a dataset or ensemble members:
        - if frequency is True : check if data frequency is consistent with dataset frequency
        - if gap is True : check if file data have a gap
        - if period is True : check if period covered by data actually includes the
        whole of dataset period

        Returns: True if every check is OK, False if one fails, None if analysis is not yet possible
        """
        if gap:
            frequency = True
        #
        files = self.baseFiles()
        if not files:
            return False
        files = files.split()
        clogger.debug("List of selected files: %s" % files)
        #
        rep = True
        dsets = [xr.open_dataset(f, use_cftime=True) for f in files]
        all_dsets = xr.combine_by_coords(dsets, combine_attrs='override')
        #
        if self.frequency == 'fx' or self.frequency == 'annual_cycle':
            clogger.info("No check for fixed data for %s", self)
            return True
        if self.frequency == "monthly" and frequency:
            clogger.error("Check cannot yet process monthly data due to" +
                          "to a shortcoming in analyzing monthly data frequency")
            return None
        if not getattr(dsets[0], "frequency", False) and frequency:
            clogger.warning("No frequency in file(s) for %s", self)
            return False
        if "time" not in all_dsets:
            clogger.warning("Cannot yet check a dataset which time dimension" +
                            "is not named 'time' (%s)" % self)
            return False
        #
        times = all_dsets.time
        clogger.debug('Time data of selected files: %s' % times)
        #
        if frequency:
            # Check if data time interval is consistent with dataset frequency
            data_freq = xr.infer_freq(times)
            if data_freq is None:
                clogger.error(
                    "Time interval detected by xr.infer_freq is None %s" % str(times))
                return False
            table = {"monthly": "MS", "daily": "D", "day": "D", "6h": "6H", "3h": "3H",
                     "1h": "1H", "6Hourly": "6H", "3Hourly": "3H"}
            if self.frequency not in table:
                clogger.error("Check cannot yet handle frequency %s" %
                              self.frequency)
                return None
            if data_freq != table[self.frequency]:
                message = 'Data time interval %s is not consistent with dataset frequency %s'
                clogger.warning(message % (data_freq, self.frequency))
                rep = False

        if gap:
            # Check if file data have a gap
            time_values = times.values.flatten()
            delta = freq_to_minutes(data_freq)
            cpt = 0
            for ptim, tim in zip(time_values[:-1], time_values[1:]):
                if ptim + timedelta(minutes=delta) != tim:
                    rep = False
                    cpt += 1
                    if cpt > 3:
                        break
                    clogger.error("File data time issue between %s and %s, interval inconsistent with %s" %
                                  (ptim, tim, delta))

        if period:
            # Compare period covered by data files with dataset's period
            cell_methods = getattr(dsets[0][varOf(self)], "cell_methods", None)
            file_period = timeLimits(times, use_frequency=True, cell_methods=cell_methods,
                                     strict_on_time_dim_name=False)
            clogger.debug('Period covered by selected files: %s' % file_period)
            consist = ""
            if not file_period.includes(self.period):
                consist = "not "
                rep = False
            clogger.info("Datafile time period (%s) includes dataset time period (%s)" %
                         (file_period, self.period) + "=> time periods are %sconsistent." % consist)
        return rep


class cens(cobject, dict):
    def __init__(self, dic=None, order=None, sortfunc=None):
        """Function cens creates a CliMAF object of class ``cens`` ,
        i.e. a dict of objects, which keys are member labels, and
        which members are ordered, using method ``set_order``

        In some cases, ensembles of datasets from the same project
        can also be built easily using :py:func:`~climaf.classes.eds()`

        When applying an operator to an ensemble, CliMAF will know,
        from operator's declaration (see
        :py:func:`~climaf.operators.cscript()`), whether the operator
        'wishes' to get the ensemble or, on the reverse, is not
        'ensemble-capable' :

         - if the operator is ensemble-capable it will deliver it :

           - if it is a script : with a string composed  by
             concatenating the corresponding input files; it will
             also provide the labels list to the script if its
             declaration calls for it with keyword ${labels}
             (see :py:func:`~climaf.operators.cscript()`)
           - if it is a Python function : with the dict of
             corresponding objects

         - if the operator is 'ensemble-dumb', CliMAF will loop
           applying it on each member, and will form a new ensemble
           with the results.

        The dict keys must be label strings, which describe what is
        basically different among members. They are usually used by
        plot scripts to provide a caption allowing to identify each
        dataset/object e.g using various colors.

        Members order is, by decreasing priority : the one provided by
        arg ``order``, the result of applying function ``sortfunc`` to the
        sorted list of labels, or the sorted list of labels (using a
        natural sort if package natsort is available)

        Examples (see also :download:`../examples/ensemble.py`) :

        >>> cdef('project','example'); cdef('simulation',"AMIPV6ALB2G")
        >>> cdef('variable','tas');cdef('frequency','monthly')
        >>> #
        >>> ds1980=ds(period="1980")
        >>> ds1981=ds(period="1981")
        >>> #
        >>> myens=cens({'1980':ds1980 , '1981':ds1981 })
        >>> ncview(myens)  # will launch ncview once per member
        >>>
        >>> myens=cens({'1980':ds1980 , '1981':ds1981 }, order=['1981','1980'])
        >>> myens.set_order(['1981','1980'])
        >>>
        >>> # Add a member
        >>> myens['abcd']=ds(period="1982")

        Limitations : Even if an ensemble is a dict, some dict methods
        are not properly implemented (popitem, fromkeys) and function
        iteritems does not use member order

        You can write an ensemble to a file using function
        :py:func:`~climaf.cache.efile`

        """
        if dic is None:
            dic = dict()
        if not all(isinstance(label, six.string_types) for label in dic):
            raise Climaf_Classes_Error("Ensemble keys/labels must be strings")
        if not all(isinstance(member, cobject) for member in dic.values()):
            raise Climaf_Classes_Error(
                "Ensemble members must be CliMAF objects")
        self.sortfunc = sortfunc
        #
        dict.update(self, dic)
        #
        if order:
            self.set_order(order)
        else:
            keylist = list(self)
            try:
                from natsort.natsort import natsorted
                keylist = natsorted(keylist)
            except Exception:
                # Best effort : fall back to a plain sort if natural sort is not usable
                keylist.sort()
            self.order = sortfunc(keylist) if sortfunc else keylist
        #
        self.crs = self.buildcrs()
        self.register()

    def __eq__(self, other):
        """
        Two ensembles are equal if the parent classes tell so and if they
        have the same members order and equal members for each label
        """
        res = super(cens, self).__eq__(other)
        if res is NotImplemented or not res:
            return res
        # An object without members order (e.g. a plain dict) is not an equal ensemble
        if self.order != getattr(other, "order", None):
            return False
        return all(self[m] == other[m] for m in self.order)

    def __ne__(self, other):
        """ Negation of __eq__, so that both operators account for members order """
        res = self.__eq__(other)
        return res if res is NotImplemented else not res

    def set_order(self, order, ordered_keylist=None):
        """
        Set members order to the labels list ``order``, provided it includes exactly
        the labels in ``ordered_keylist`` (which defaults to the ensemble labels);
        otherwise, raise Climaf_Classes_Error
        """
        # Use a copy, in order that further changes in members
        # order do not alter the list provided by the caller
        new_order = list(order)
        if ordered_keylist is None:
            ordered_keylist = list(self)
        if sorted(new_order) != sorted(ordered_keylist):
            raise Climaf_Classes_Error(
                "Order list does not match dict keys list : %s   and %s" %
                (repr(sorted(new_order)), repr(sorted(ordered_keylist))))
        self.order = new_order

    def __setitem__(self, k, v):
        """ Add or replace a member, then update members order and the ensemble CRS """
        if not isinstance(k, six.string_types):
            raise Climaf_Classes_Error("Ensemble keys/labels must be strings")
        if not isinstance(v, cobject):
            raise Climaf_Classes_Error(
                "Ensemble members must be CliMAF objects")
        dict.__setitem__(self, k, v)
        if k not in self.order:
            self.order.append(k)
            if self.sortfunc:
                self.order = self.sortfunc(list(self))
        self.crs = self.buildcrs()
        self.register()

    def items(self):
        """ Return the list of pairs (label, member), using members order """
        return [(elt, self[elt]) for elt in self.order]

    def copy(self):
        """ Return a new ensemble with same members, members order and sort function """
        return cens(self, order=list(self.order), sortfunc=self.sortfunc)

    def pop(self, key, default=None):
        """
        Remove member ``key`` and return it; return ``default`` if there is no such member
        """
        # NOTE(review): attribute crs is not refreshed here, nor in clear() and
        # update(), while __setitem__ does refresh it - to be confirmed
        if key not in self:
            return default
        self.order.remove(key)
        return dict.pop(self, key, default)

    def clear(self):
        """ Remove all members """
        dict.clear(self)
        self.order = []

    def update(self, it):
        """
        Add the members provided by ``it``, which is either a dict or an iterable of
        pairs (label, member). New labels are appended to members order, unless the
        ensemble has a sort function, which then sets the order
        """
        # Get the pairs once for all, because 'it' may be an iterator, which can be
        # read only once
        pairs = list(it.items()) if isinstance(it, dict) else list(it)
        dict.update(self, pairs)
        for label, _ in pairs:
            # A label which is already known must not be duplicated in members order
            if label not in self.order:
                self.order.append(label)
        if self.sortfunc:
            self.order = self.sortfunc(list(self))

    def buildcrs(self, crsrewrite=None, period=None):
        """
        Build the CRS expression for the ensemble, using members order. Args
        crsrewrite and period, if any is provided, are passed to method buildcrs
        of each member; otherwise, attribute crs of each member is used
        """
        if crsrewrite is None and period is None:
            # A useful optimization, for multi-model studies
            members_crs = ["'%s':%s" % (m, self[m].crs) for m in self.order]
        else:
            members_crs = ["'%s':%s" % (m, self[m].buildcrs(crsrewrite=crsrewrite, period=period))
                           for m in self.order]
        return "cens({%s})" % ",".join(members_crs)

    def check(self):
        """
        Check time consistency of first variable for each member of the ensemble :
        - check if first data time interval is consistent with dataset frequency
        - check if file data have a gap
        - check if period covered by data files actually includes the whole of dataset period

        Returns: False if the check fails for at least one member; otherwise, None if
        the check returns None for at least one member, and True in all other cases.
        This does not depend on members order.

        Example:

        >>> # Ensemble with monthly frequency
        >>> j0=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1980')
        >>> j1=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1981')
        >>> ens=cens({'1980':j0, '1981':j1})
        >>> res=ens.check()

        """
        # Call method 'check' of each member of the ensemble (all members are
        # checked, even after a failure, in order to log all issues)
        results = [self[memb].check() for memb in self.order]
        if any(res is False for res in results):
            return False
        if any(res is None for res in results):
            return None
        return True


def eds(first=None, **kwargs):
    """
    Create a dataset ensemble using the same calling sequence as
    :py:func:`~climaf.classes.cdataset`, except that some facets
    are lists, which defines the ensemble members; these facets must be among
    the facets authorized for ensemble in the (single) project involved

    Example::

    >>> cdef("frequency","monthly") ;  cdef("project","CMIP5"); cdef("model","CNRM-CM5")
    >>> cdef("variable","tas"); cdef("period","1860")
    >>> ens=eds(experiment="historical", simulation=["r1i1p1","r2i1p1"])

    Argument 'first' is used when multiple attributes are of list type, and tells which
    of these attributes appears first in member labels; by default, this is the first one
    among the attributes declared as usable for ensembles for the project. Members are
    then built for all combinations of the listed values, and each label is made of the
    values joined by '_'

    Raises Climaf_Classes_Error if there is no attribute of list type, if one of them
    cannot be used for an ensemble in the project, or if 'first' is not one of them

    """
    attval = processDatasetArgs(**kwargs)
    # Check that any facet/attribute of type 'list' (for defining an
    # ensemble) is OK for the project, and that there is at least one
    listattr = []
    for attr in attval:
        clogger.debug("Looking at attr %s for ensemble" % attr)
        if isinstance(attval[attr], list) and attr != "domain":
            if attr not in cprojects[attval["project"]].attributes_for_ensemble:
                raise Climaf_Classes_Error(
                    "Attribute %s cannot be used for ensemble" % attr)
            clogger.debug("Attr %s is used for an ensemble" % attr)
            listattr.append(attr)
    if len(listattr) < 1:
        raise Climaf_Classes_Error(
            "For building an ensemble, must have at least one attribute which is a list")
    # Create an ensemble of datasets if applicable
    d = dict()
    if len(listattr) == 1:
        # Simple case : only one attribute has multiple values (-> members)
        attr = listattr[0]
        for member in attval[attr]:
            attval2 = attval.copy()
            attval2[attr] = member
            d[member] = cdataset(**attval2)
        return cens(d, order=attval[attr])
    # Must construct the cartesian product of all list-type attributes
    if first is not None:
        if first not in listattr:
            raise Climaf_Classes_Error(
                "Attribute %s cannot be first in labels, because it is not among "
                "the attributes of type list : %s" % (first, listattr))
        att = first
    else:
        # Use the first attributes declared as ensemble-prone for the project. There is
        # always one, because each list-type attribute was checked above as being such
        for a in cprojects[attval["project"]].attributes_for_ensemble:
            clogger.debug("Checking list attribute %s against %s" % (a, listattr))
            if a in listattr:
                att = a
                break
    others = [a for a in listattr if a != att]
    # Each combination is a list of pairs (attribute, value); the values of
    # each further attribute vary faster than those of the previous ones
    comb = [[(att, val)] for val in attval[att]]
    for other in others:
        comb = [c + [(other, v)] for c in comb for v in attval[other]]
    orderl = list()
    for c in comb:
        attval2 = attval.copy()
        attval2.update(c)
        label = "_".join([val for _, val in c])
        orderl.append(label)
        d[label] = cdataset(**attval2)
    return cens(d, order=orderl)


def fds(filename, simulation=None, variable=None, period=None, model=None):
    """
    fds stands for FileDataSet; it allows to create a dataset simply
    by providing a filename and optionally a simulation name , a
    variable name, a period and a model name.

    For dataset attributes which are not provided, these defaults apply :

    - simulation : the filename basename (without suffix '.nc')
    - variable : the set of variables in the data file
    - period : the period actually covered by the data file (if it has time_bnds),
      or 'fx' if that period cannot be determined
    - model : the 'model_id' attribute if it exists, otherwise : 'no_model'
    - project  : 'file' (with separator = '|')
    - frequency : the value of global attribute frequency in datafile, if it exists

    The following restriction apply to such datasets :

    - functions :py:func:`~climaf.classes.calias` and
      :py:func:`~climaf.operators_derive.derive` cannot be used for project
      'file'

    Results are unforeseen if all variables do not have the same time axis

    Raises Climaf_Classes_Error if the file does not exist, if it has no variable, if
    it lacks one of the provided variables, or if the period which is provided is not
    included in the period covered by the file

    Examples : See :download:`data_file.py <../examples/data_file.py>`

    """
    filename = os.path.expanduser(filename)
    if not os.path.exists(filename):
        raise Climaf_Classes_Error("File %s does no exist" % filename)
    #
    if model is None:
        model = model_id(filename)
    if simulation is None:
        # Drop the last three characters, which are assumed to be suffix '.nc'
        simulation = os.path.basename(filename)[0:-3]
    #
    if variable is None:
        lvars = varsOfFile(filename)
        if len(lvars) == 0:
            raise Climaf_Classes_Error("No variable in file %s" % filename)
        # Same order as formerly : the variable which is popped comes first
        last_var = lvars.pop()
        variable = ",".join([last_var] + list(lvars))
    else:
        for v in variable.split(','):
            if not fileHasVar(filename, v):
                raise Climaf_Classes_Error(
                    "No variable %s in file %s" % (v, filename))
    #
    try:
        fperiod = timeLimits(filename)
    except Exception:
        # Best effort : a file which period cannot be analyzed is handled below as
        # a file without period. Not using a bare except, in order not to swallow
        # KeyboardInterrupt nor SystemExit
        fperiod = None
    if period is None:
        if fperiod is None:
            period = "fx"
        else:
            period = repr(fperiod)
    elif period != 'fx':
        if fperiod and not fperiod.includes(init_period(period)):
            raise Climaf_Classes_Error(
                "Max period from file %s is %s" % (filename, repr(fperiod)))
    #
    d = ds(project='file', model=model, simulation=simulation,
           variable=variable, period=period, path=filename)
    d.files = filename

    if period == 'fx':
        d.frequency = 'fx'
    else:
        d.frequency = attrOfFile(filename, "frequency", "*")

    return d


class ctree(cobject):
    def __init__(self, climaf_operator, script, *operands, **parameters):
        """ Builds the tree of a composed object, including a dict for outputs.

        Args:
          climaf_operator: the operator applied to the operands; it is used
           as the function name in the CRS
          script: the object describing the operator, or None; when it is not
           None, its ``flags`` attribute is (shallow) copied on the tree
          operands: the operands (at least one); falsy operands (e.g. None)
           are accepted, and are skipped when building the CRS
          parameters: the operator's keyword arguments; a 'period' provided
           as a cperiod is replaced by its repr()

        Raises:
          Climaf_Classes_Error: if there is no operand, or if a non-falsy
           operand is not a CliMAF object
        """
        if len(operands) == 0:
            raise Climaf_Classes_Error(
                "Cannot apply an operator to no operand")
        self.operator = climaf_operator
        self.script = script
        # Without a script, there is no flags set to copy
        self.flags = False if script is None else copy.copy(script.flags)
        self.operands = operands
        # Periods are stored as strings, so that they show up as such in the CRS
        if isinstance(parameters.get("period"), cperiod):
            parameters["period"] = repr(parameters["period"])
        self.variable = parameters.get("variable")
        self.parameters = parameters
        for o in operands:
            if o and not isinstance(o, cobject):
                raise Climaf_Classes_Error(
                    "operand " + repr(o) + " is not a CliMAF object")
        self.crs = self.buildcrs()
        self.outputs = dict()
        self.register()

    def buildcrs(self, crsrewrite=None, period=None):
        """ Builds the CRS expression representing applying OPERATOR on
        OPERANDS with PARAMETERS.
        Forces period downtree if provided
        A function for rewriting operand's CRS may be provided

        Special case : if operator is 'select' and sole operand is a dataset
        without alias and there is no parameters, then return dataset's crs.
        This is the way to avoid repetitive data selection, when a data
        selection has been explicitly cached
        """
        first_op = self.operands[0]
        if self.operator in ['select', ] and len(self.operands) == 1 and isinstance(first_op, cdataset) and \
                len(list(self.parameters)) == 0 and first_op.alias is None:
            if crsrewrite is None and period is None:
                return first_op.crs
            else:
                return first_op.buildcrs(crsrewrite=crsrewrite, period=period)
        #
        # General case
        # Operands are listed in call order (falsy ones being skipped);
        # parameters are listed in alphabetical order
        rep = list()
        #
        for op in [o for o in self.operands if o]:
            # Reuse the operand's stored CRS unless it has to be rebuilt
            if crsrewrite is None and period is None and "crs" in dir(op):
                opcrs = op.crs
            else:
                opcrs = op.buildcrs(crsrewrite=crsrewrite, period=period)
            if crsrewrite:
                opcrs = crsrewrite(opcrs)
            rep.append(opcrs)
        #
        # 'member_label' is deliberately left out of the CRS
        for par in [p for p in sorted(list(self.parameters)) if p not in ["member_label", ]]:
            value = self.parameters[par]
            if isinstance(value, six.string_types):
                value = str(value)
            rep.append("{}={}".format(par, repr(value)))
        rep = "%s(%s)" % (self.operator, ",".join(rep))
        # clogger.debug("Create crs for ctree: %s" % rep)
        return rep

    def setperiod(self, period):
        """ Modifies the period for all datasets of a tree

        PERIOD may be a string, in which case it is converted using
        init_period. The tree is erased, then registered again with its
        new CRS.
        """
        self.erase()
        if isinstance(period, six.string_types):
            period = init_period(period)
        for op in self.operands:
            # Falsy operands (e.g. None) are accepted by __init__ and skipped
            # by buildcrs: they must be skipped here too
            if op:
                op.setperiod(period)
        self.crs = self.buildcrs(period=period)
        self.register()


class scriptChild(cobject):
    def __init__(self, cobject, varname):
        """
        Builds one of the children of a script call, which represents the
        output named VARNAME of the father object COBJECT

        The CRS is the father's CRS, followed by a dot and by VARNAME
        """
        self.father = cobject
        # The output name is also used as the variable name
        self.varname = varname
        self.variable = varname
        self.crs = self.buildcrs()
        self.file = None
        self.register()

    def setperiod(self, period):
        """ Erases the object, rebuilds its CRS for PERIOD and registers it again """
        self.erase()
        self.crs = self.buildcrs(period=period)
        self.register()

    def buildcrs(self, period=None, crsrewrite=None):
        """ Returns the CRS : father's CRS + '.' + output name

        Father's CRS is rebuilt for PERIOD if it is provided, and is
        processed by function CRSREWRITE if it is provided
        """
        if period is not None:
            father_crs = self.father.buildcrs(period=period)
        else:
            father_crs = self.father.crs
        if crsrewrite:
            father_crs = crsrewrite(father_crs)
        return father_crs + "." + self.varname


def compare_trees(tree1, tree2, func, filter_on_operator=None):
    """
    Recursively compares TREE1 and TREE2.

    For the nodes : compares operator, parameters and number of operands;
    ensures that FILTER_ON_OPERATOR(operator) is not true

    For the leaves (datasets) : ensure that string representations of
    applying function FUNC to the pair of datasets returns the same
    value for all datasets pairs in the (parallel) trees

    Returns that common value : func(leave1,leave2)) or None. None is also
    returned when the trees do not have the same structure, or are objects
    of another kind than dataset, ctree and scriptChild

    FUNC cannot return None as a valid value
    """
    if isinstance(tree1, cdataset) and isinstance(tree2, cdataset):
        rep = func(tree1, tree2)
        clogger.debug("Comparison of two datasets...")
        clogger.debug("... %s" % str(rep))
        return rep
    elif isinstance(tree1, ctree) and isinstance(tree2, ctree):
        clogger.debug("Comparison of two trees...")
        if tree1.operator != tree2.operator:
            return None
        if filter_on_operator and filter_on_operator(tree1.operator):
            clogger.debug("Operator filtered: %s" % tree1.operator)
            return None
        if tree1.parameters != tree2.parameters:
            clogger.debug("Parameters are not coherent: %s/%s" %
                          (tree1.parameters, tree2.parameters))
            return None
        clogger.debug("Parameters are coherent: %s" % tree1.parameters)
        # zip() stops at the shortest sequence : without this check, trees
        # with distinct numbers of operands would be compared only on
        # their common leading operands
        if len(tree1.operands) != len(tree2.operands):
            clogger.debug("Numbers of operands differ: %d/%d" %
                          (len(tree1.operands), len(tree2.operands)))
            return None
        # The common value is kept only if all pairs of operands agree on it
        rep = reduce(lambda a, b: a if repr(a) == repr(b) else None,
                     [compare_trees(op1, op2, func, filter_on_operator)
                      for op1, op2 in zip(tree1.operands, tree2.operands)])
        clogger.debug("... %s" % str(rep))
        return rep
    elif isinstance(tree1, scriptChild) and isinstance(tree2, scriptChild):
        clogger.debug("Comparison of two scriptChild...")
        if tree1.varname != tree2.varname:
            clogger.debug("... varnames are not coherent: %s/%s" %
                          (tree1.varname, tree2.varname))
            return None
        clogger.debug("... varnames are coherent: %s" % tree1.varname)
        rep = compare_trees(tree1.father, tree2.father,
                            func, filter_on_operator)
        clogger.debug("... %s" % str(rep))
        return rep
    return None


# Tells how function ds() handles a CRS string which matches no project, or
# more than one : if True, the issue is only logged; otherwise, ds() raises
# a Climaf_Classes_Error
allow_errors_on_ds_call = True


def allow_error_on_ds(allow=True):
    """
    Set the module-level switch ``allow_errors_on_ds_call`` to ALLOW
    (which defaults to True)
    """
    global allow_errors_on_ds_call
    allow_errors_on_ds_call = allow


def select_projects(**kwargs):
    """
    If kwargs['project'] is a list (has multiple values), select_projects loops on the projects
    until it finds a file containing the aliased variable name.

    Returns a copy of KWARGS where 'project' is set to the first project
    for which some file is found and the first of these files includes the
    variable (or the file variable it is aliased to in that project).

    KWARGS is returned unchanged if it has no 'project', if 'project' is
    not a list, or if no project in the list fits.
    """
    p_list = kwargs.get('project')
    if not isinstance(p_list, list):
        return kwargs
    for project in p_list:
        wkwargs = kwargs.copy()
        wkwargs.update(dict(project=project))
        files = cdataset(**wkwargs).baseFiles()
        if not files:
            clogger.info('-- No file found for project ' +
                         project + ' and ' + repr(wkwargs))
            continue
        clogger.info('-- File found for project ' +
                     project + ' and ' + repr(wkwargs))
        variable = wkwargs['variable']
        # varIsAliased returns None for a variable without alias; otherwise,
        # the first item of the returned tuple is the name of the variable
        # in the file
        alias = varIsAliased(project, variable)
        file_variable = alias[0] if alias else variable
        # Only the first file is checked
        first_file = files.split(" ")[0]
        if fileHasVar(first_file, file_variable):
            clogger.info('-- Variable ' + file_variable + ' (aliased to variable ' +
                         variable + ') found in ' + first_file)
            return wkwargs
        clogger.info('-- Variable ' + file_variable +
                     ' (aliased to variable ' + variable + ') was not found in ' + first_file)
    return kwargs


def ds(*args, **kwargs):
    """Returns a dataset from its full Climate Reference Syntax
    string. Example ::

     >>> ds('CMIP5.historical.pr.[1980].global.monthly.CNRM-CM5.r1i1p1.mon.Amon.atmos.last')

    Also a shortcut for :py:meth:`~climaf.classes.cdataset`,
    when used with only keyword arguments. Example ::

     >>> ds(project='CMIP5', model='CNRM-CM5', experiment='historical', frequency='monthly',
     ...    simulation='r2i3p9', domain=[40,60,-10,20], variable='tas', period='1980-1989', version='last')

    In that latter case, you may use e.g. period='last_50y' to get the
    last 50 years (or less) of data; but this will work only if no
    dataset's attribute is ambiguous. 'first_50y' also works,
    similarly; and also period='*'.

    You must refer to doc at : :py:meth:`~climaf.classes.cdataset`

    When called with a CRS string which matches no project, or more than
    one : if module variable ``allow_errors_on_ds_call`` is true, the issue
    is logged and None is returned; otherwise a Climaf_Classes_Error is
    raised. That error is also raised if more than one positional
    argument is provided.

    """
    if len(args) > 1:
        raise Climaf_Classes_Error(
            "Must provide either only a string or only keyword arguments")
    # clogger.debug("Entering , with args=%s, kwargs=%s"%(`args`,`kwargs`))
    if len(args) == 0:
        period = kwargs.get('period')
        if isinstance(period, six.string_types):
            if period == '*' and auto_resolve:
                clogger.info('Trying to solve for period for %s' % kwargs)
                if resolve_star_period(kwargs):
                    # Case where there is a '*' only for period. kwargs has been modified
                    clogger.info('Solved period = %s' % kwargs['period'])
                    return cdataset(**select_projects(**kwargs))
            else:
                # At least one digit is required for the duration, so that
                # e.g. 'last_y' is not handled as a request for last years
                match = re.match(
                    "(?P<option>last|LAST|first|FIRST)_(?P<duration>[0-9]+)([yY])$", period)
                if match is not None:
                    return resolve_first_or_last_years(copy.deepcopy(kwargs), match.group('duration'),
                                                       option=match.group('option').lower())
        return cdataset(**select_projects(**kwargs))

    # Case of a CRS string : try to interpret it with each known project
    crs = args[0]
    results = []
    for cproj in cprojects:
        try:
            dataset = cprojects[cproj].crs2ds(crs)
        except Climaf_Classes_Error:
            dataset = None
        if dataset:
            results.append(dataset)
    if len(results) > 1:
        e = "CRS expression %s is ambiguous among projects %s" % (
            crs, repr(list(cprojects)))
        if allow_errors_on_ds_call:
            clogger.info(e)
            return None
        raise Climaf_Classes_Error(e)
    if len(results) == 0:
        e = "CRS expression %s is not valid for any project in %s" % (
            crs, repr(list(cprojects)))
        if allow_errors_on_ds_call:
            clogger.debug(e)
            return None
        raise Climaf_Classes_Error(e)
    rep = results[0]
    if rep.project == 'file':
        # For project 'file', the file list is the value of facet 'path'
        rep.files = rep.kvp["path"]
    return rep


def cfreqs(project, dic):
    """
    Allow to declare a dictionary specific to ``project`` for matching
    ``normalized`` frequency values to project-specific frequency values

    Normalized frequency values are :
      decadal, yearly, monthly, daily, 6h, 3h, fx and annual_cycle

    When defining a dataset, any reference to a non-standard
    frequency will be left unchanged both in the dataset's CRS and
    when trying to access corresponding datafiles

    A new declaration for a project replaces the previous one.

    Examples::

    >>> cfreqs('CMIP5',{'monthly':'mon' , 'daily':'day' })
    """
    # The dictionary is stored as is (it is not copied)
    frequencies[project] = dic


def crealms(project, dic):
    """
    Allow to declare a dictionary specific to ``project`` for matching
    ``normalized`` realm names to project-specific realm names

    Normalized realm names are :
      atmos, ocean, land, seaice

    When defining a dataset, any reference to a non-standard
    realm will be left unchanged both in the dataset's CRS and
    when trying to access corresponding datafiles

    A new declaration for a project replaces the previous one.

    Examples::

    >>> crealms('CMIP5',{'atmos':'ATM' , 'ocean':'OCE' })
    """
    # The dictionary is stored as is (it is not copied)
    realms[project] = dic


def calias(project, variable, fileVariable=None, scale=1., offset=0.,
           units=None, missing=None, filenameVar=None, conditions=None):
    """ Declare that in ``project``, ``variable`` is to be computed by
    reading ``fileVariable``, and applying ``scale`` and ``offset``;
    (see first example erai below)

    Arg ``conditions`` allows to restrict the effect, based on the value
    of some facets. It is a dictionary of applicable values or
    values'list, which keys are the facets  (see example CMIP6 below)

    Arg ``filenameVar`` allows to tell which fake variable name should be
    used when computing the filename for this variable in this project
    (for optimisation purpose); (see second example erai below)

    Can tell that a given constant must be interpreted as a missing value
    (see 4th example, EM, below)

    ``variable`` may be a list. In that case, ``fileVariable``, ``units``
    and ``filenameVar``, if provided, should be parallel lists; if one of
    them is not a list, its value applies to all variables

    ``variable`` can be a comma separated list of variables, in which
    case this tells how variables are grouped in files (it make sense
    to use filenameVar in that case, as this is a way to provide the
    label which is unique to this grouping of variable; scale, offset
    and missing args must be the same for all variables in that case

    Raises Climaf_Classes_Error if ``project`` is not known, or if a key
    of ``conditions`` is not a facet of ``project``

    Example ::

    >>> calias('erai','tas_degC','t2m',scale=1., offset=-273.15)  # scale and offset may be provided
    >>> calias('CMIP6','evspsbl',scale=-1., conditions={ 'model':'CanESM5' , 'version': ['20180103', '20190112'] })
    >>> calias('erai','tas','t2m',filenameVar='2T')
    >>> calias('EM',[ 'sic', 'sit', 'sim', 'snd', 'ialb', 'tsice'], missing=1.e+20)
    >>> calias('data_CNRM','so,thetao',filenameVar='grid_T_table2.2')

    NB: A wrapper with same name of this function is defined in
    :py:func:`climaf.driver.calias` and it is the one which is
    exported by module climaf.api. It allows to use a list of
    variable.

    """
    if project not in cprojects:
        raise Climaf_Classes_Error("project %s is not known" % project)
    if conditions is not None:
        for kw in conditions:
            if kw not in cprojects[project].facets:
                raise Climaf_Classes_Error(
                    "Keyword \"%s\" is not allowed for project %s" % (kw, project))
    if not isinstance(variable, list):
        variable = [variable]
    # By default, a variable is read from the file variable of same name
    if not fileVariable:
        fileVariable = variable
    if not filenameVar:
        filenameVar = None

    def per_variable(value):
        """ Return VALUE if it is a list, else a list repeating it for each variable """
        return value if isinstance(value, list) else [value] * len(variable)

    # Arguments which are not lists must be repeated : wrapping them in a
    # one-item list would have zip() below drop all variables but the first
    fileVariable = per_variable(fileVariable)
    filenameVar = per_variable(filenameVar)
    units = per_variable(units)
    if not (len(variable) == len(fileVariable) == len(filenameVar) == len(units)):
        clogger.warning("calias for project %s : lists for variable, fileVariable, units and "
                        "filenameVar do not have the same length; extra items are ignored" % project)
    if project not in aliases:
        aliases[project] = dict()
    for v, u, fv, fnv in zip(variable, units, fileVariable, filenameVar):
        aliases[project][v] = (fv, scale, offset, u, fnv, missing, conditions)


def varIsAliased(project, variable):
    """
    Return the n-uplet (fileVariable, scale, offset, units, filenameVar,
    missing, conditions) recorded by calias() for 'variable' in
    'project', which defines how to compute a variable which is not in
    files

    Return None if no alias is recorded for that variable in that project
    """
    if project not in aliases:
        return None
    project_aliases = aliases[project]
    if variable in project_aliases:
        return project_aliases[variable]
    return None


def cmissing(project, missing, *kwargs):
    """ Declare that in 'project', a given constant must be interpreted
    as a missing value, for a given set of project's attributes values

    Such a declaration must follow all ``calias`` declarations for the
    same project

    This is not implemented yet : the function accepts its arguments,
    does nothing and returns None
    """
    return None


class cpage_all(cobject):
    def __init__(self, fig_lines=None, orientation=None, page_width=1000., page_height=1500., title="", x=0, y=2):
        """
        Common tools for classes cpage and cpage_pdf.

        Stores the page size, the title and its offsets x and y. If
        'orientation' is set, it supersedes page_width and page_height with
        values 1000*1500 (for 'portrait') or 1500*1000 (for 'landscape').

        Here, fig_lines is only checked for not being None; it is up to
        the subclasses to process it.

        Raises Climaf_Classes_Error if fig_lines is None, or if orientation
        is set to another value than 'portrait' or 'landscape'
        """
        if fig_lines is None:
            raise Climaf_Classes_Error("fig_lines must be provided")
        if orientation is not None:
            if orientation in ['portrait', ]:
                page_width = 1000.
                page_height = 1500.
            elif orientation in ['landscape', ]:
                page_width = 1500.
                page_height = 1000.
            else:
                raise Climaf_Classes_Error("if set, orientation must be 'portrait' or 'landscape' (not %s)" %
                                           orientation)
        self.page_width = page_width
        self.page_height = page_height
        self.title = title
        self.x = x
        self.y = y

    def check_figs_list(self, fig_lines, widths, heights):
        """
        Case where fig_lines is a list of lists of figures : check it
        against widths and heights and set attributes fig_lines, widths and
        heights. By default, columns (resp. lines) are evenly spaced.

        Raises Climaf_Classes_Error if fig_lines is empty, if one of its
        elements is not a list, if a line does not have as many figures as
        there are widths, or if there are not as many lines as heights
        """
        # The structure of fig_lines must be checked before it is used for
        # computing default values for widths and heights
        if not fig_lines:
            raise Climaf_Classes_Error(
                "fig_lines must include at least one line of figures")
        if not all(isinstance(fig_line, list) for fig_line in fig_lines):
            raise Climaf_Classes_Error(
                "each element in fig_lines must be a list of figures")
        if not widths:
            if not fig_lines[0]:
                raise Climaf_Classes_Error(
                    "first line in fig_lines must not be empty when widths is not provided")
            widths = [round(1. / len(fig_lines[0]), 2)] * len(fig_lines[0])
        self.widths = widths

        if not heights:
            heights = [round(1. / len(fig_lines), 2)] * len(fig_lines)
        self.heights = heights

        # All lines are checked, including the first one, which may not
        # match widths when these are provided by the caller
        if not all(len(fig_line) == len(self.widths) for fig_line in fig_lines):
            raise Climaf_Classes_Error("each line in fig_lines must have same dimension as widths %d" %
                                       len(self.widths))
        if len(fig_lines) != len(self.heights):
            raise Climaf_Classes_Error(
                "fig_lines must have same size than heights")
        self.fig_lines = fig_lines

    def check_figs_cens(self, fig_lines, widths, heights):
        """
        Case where fig_lines is an ensemble : its members, taken in the
        order of its attribute 'order', fill a grid of len(heights) lines
        and len(widths) columns, line after line. By default, there is one
        column, and lines are evenly spaced. Sets attributes fig_lines,
        widths and heights.
        """
        figs = [fig_lines[fig] for fig in fig_lines.order]
        if not widths:
            widths = [1., ]
        self.widths = widths
        if not heights:
            heights = [round(1. / len(figs), 2)] * len(figs)
        self.heights = heights

        # Pad with None if there are less figures than cells in the grid;
        # figures in excess are not included in the lines built below
        if len(figs) < len(heights) * len(widths):
            figs.extend([None] * (len(heights) * len(widths) - len(figs) + 1))
        self.fig_lines = [
            figs[x: x + len(widths)] for x in range(0, len(heights) * len(widths), len(widths))]

    def buildcrs(self, crsrewrite=None, period=None):
        """
        Return a list of strings, one per line of figures, each being the
        bracketed, comma-separated list of the CRS of the line's figures
        (with 'None' for an empty cell).

        If function 'crsrewrite' is provided, it is passed to the buildcrs
        method of each figure; otherwise the CRS stored in each figure is
        used. Arg 'period' is not used here.
        """
        rep = list()
        for line in self.fig_lines:
            if crsrewrite is not None:
                rep.append("[%s]" % ",".join([f.buildcrs(crsrewrite=crsrewrite) if f is not None else repr(f)
                                              for f in line]))
            else:
                rep.append("[%s]" % ",".join(
                    [f.crs if f is not None else repr(f) for f in line]))
        return rep


class cpage(cpage_all):
    def __init__(self, fig_lines=None, widths=None, heights=None,
                 fig_trim=True, page_trim=True, format="png",
                 orientation=None,
                 page_width=1000., page_height=1500., title="", x=0, y=26, ybox=50, pt=24,
                 font="Times-New-Roman", gravity="North", background="white",
                 insert="", insert_width=200):
        """
        Builds a CliMAF cpage object, which represents an array of figures (output:
        'png' or 'pdf' figure)

        Args:

          fig_lines (a list of lists of figure objects or an ensemble of figure objects):
           each sublist of 'fig_lines' represents a line of figures
          widths (list, optional): the list of figure widths, i.e. the width of each
           column. By default, if fig_lines is:

             - a list of lists: spacing is even
             - an ensemble: if heights is not provided either, the numbers of
               columns and lines are chosen based on the number of figures;
               otherwise, one column is used
          heights (list, optional): the list of figure heights, i.e. the
           height of each line. By default spacing is even
          fig_trim (logical, optional): to turn on/off trimming for all figures.
           It removes all the surrounding extra space of figures in the page,
           either True (default) or False
          page_trim (logical, optional): to turn on/off trimming for the page. It
           removes all the surrounding extra space of the page, either True
           (default) or False
          format (str, optional) : graphic output format, either 'png' (default)
           or 'pdf'(not recommended)
          page_width (float, optional) : width resolution of resultant image;
           CLiMAF default: 1000.
          page_height (float, optional) : height resolution of resultant image;
           CLiMAF default: 1500.
          orientation (str,optional): if set, it supersedes page_width and
           page_height with values 1000*1500 (for portrait) or 1500*1000 (for landscape)
          title (str, optional) : append a label below or above (depending on optional
           argument 'gravity') figures in the page.
          insert(str,optional) : the filename for an image to insert (centered at the
           bottom)
          insert_width(int,optional) : the width at which the inserted image will be
           scaled (in pixels)

        If title is activated:

            - x, y (int, optional): annotate the page with text.
              x is the offset towards the right from the upper left corner
              of the page, while y is the offset upward or the bottom
              according to the optional argument 'gravity' (i.e. 'South' or 'North'
              respectively); CLiMAF default: x=0, y=26. For more details, see:
              http://www.imagemagick.org/script/command-line-options.php?#annotate ;
              where x and y correspond respectively to tx and ty
              in ``-annotate {+-}tx{+-}ty text``
            - ybox (int, optional): width of the assigned box for title;
              CLiMAF default: 50. For more details, see:
              http://www.imagemagick.org/script/command-line-options.php?#splice
            - pt (int, optional): font size of the title; CLiMAF default: 24
            - font (str, optional): set the font to use when creating title; CLiMAF
              default: 'Times-New-Roman'. To print a complete list of fonts, use:
              'convert -list font'
            - gravity (str, optional): the chosen direction specifies where to position
              title; CLiMAF default: 'North'. For more details, see:
              http://www.imagemagick.org/script/command-line-options.php?#gravity
            - background (str, optional): background color of the assigned box for
              title; default: 'white'. To print a complete list of color names, use:
              'convert -list color'

        Raises:

          Climaf_Classes_Error: if ybox is lower than y + pt, or if fig_lines is
           neither a list nor an ensemble; see also the checks on fig_lines,
           widths and heights in parent class

        Example:

         Using no default value, to create a page with 2 columns and 3 lines::

          >>> tas_ds=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
          >>> tas_avg=time_average(tas_ds)
          >>> fig=plot(tas_avg,title='title')
          >>> my_page=cpage([[None, fig],[fig, fig],[fig,fig]], widths=[0.2,0.8],
          ... heights=[0.33,0.33,0.33], fig_trim=False, page_trim=False,
          ... format='pdf', title='Page title', x=10, y=20, ybox=45,
          ... pt=20, font='Utopia', gravity='South', background='grey90',
          ... page_width=1600., page_height=2400.)
        """
        super(cpage, self).__init__(fig_lines=fig_lines, orientation=orientation, page_width=page_width,
                                    page_height=page_height, title=title, x=x, y=y)
        self.fig_trim = fig_trim
        self.page_trim = page_trim
        self.format = format
        self.ybox = ybox
        self.pt = pt
        self.font = font
        self.gravity = gravity
        self.background = background
        self.insert = insert
        self.insert_width = insert_width
        if self.ybox < (self.y + self.pt):
            raise Climaf_Classes_Error(
                "Title exceeds the assigned box: ybox<y+pt")
        if not isinstance(fig_lines, (list, cens)):
            raise Climaf_Classes_Error("fig_lines must be a CliMAF ensemble or a list "
                                       "of lists (each representing a line of figures)")
        elif isinstance(fig_lines, list):
            self.check_figs_list(fig_lines=fig_lines,
                                 widths=widths, heights=heights)
        # case of an ensemble (cens) if heights and widths are not provided
        elif not widths and not heights:
            self.scatter_on_page([fig_lines[label]
                                 for label in fig_lines.order])
        else:  # case of an ensemble (cens) with heights or widths provided
            self.check_figs_cens(fig_lines=fig_lines,
                                 widths=widths, heights=heights)
        #
        self.crs = self.buildcrs()

    def scatter_on_page(self, figs):
        """ Try to optimize nb of columns and lines, based on figs
        list length

        Sets attributes fig_lines (list 'figs' is padded with None, in
        place, and split in lines), widths and heights (even spacing).
        Raises Climaf_Classes_Error if the number of figures is not in
        range 1 to 48.
        """
        n = len(figs)
        if n in range(1, 4):
            nx, ny = 1, n
        elif n == 4:
            nx, ny = 2, 2
        elif n in range(5, 7):
            nx, ny = 2, 3
        elif n in range(7, 9):
            nx, ny = 2, 4
        elif n in range(9, 13):
            nx, ny = 3, 4
        elif n in range(13, 16):
            nx, ny = 3, 5
        elif n in range(16, 21):
            nx, ny = 4, 5
        elif n in range(21, 25):
            nx, ny = 4, 6
        elif n in range(25, 36):
            nx, ny = 5, 7
        elif n in range(36, 49):
            nx, ny = 6, 8
        else:
            raise Climaf_Classes_Error("Too many figures in page")
        # Pad with None, so that each line has nx items
        figs.extend([None] * (nx * ny - len(figs) + 1))
        lines = [figs[x: x + nx] for x in range(0, nx * ny, nx)]
        self.fig_lines = lines
        self.widths = [round(1. / nx, 2)] * nx
        self.heights = [round(1. / ny, 2)] * ny

    def buildcrs(self, crsrewrite=None, period=None):
        """
        Return the CRS of the page : a call to cpage with the lines of
        figures, widths, heights, trimming options, format and page size
        as arguments. Arguments related to title (and arguments insert and
        insert_width) are included only if title is a non-empty string.
        """
        rep = super(cpage, self).buildcrs(crsrewrite=crsrewrite, period=period)
        param = "%s,%s, fig_trim='%s', page_trim='%s', format='%s', page_width=%d, page_height=%d" % \
            (repr(self.widths), repr(self.heights), self.fig_trim, self.page_trim, self.format, self.page_width,
             self.page_height)
        if isinstance(self.title, six.string_types) and len(self.title) != 0:
            # The keyword must be the one of the constructor argument
            # ('background'); it used to be misspelled, hence CRS built
            # before that fix for pages with a title are different
            param = "%s, title='%s', x=%d, y=%d, ybox=%d, pt=%d, font='%s', gravity='%s', background='%s', " \
                    "insert='%s', insert_width=%d" % (param, self.title, self.x, self.y, self.ybox, self.pt, self.font,
                                                      self.gravity, self.background, self.insert, self.insert_width)
        rep = "cpage([%s],%s)" % (",".join(rep), param)

        return rep


class cpage_pdf(cpage_all):
    def __init__(self, fig_lines=None, widths=None, heights=None,
                 orientation=None, page_width=1000., page_height=1500.,
                 scale=1., openright=False,
                 title="", x=0, y=2, titlebox=False, pt="Huge",
                 font="\\familydefault", background="white"):
        """
        Builds a CliMAF cpage_pdf object, which represents an array of figures (output:
        'pdf' figure). Figures are automatically centered in the page using 'pdfjam' tool; see
        http://www2.warwick.ac.uk/fac/sci/statistics/staff/academic-research/firth/software/pdfjam

        Args:
          fig_lines (a list of lists of figure objects or an ensemble of figure objects):
           each sublist of 'fig_lines' represents a line of figures
          widths (list, optional): the list of figure widths, i.e. the width of each
           column. By default, if fig_lines is:

             - a list of lists: spacing is even
             - an ensemble: one column is used
          heights (list, optional): the list of figure heights, i.e. the
           height of each line. By default spacing is even
          page_width (float, optional): width resolution of resultant image;
           CLiMAF default: 1000.
          page_height (float, optional): height resolution of resultant image;
           CLiMAF default: 1500.
          orientation (str,optional): if set, it supersedes page_width and
           page_height with values 1000*1500 (for portrait) or 1500*1000 (for landscape)
          scale (float, optional): to scale all input pages; default:1.
          openright (logical, optional): this option puts an empty figure before the
           first figure; default: False. For more details, see:
           http://ftp.oleane.net/pub/CTAN/macros/latex/contrib/pdfpages/pdfpages.pdf
          title (str, optional): append a label in the page.

        If title is activated, it is by default horizontally centered:

            - x (int, optional): title horizontal shift (in cm).
            - y (int, optional): vertical shift from the top of the page (in cm);
              only positive (down) values have an effect, default=2 cm
            - titlebox (logical, optional): set it to True to frame the text in a box,
              frame color is 'black'
            - pt (str, optional): title font size; CLiMAF default: 'Huge'
              (corresponding to 24 pt). You can set or not a backslash before this
              argument.
            - font (str, optional): font
              abbreviation among available LaTex fonts; default: '\\\\\\\\familydefault'.
            - background (str, optional): frame fill background color; among LaTex
              'fcolorbox' colors; default: 'white'.

        Left and right margins are set to 2cm.

        Raises:

          Climaf_Classes_Error: if fig_lines is neither a list nor an ensemble;
           see also the checks on fig_lines, widths and heights in parent class

        Example:

         Using no default value, to create a PDF page with 2 columns and 3 lines::

          >>> tas_ds=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
          >>> tas_avg=time_average(tas_ds)
          >>> fig=plot(tas_avg,title='title',format='pdf')
          >>> crop_fig=cpdfcrop(fig)
          >>> my_pdfpage=cpage_pdf([[crop_fig,crop_fig],[crop_fig, crop_fig],[crop_fig,crop_fig]],
          ... widths=[0.2,0.8], heights=[0.33,0.33,0.33], page_width=800., page_height=1200.,
          ... scale=0.95, openright=True, title='Page title', x=-5, y=10, titlebox=True,
          ... pt='huge', font='ptm', background='yellow') # Font name is 'Times'
        """
        super(cpage_pdf, self).__init__(fig_lines=fig_lines, orientation=orientation, page_width=page_width,
                                        page_height=page_height, title=title, x=x, y=y)
        self.scale = scale
        self.openright = openright
        self.titlebox = titlebox
        self.pt = pt
        self.font = font
        self.background = background
        if not isinstance(fig_lines, (list, cens)):
            raise Climaf_Classes_Error("fig_lines must be a CliMAF ensemble or a list "
                                       "of lists (each representing a line of figures)")
        elif isinstance(fig_lines, list):
            self.check_figs_list(fig_lines=fig_lines,
                                 widths=widths, heights=heights)
        else:  # case of an ensemble (cens)
            self.check_figs_cens(fig_lines=fig_lines,
                                 widths=widths, heights=heights)
        #
        self.crs = self.buildcrs()

    def buildcrs(self, crsrewrite=None, period=None):
        """
        Return the CRS of the page : a call to cpage_pdf with the lines of
        figures, widths, heights, page size, scale and openright as
        arguments. Arguments related to title are included only if title
        is a non-empty string.
        """
        rep = super(cpage_pdf, self).buildcrs(
            crsrewrite=crsrewrite, period=period)
        param = "%s,%s, page_width=%d, page_height=%d, scale=%.2f, openright='%s'" % \
                (repr(self.widths), repr(self.heights), self.page_width,
                 self.page_height, self.scale, self.openright)
        if isinstance(self.title, six.string_types) and len(self.title) != 0:
            # The keyword must be the one of the constructor argument
            # ('background'); it used to be misspelled, hence CRS built
            # before that fix for pages with a title are different
            param = "%s, title='%s', x=%d, y=%d, titlebox='%s', pt='%s', font='%s', background='%s'" % \
                    (param, self.title, self.x, self.y, self.titlebox,
                     self.pt, self.font, self.background)
        rep = "cpage_pdf([%s],%s)" % (",".join(rep), param)

        return rep


def guess_projects(crs):
    """
    List the project names of the datasets referenced in a CRS expression.

    Each ``ds(...)`` occurrence found in 'crs' yields one entry, in order of
    appearance (a project used by several datasets is listed several times).
    """

    def guess_project(ds_args):
        """
        Guess the project name from the argument text of one ``ds(...)``
        occurrence, assuming as little as possible about the separator
        used by the project
        """
        candidates = ('.', '_', '£', '$', '@', '|', '&', '-', '=', '^',
                      ';', ':', '!', '§', '/', 'ø', '+', '°')
        # The most frequent candidate is taken as the separator; when several
        # candidates tie (including at zero), the one listed last wins
        chosen, highest = None, 0
        for candidate in candidates:
            occurrences = ds_args.count(candidate)
            if occurrences >= highest:
                chosen, highest = candidate, occurrences
        # The project is what lies between the first character (presumably
        # the opening quote of the dataset's CRS) and the first separator
        return ds_args[1:ds_args.find(chosen)]

    return [guess_project(ds_args)
            for ds_args in re.findall(r"ds\(([^)]*)", crs)]


def browse_tree(cobj, func, results):
    """ Browse a CliMAF object's tree, accumulating in 'results' the
    values returned by 'func' on each tree node or leaf (if they are
    truthy)

    - datasets and dummies : 'func' is applied to the object itself
    - ctree : 'func' is applied to the tree's operator, then each operand
      is browsed
    - scriptChild : the father object is browsed
    - cpage : each figure of each line of figures is browsed
    - None is silently ignored; any other type is reported as an error
      through the logger

    :param cobj: the CliMAF object to browse
    :param func: a function of one argument
    :param results: a list, modified in place
    :return: None
    """
    if isinstance(cobj, (cdataset, cdummy)):
        res = func(cobj)
        if res:
            results.append(res)
    elif isinstance(cobj, ctree):
        res = func(cobj.operator)
        if res:
            results.append(res)
        for op in cobj.operands:
            browse_tree(op, func, results)
    elif isinstance(cobj, scriptChild):
        browse_tree(cobj.father, func, results)
    elif isinstance(cobj, cpage):
        for line in cobj.fig_lines:
            for fig in line:
                browse_tree(fig, func, results)
    elif cobj is None:
        return
    else:
        clogger.error("Cannot yet handle object :%s", repr(cobj))
        return


def domainOf(cobject):
    """ Returns a domain for a CliMAF object.

    - dataset : its domain; a list is rendered as comma-separated integers,
      and "global" is rendered as an empty string
    - ctree : domain of the first operand
    - scriptChild : domain of the father
    - cens : domain of the first member
    - None : the string "none"
    - anything else : None, after logging an error unless the argument is
      an empty string
    """
    if isinstance(cobject, cdataset):
        domain = cobject.domain
        if isinstance(domain, list):
            # All coordinates but the last get a trailing comma
            leading = "".join("%d," % coord for coord in domain[0:-1])
            return "%s%d" % (leading, domain[-1])
        return "" if domain == "global" else domain
    if isinstance(cobject, ctree):
        clogger.debug(
            "For now, domainOf logic for scripts output is basic (1st operand) - TBD")
        return domainOf(cobject.operands[0])
    if isinstance(cobject, scriptChild):
        clogger.debug(
            "For now, domainOf logic for scriptChilds is basic - TBD")
        return domainOf(cobject.father)
    if isinstance(cobject, cens):
        clogger.debug(
            "for now, domainOf logic for 'cens' objet is basic (1st member)- TBD")
        first_member = list(cobject.values())[0]
        return domainOf(first_member)
    if cobject is None:
        return "none"
    if cobject != "":
        clogger.error("Unkown class for argument " + repr(cobject))
    return None


def varOf(cobject):
    """ Returns the 'variable' attribute of a CliMAF object, as resolved by attributeOf """
    return attributeOf(cobject, "variable")


def modelOf(cobject):
    """ Returns the 'model' attribute of a CliMAF object, as resolved by attributeOf """
    return attributeOf(cobject, "model")


def simulationOf(cobject):
    """ Returns the 'simulation' attribute of a CliMAF object, as resolved by attributeOf """
    return attributeOf(cobject, "simulation")


def experimentOf(cobject):
    """ Returns the 'experiment' attribute of a CliMAF object, as resolved by attributeOf """
    return attributeOf(cobject, "experiment")


def realizationOf(cobject):
    """ Returns the 'realization' attribute of a CliMAF object, as resolved by attributeOf """
    return attributeOf(cobject, "realization")


def projectOf(cobject):
    """ Returns the 'project' attribute of a CliMAF object, as resolved by attributeOf """
    return attributeOf(cobject, "project")


def realmOf(cobject):
    """ Returns the 'realm' attribute of a CliMAF object, as resolved by attributeOf """
    return attributeOf(cobject, "realm")


def gridOf(cobject):
    """ Returns the 'grid' attribute of a CliMAF object, as resolved by attributeOf """
    return attributeOf(cobject, "grid")


def attributeOf(cobject, attrib):
    """ Returns the value of attribute 'attrib' for a CliMAF object.

    Cases are tried in this order :

    - dataset : the object's own attribute if it is not None, otherwise
      the entry of its 'kvp' dictionary (None if missing)
    - cens : the value for the first member
    - any object having a truthy attribute of that name : that attribute
    - ctree : the object's attribute if it is not None, otherwise the
      value for the first operand
    - cdummy : the string "dummy"
    - cpage or cpage_pdf : None
    - None : an empty string

    Any other object raises Climaf_Classes_Error.
    """
    if isinstance(cobject, cdataset):
        own = getattr(cobject, attrib, None)
        return cobject.kvp.get(attrib) if own is None else own

    if isinstance(cobject, cens):
        first_member = list(cobject.values())[0]
        return attributeOf(first_member, attrib)

    value = getattr(cobject, attrib, None)
    if value:
        clogger.debug("Find value for object's %s... %s" % (attrib, value))
        return value

    if isinstance(cobject, ctree):
        clogger.debug("for now, varOf logic is basic (1st operand) - TBD")
        # TODO: Check which operands in the correct one
        if value is not None:
            # Attribute exists but is falsy : it is returned as is
            clogger.debug("Find value for current object... %s" % value)
            return value
        value = attributeOf(cobject.operands[0], attrib)
        clogger.debug("Find value for current first operand... %s" % value)
        return value

    if isinstance(cobject, cdummy):
        return "dummy"
    if isinstance(cobject, (cpage, cpage_pdf)):
        return None
    if cobject is None:
        return ''
    raise Climaf_Classes_Error(
        "Unknown class for argument " + repr(cobject))


def timePeriod(cobject):
    """ Returns a time period for a CliMAF object.

    - dataset : its period
    - ctree : if the script's flag 'doCatTime' is set and there are several
      operands, the merge of all operands' periods (an error is raised if
      the merge yields more than one period); otherwise the period of the
      first operand
    - scriptChild : period of the father
    - cens : period of the first member
    - anything else : None, without any message
    """
    if isinstance(cobject, cdataset):
        return cobject.period

    if isinstance(cobject, ctree):
        clogger.debug("timePeriod : processing %s,operands=%s" %
                      (cobject.script, repr(cobject.operands)))
        concatenates = cobject.script.flags.doCatTime and len(cobject.operands) > 1
        if not concatenates:
            clogger.debug(
                "timePeriod logic for script is 'choose 1st operand' %s" % cobject.script)
            return timePeriod(cobject.operands[0])
        clogger.debug(
            "Building composite period for results of %s" % cobject.operator)
        merged = merge_periods([timePeriod(operand) for operand in cobject.operands])
        if len(merged) > 1:
            # NOTE(review): Climaf_Driver_Error is not among the names this
            # module imports explicitly from climaf.utils; confirm it is
            # provided by the star import from env.environment
            raise Climaf_Driver_Error("Issue when time assembling with %s, periods are not consecutive : %s" %
                                      (cobject.operator, merged))
        return merged[0]

    if isinstance(cobject, scriptChild):
        clogger.debug(
            "for now, timePeriod logic for scriptChilds is basic - TBD")
        return timePeriod(cobject.father)

    if isinstance(cobject, cens):
        clogger.debug(
            "for now, timePeriod logic for 'cens' objet is basic (1st member)- TBD")
        first_member = list(cobject.values())[0]
        return timePeriod(first_member)

    return None


def resolve_star_period(kwargs):
    """
    Try to replace a period given as '*' by the single matching period.

    Applies only when dict 'kwargs' has key 'period' with value '*' and no
    other value contains '*' or '?'. In that case, a dataset built from
    'kwargs' is explored for choices; if exactly one period is found,
    kwargs['period'] is set to its string form.

    :param kwargs: dict of dataset facets, possibly modified in place
    :return: True if kwargs['period'] was set, False otherwise
    """
    if 'period' not in kwargs or kwargs['period'] != '*':
        return False

    has_wildcard = ["*" in kwargs[facet] or "?" in kwargs[facet]
                    for facet in kwargs if facet != 'period']
    if any(has_wildcard):
        return False

    choices = cdataset(** select_projects(** kwargs)).explore(option='choices')
    if 'period' not in choices:
        return False
    periods = choices['period']
    if len(periods) != 1:
        return False
    kwargs['period'] = str(periods[0])
    return True


def resolve_first_or_last_years(kwargs, duration, option="last"):
    """
    Returns a dataset after translation of period like 'last_50y'

    kwargs['period'] is first set to '*' and the corresponding dataset is
    explored for choices. If periods are found, kwargs['period'] is set
    using lastyears (option 'last', applied to the last period found) or
    firstyears (option 'first', applied to the first period found) with
    int(duration); with any other option, or when no period is found, it
    stays '*'. The dataset built from the resulting 'kwargs' is then
    explored with option 'resolve'.

    :param kwargs: dict of dataset facets; its 'period' entry is overwritten
    :param duration: converted with int() before use
    :param option: 'last' or 'first'
    :return: the result of exploring the final dataset with 'resolve'
    """
    kwargs['period'] = '*'
    choices = ds(**kwargs).explore(option='choices')
    if 'period' not in choices:
        kwargs['period'] = '*'
    else:
        found = choices['period']
        if option == 'last':
            kwargs['period'] = lastyears(found[-1], int(duration))
        elif option == 'first':
            kwargs['period'] = firstyears(found[0], int(duration))
    return ds(**kwargs).explore('resolve')


def test():
    """
    Ad-hoc smoke test : calls cdef for a few facets, creates a dataset and
    an operator tree applied to it, then prints the tree.
    """
    facet_values = (
        ("project", "CMIP5"),
        ("model", "CNRM-CM5"),
        ("experiment", "historical"),
        ("simulation", "r1i1p1"),
        ("period", "197901-198012"),
        ("domain", "global"),
    )
    for facet, value in facet_values:
        cdef(facet, value)
    #
    dataset = cdataset(experiment="rcp85", variable="tos",
                       period="19790101-19790102")
    print(ctree("operator", dataset, para1="val1", para2="val2"))


def t2():
    """
    Ad-hoc scratch check : calls period() on the string "1984-1984".

    The result is only bound to a local name; it is neither returned nor
    inspected.
    """
    # NOTE(review): 'period' is not among the names this module imports
    # explicitly from climaf.period (init_period, cperiod, ...); unless the
    # star import from env.environment provides it, this call raises
    # NameError - confirm
    p = period("1984-1984")


# When this module is run as a script, execute the ad-hoc test() function
if __name__ == "__main__":
    test()