Source code for climaf.classes

# -*- coding: iso-8859-1 -*-
""" 
 Basic types and syntax for a CLIMAF Reference Syntax interpreter and driver.
 This is a first prototype, where the interpreter is Python itself


"""
# Created : S.Senesi - 2014

import re, string, copy, os.path

import dataloc
from period    import init_period, cperiod
from clogging  import clogger, dedent
from netcdfbasics import fileHasVar, varsOfFile, timeLimits, model_id
from decimal   import Decimal

#: Registry of declared projects : maps a project name to its cproject object
cprojects = {}

#: Variable aliases, stored as one dictionary per project
aliases = {}

#: Frequency names, stored as one dictionary per project
frequencies = {}

#: Realm names, stored as dictionaries (presumably one per project, like
#: the registries above - to be confirmed against the code filling it)
realms = {}

class cproject():
    def __init__(self,name,  *args, **kwargs) :
        """
        Declare a project and its facets/attributes in CliMAF (see below)

        Args:
          name (string) : project name; 
           do not use the chosen separator in it (see below)
          args (strings) : attribute names; 
           they are free; do not use the chosen separator in it (see below); **CliMAF 
           anyway will add attributes : 
           project, simulation, variable, period, and domain**
          kwargs (dict) :
           can only be used with keywords :

            - ``sep`` or ``separator`` for indicating the symbol separating
              facets in the dataset syntax. Defaults to ".". If both are
              provided, ``sep`` wins. The comma is forbidden.
            - ``ensemble`` for declaring a list of attribute
              names which are allowed for defining an ensemble in
              this project ('simulation' is automatically allowed)

        Returns : a cproject object, which string representation is
        the pattern later used in CliMAF Reference Syntax for
        representing datasets in this project

        Raises : Climaf_Classes_Error if the separator is ','

        A 'cproject' is the definition of a set of attributes, or
        facets, which values will completely define a 'dataset' as
        managed by CliMAF. Its name is one of the possible keys 
        for describing data locations (see
        :py:class:`~climaf.dataloc.dataloc`)

        For instance, cproject CMIP5, after its Data Reference Syntax, 
        has attributes : 
        model, simulation (used for rip), experiment, variable, frequency, realm, table, version


        **A number of projects are built-in**. See :py:mod:`~climaf.projects`
        
        A dataset in a cproject declared as ::

        >>> cproject('MINE','myfreq','myfacet',sep='_')

        will return ::

          ${project}_${simulation}_${variable}_${period}_${domain}_${myfreq}_${myfacet}

        and will have datasets represented as  e.g.::

          'MINE_hist_tas_[1980-1999]_global_decadal_gabu'

        while an example for built-in cproject CMIP5 will be::

          'CMIP5.historical.pr.[1980].global.monthly.CNRM-CM5.r1i1p1.mon.Amon.atmos.last'

        The attributes list should include all facets which are useful
        for distinguishing datasets from each other, and for computing
        datafile pathnames in the 'generic' organization (see
        :py:class:`~climaf.dataloc.dataloc`)

        A default value for a given facet can be specified, by providing a tuple
        (facet_name,default_value) instead of the facet name. This default value is
        however of lower priority than the value set using :py:func:`~climaf.classes.cdef`

        A project can be declared as having non-standard variable
        names in datafiles, or variables that should undergo re-scaling; see
        :py:func:`~climaf.classes.calias`

        A project can be declared as having non-standard frequency names (this is 
        used when accessing datafiles); see :py:func:`~climaf.classes.cfreqs`)

        """
        if name in cprojects : clogger.warning("Redefining project %s"%name)
        self.project=name
        #
        # Facets imposed by CliMAF always come first, in this order
        forced=['project','simulation', 'variable', 'period', 'domain']
        self.facets=list(forced)
        self.facet_defaults=dict()
        for a in args :
            # A facet is declared either by its name or by a pair (name, default value)
            if isinstance(a,tuple) :
                facet_name,facet_default=a
                self.facet_defaults[facet_name]=facet_default
            else :
                facet_name=a
            if facet_name not in forced : self.facets.append(facet_name)
        #
        # 'sep' is tested last, hence it takes precedence over 'separator'
        self.separator="."
        if "separator" in kwargs : self.separator=kwargs['separator']
        if "sep"       in kwargs : self.separator=kwargs['sep']
        if self.separator=="," :
            raise Climaf_Classes_Error("Character ',' is forbidden as a project separator")
        cprojects[name]=self
        # Build the pattern for the datasets CRS for this cproject. Joining
        # (rather than appending the separator and chopping the last
        # character) also works for separators which are not one-character long
        self.crs=self.separator.join([ "${%s}"%f for f in self.facets ])
        # Create an attribute holding the list of facets which are allowed
        # for defining an ensemble, and put a first facet there
        self.attributes_for_ensemble=['simulation']
        if 'ensemble' in kwargs :
            self.attributes_for_ensemble.extend(kwargs["ensemble"])

    def __repr__(self):
        return self.crs

    def crs2ds(self,crs) :
        """ 
        Try to interpret string ``crs`` as the CRS of a dataset for
        the cproject. Return the dataset if OK, and None otherwise
        (i.e. if the number of fields does not match the number of
        facets, or if the first field is not the project name)
        """
        fields=crs.split(self.separator)
        if len(fields) != len(self.facets) or fields[0] != self.project :
            return None
        return cdataset(**dict(zip(self.facets,fields)))

def cdef(attribute,value=None, project=None):
    """
    Set or get the default value for a CliMAF dataset attribute
    or facet (such as e.g. 'model', 'simulation' ...), for use by
    next calls to :py:class:`~climaf.classes.cdataset()` or to
    :py:func:`~climaf.classes.ds`

    Argument 'project' allows to restrict the use/query of the default
    value to the context of the given 'project'. One can also set the
    (global) default value for attribute 'project'

    When no 'project' is given, there is no check that 'attribute'
    is a valid keyword for a call to ``ds`` or ``cdataset``

    Args:
      attribute (string) : name of the facet
      value : the default value to set; if None, the current default
       value is returned instead
      project (string) : name of a declared project, or None for
       addressing the global defaults

    Returns : when 'value' is None, the default value for the project
    if it is set (and not empty), otherwise the global default value,
    otherwise an empty string. Returns None when setting a value.

    Raises : Climaf_Classes_Error if 'project' was not declared, or if
    it does not use facet 'attribute'

    Example::

    >>> cdef('project','OCMPI5')
    >>> cdef('frequency','monthly',project='OCMPI5')
    """
    
    # Global defaults are stored in the project registered under key None
    if project not in cprojects :
        raise Climaf_Classes_Error("project '%s' has not yet been declared"%project)
    # The default value for attribute 'project' can only be a global one
    if attribute == 'project' : project=None
    #
    if project and attribute not in cprojects[project].facets :
        raise Climaf_Classes_Error("project '%s' doesn't use facet '%s'"%(project,attribute))
    if value is None :
        rep=cprojects[project].facet_defaults.get(attribute,None)
        if not rep :
            # No (or empty) project-level default : fall back to the global one
            rep=cprojects[None].facet_defaults.get(attribute,"")
        return rep
    else :
        cprojects[project].facet_defaults[attribute]=value
            

# Declare a project registered under key None : its facet_defaults hold
# the global default values which cdef() reads and writes when it is
# called without a 'project' argument
cproject(None)
# Global default value for facet 'domain'
cdef("domain","global")


# Registry meant to map a CRS string to the cobject instance it defines
# (cobject.register() does not currently fill it)
cobjects = {}

class cobject():
    """
    Base class for CliMAF objects. Both string representations rely on
    method buildcrs(), which this class does not define : it has to be
    provided by the derived classes
    """
    def __init__(self):
        # 'crs' is the string expression defining the object in the
        # CLIMAF Reference Syntax; "void" is only a placeholder
        self.crs="void"

    def __repr__(self):
        return self.buildcrs()

    def __str__(self):
        return self.buildcrs()

    def register(self):
        # Recording the object in 'cobjects' (keyed by its crs) is
        # disabled for the time being : nothing is done
        return None

    def erase(self):
        # Counterpart of register() : nothing to remove for the time being
        return None

class cdummy(cobject):
    def __init__(self):
        """
        A cdummy object stands for a dummy argument in the CRS
        """
        # Nothing to initialize; note that the base class initializer is
        # not called, hence no 'crs' attribute is set
        pass

    def buildcrs(self,period=None,crsrewrite=None):
        # Both arguments are accepted and ignored : the CRS of a dummy
        # argument is a constant
        return 'ARG'


def processDatasetArgs(**kwargs) :
    """
    Perform basic checks on kwargs for functions cdataset and eds
    regarding the project where the dataset is defined.
    Also complement with default values as handled by the
    project's definition and by cdef()

    Args:
      kwargs (dict) : facet values, keyed by facet name. The project is
       read from key 'project', or from ``cdef("project")`` if missing.
       Only the facets of that project are looked up, plus 'member',
       which is accepted as a synonym for 'simulation' in project CMIP5

    Returns : a dict with one entry per facet of the project; entry
    'period' is a cperiod object, except if it is the wildcard "*"

    Raises : Climaf_Classes_Error if the project is missing or was not
    declared, if a facet value includes the project separator, if
    'simulation' is used with project CMIP6, if a facet has value None,
    or if the period cannot be interpreted
    """
    if 'project' in kwargs : project=kwargs['project']
    else : project= cdef("project")
    if project is None :
        raise Climaf_Classes_Error("Must provide a project (Can use cdef)")
    elif project not in cprojects :
        raise Climaf_Classes_Error(
            "Dataset's project '%s' has not "
            "been described by a call to cproject()"%project)
    attval=dict()
    attval["project"]=project
    sep=cprojects[project].separator
    #
    # Register facets values
    for facet in cprojects[project].facets :
        # A missing or empty value is replaced by the default value
        if facet in kwargs and kwargs[facet] : val=kwargs[facet]
        else: val=cdef(facet,project=project)
        attval[facet]=val
        if val :
            if isinstance(val,list) : listval=val
            else : listval=[val]
            # A value including the separator would make the CRS ambiguous
            for lval in listval :
                if isinstance(lval,str) and lval.find(sep) >= 0 :
                    raise Climaf_Classes_Error(
                        "You cannot use character '%s' when setting '%s=%s' because "
                        "it is the declared separator for project '%s'. "
                        "See help(cproject) for changing it, if needed"%(sep,facet,val,project))
    if (attval['project'] == 'CMIP5'):
        # Allow for a synonym for 'simulation' in CMIP5 : 'member'
        if 'member' in kwargs and kwargs['member'] not in [None, '']:
            attval['simulation']=kwargs['member']
            clogger.info('Attribute "member" in project CMIP5 has been translated to "simulation"')
        # Special processing for CMIP5 fixed fields : handling redundancy in facets
        if ( attval['table']=='fx' or attval['period']=='fx' or 
             attval['simulation']=='r0i0p0' or attval['frequency']=='fx') :
            attval['table']='fx' ; attval['period']='fx' 
            attval['simulation']='r0i0p0' ; attval['frequency']='fx'
    # Special processing for CMIP6  : facet 'simulation' is forbidden (must use 'realization')
    # (strings must be compared by value : identity with a literal is not reliable)
    if (attval['project'] == 'CMIP6')  and 'simulation' in kwargs and kwargs['simulation'] != '':
        raise Climaf_Classes_Error("You cannot use attribute 'simulation' in CMIP6; please use 'realization'. This if for kwargs=%s"%repr(kwargs))
    
    errmsg=""
    for facet in cprojects[project].facets :
        if attval[facet] is None :
            e="Project '%s' needs facet '%s'. You may use cdef() for setting a default value"\
               %(project,facet)
            errmsg+=" "+e
    if errmsg != "" : raise Climaf_Classes_Error(errmsg)
    #
    for facet in attval :
        # Facet specific processing
        if facet=='period' :
            if not isinstance(attval['period'],cperiod) and attval['period'] != "*" :
                try :
                    attval['period']=init_period(attval['period'])
                except Exception :
                    raise Climaf_Classes_Error("Cannot interpret period for %s"%repr(attval['period']))
        # Check for typing or user's logic errors
        if facet not in cprojects[project].facets :
            e="Project %s doesn't have facet %s"%(project,facet)
            errmsg+=" "+e
    if errmsg != "" : raise Climaf_Classes_Error(errmsg)
    # Safety net : the error must actually be raised, not merely instantiated
    if 'period' in attval and not isinstance(attval['period'],cperiod) and attval['period'] !=  "*" :
        raise Climaf_Classes_Error("at end of  process.. : period is not a cperiod")
    return attval


[docs]class cdataset(cobject):
    #def __init__(self,project=None,model=None,simulation=None,period=None,
    #             rip=None,frequency=None,domain=None,variable=None,version='last') :
    def __init__(self,**kwargs) :
        """
        Create a CLIMAF dataset. 
        
        A CLIMAF dataset is a description of what the data is (rather than 
        the data itself or a file).  It is basically a set of pairs
        attribute-value. The list of attributes actually used to
        describe a dataset is defined by the project it refers
        to. 

        To display the attributes you may use for a given project, type e.g.:

        >>> cprojects["CMIP5"]

        For further details on projects , see
        :py:class:`~climaf.classes.cproject`

        None of the project's attributes are mandatory arguments, because
        all attributes default to the value set by
        :py:func:`~climaf.classes.cdef` (which also applies if
        providing a None value for an attribute)

        Some attributes have a special format or processing : 
        
        - period : see :py:func:`~climaf.period.init_period`

        - domain : allowed values are either 'global' or a list for
          latlon corners ordered as in : [ latmin, latmax, lonmin,
          lonmax ]
            
        - variable :  name of the geophysical variable ; this should be :

           - either a variable actually included in the datafiles,

           - or a 'derived' variable (see  :py:func:`~climaf.operators.derive` ),
             
           - or, an aliased variable name (see :py:func:`~climaf.classes.calias` )

        - in project CMIP5 , for facets (frequency, simulation, period, table )  : 
          if any is 'fx' (or 'r0i0p0' for simulation), the others are forced to
          'fx' (resp. 'r0i0p0') too.

        Example, using no default value, and addressing some CMIP5 data ::

          >>>  cdataset(project='CMIP5', model='CNRM-CM5', experiment='historical', frequency='monthly',
          >>>           simulation='r2i3p9', domain=[40,60,-10,20], variable='tas', period='1980-1989', version='last')

        You may use wildcard ('*') in attribute values, and use  :py:meth:`~climaf.classes.cdataset.explore`
        for having CliMAF doing something sensible matching such attributes with available data
        
        """
        #
        attval=processDatasetArgs(**kwargs)
        #
        # TBD : Next lines for backward compatibility, but should re-engineer 
        self.project   = attval["project"]
        self.simulation= attval['simulation']
        self.variable  = attval['variable']
        self.period    = attval['period']
        self.domain    = attval['domain']
        #
        self.model     = attval.get('model',"*")
        self.frequency = attval.get('frequency',"*")
        # Names under which the frequency denotes an annual cycle : the
        # normalized name is annual_cycle, but allow also for 'seasonal'
        # for the time being, and for the name declared for this project
        cycle_names=['seasonal','annual_cycle']
        freqs_dic=frequencies.get(self.project,None)
        if freqs_dic and 'annual_cycle' in freqs_dic :
            cycle_names.append(freqs_dic['annual_cycle'])
        # The period may be the wildcard string "*", which cannot bear
        # attribute 'fx' : only flag actual cperiod objects
        if self.frequency in cycle_names and isinstance(self.period,cperiod) :
            self.period.fx=True
        #
        self.kvp=attval
        # When set, alias is a 6-tuple : filevar, scale, offset, units,
        # filenameVar, missing
        self.alias=varIsAliased(self.project,self.variable)
        #
        if ("," in self.variable and self.alias) :
            filevar,scale,offset,units,filenameVar,missing=self.alias
            if (filevar != self.variable or scale != 1. or offset != 0 or missing ) :
                raise Climaf_Classes_Error("Cannot alias/scale/setmiss on group variable")
        # Build CliMAF Ref Syntax for the dataset
        self.crs=self.buildcrs()
        # 
        self.files=None
        self.local_copies_of_remote_files=None
        self.register()

    def setperiod(self,period) :
        """
        Change the period of the dataset, both as an attribute and in
        the facets dictionary, and rebuild its CRS accordingly
        """
        # Un-register under the former CRS, re-register under the new one
        self.erase()
        self.period=self.kvp['period']=period
        self.crs=self.buildcrs()
        self.register()
        
    def buildcrs(self,period=None,crsrewrite=None):
        """
        Return the CRS expression for the dataset, i.e. ``ds('...')``
        where the string is the CRS pattern of the dataset's project,
        filled with the dataset facets values

        Args:
          period : if not None, it replaces the dataset period in the
           returned expression (the dataset itself is left unchanged)
          crsrewrite : not used here
        """
        crs_template=string.Template(cprojects[self.project].crs)
        dic=self.kvp.copy()
        if period is not None : dic['period']=period
        # A domain provided as a list of corners shows as its printed form
        if isinstance(dic['domain'],list) : dic['domain']=repr(dic['domain'])
        return "ds('%s')"%crs_template.safe_substitute(dic)

    def isLocal(self) :
        """
        Return the answer of dataloc.isLocal() for the project, model,
        simulation and frequency of the dataset
        """
        # Model defaults to the wildcard when the attribute is not set
        model_name=getattr(self,"model","*")
        answer=dataloc.isLocal(project=self.project, model=model_name,
                               simulation=self.simulation, frequency=self.frequency)
        return answer
        
    def isCached(self) :
        """ TBD : analyze if a remote dataset is locally cached

        For the time being, always returns False
        """
        return False

    def oneVarPerFile(self):
        """
        Return True if the first item of every triplet returned by
        dataloc.getlocs() for this dataset is true (or if there is no
        triplet at all)

        NOTE(review): how this relates to files holding a single
        variable depends on dataloc.getlocs - to be confirmed
        """
        locations=dataloc.getlocs(project=self.project, model=self.model,
                                  simulation=self.simulation, frequency=self.frequency)
        organizations=[ organization for organization,_freq,_url in locations ]
        return all(organizations)
    
    def periodIsFine(self):
        """ Not yet implemented : logs it, and always returns False """
        clogger.debug("always returns False, yet - TBD")
        return False
        
    def domainIsFine(self):
        """ Tell if the dataset domain is 'global' (a provisional criterion) """
        clogger.debug("a bit too simple yet (domain=='global')- TBD")
        is_global=(self.domain == 'global')
        return is_global
        
    def periodHasOneFile(self) :
        """
        Tell if the value returned by baseFiles(), once split on
        spaces, has less than two items
        """
        filenames=self.baseFiles().split(" ")
        return len(filenames) < 2

    def hasOneMember(self) :
        """ Not yet implemented : logs it, and always returns True """
        clogger.debug("always returns True, yet - TBD")
        return True

    def hasExactVariable(self):
        """
        Return True for a group variable (i.e. with a comma in its
        name), assuming that such variables do not need aliasing.
        Otherwise, not yet implemented : logs it, and returns False
        """
        if "," not in self.variable :
            clogger.debug("always returns False, yet - TBD")
            return False
        return True
    
    def missingIsOK(self):
        """
        Tell if there is no missing value to set for the dataset
        variable : returns True if the variable has no alias, or if
        the last item of its alias (the missing value) is None
        """
        # The alias is an attribute of the dataset, not a global name
        if not self.alias : return True
        _,_,_,_,_,missing=self.alias
        return missing is None
    
[docs]    def explore(self,option='check_and_store',sort_periods_on=None):
        """
        Versatile datafile exploration for a dataset which possibly has wildcards (* and ? ) in  
        attributes. 

        ``option`` can be :
        
          - 'choices' for returning a dict which keys are wildcard attributes and entries 
            are values list
          - 'resolve' for returning a NEW DATASET with instanciated attributes (if uniquely)
          - 'ensemble' for returning AN ENSEMBLE based on multiple possible values of a 
            single attribute
          - 'check_and_store' (or missing) for just identifying and storing dataset files list 
            (while ensuring non-ambiguity check for wildcard attributes)

        This feature works only for projects which organization is of type 'generic'

        Attribute 'period' cannot use a * without being  == * ; in that case, the period of all 
        matching files will be either :

          - aggregated among all instances of all attributes with wildcards (default)
          - or aggregated after being sorted on attribute ``sort_periods_on``, if provided

        Toy example ::

          >>> rst=ds(project="example", simulation="*", variable="rst", period="1980-1981")
          >>> rst
          ds('example|*|rst|1980-1981|global|monthly')
          
          >>> rst.explore('choices')
          {'simulation': ['AMIPV6ALB2G']}
          
          >>> instanciated_dataset=rst.explore('resolve')
          >>> instanciated_dataset
          ds('example|AMIPV6ALB2G|rst|1980-1981|global|monthly')
          
          >>> my_ensemble=rst.explore('ensemble')
          error    : "Creating an ensemble does not make sense because all wildcard attributes have a single possible value ({'simulation': ['AMIPV6ALB2G']})"

        Real life example for options ``choices`` and ``ensemble`` ::

          >>> rst=ds(project="CMIP6", model='*', experiment="*ontrol*", realization="r1i1p1f*", table="Amon", variable="rsut", period="1980-1981")
          >>> clog('info')
          >>> rst.explore('choices')
          info     : Attribute institute has matching value CNRM-CERFACS
          info     : Attribute experiment has multiple values : set(['piClim-control', 'piControl'])
          info     : Attribute grid has matching value gr
          info     : Attribute realization has matching value r1i1p1f2
          info     : Attribute mip has multiple values : set(['CMIP', 'RFMIP'])
          info     : Attribute model has multiple values : set(['CNRM-ESM2-1', 'CNRM-CM6-1'])
          {'institute': ['CNRM-CERFACS'], 'experiment': ['piClim-control', 'piControl'], 'grid': ['gr'], 
          'realization': ['r1i1p1f2'], 'mip': ['CMIP', 'RFMIP'], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1']}

          # Let us further select by setting experiment=piCOntrol
          >>> mrst=ds(project="CMIP6", model='*', experiment="piControl", realization="r1i1p1f*", table="Amon", variable="rsut", period="1980-1981")
          >>> mrst.explore('choices')
          {'institute': ['CNRM-CERFACS'], 'mip': ['CMIP'], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'], 'grid': ['gr'], 'realization': ['r1i1p1f2']}
          >>> small_ensemble=mrst.explore('ensemble')
          >>> small_ensemble
          cens({
                'CNRM-ESM2-1':ds('CMIP6%%rsut%1980-1981%global%/cnrm/cmip%CNRM-ESM2-1%CNRM-CERFACS%CMIP%Amon%piControl%r1i1p1f2%gr%latest'),
                'CNRM-CM6-1' :ds('CMIP6%%rsut%1980-1981%global%/cnrm/cmip%CNRM-CM6-1%CNRM-CERFACS%CMIP%Amon%piControl%r1i1p1f2%gr%latest')
               })

        Identify period covered by data, and versions ::

          >>> d=ds(project="CMIP6",experiment="piControl", realization='r1i1p1f2', variable="so", 
          ... table="*", period="*" , model="*",version="*")
          >>> clog('info')
          >>> d.explore('choices')
          info     : Attribute institute='*' has matching value 'CNRM-CERFACS'
          info     : Attribute perios='*' has matching value [1850-2349]
          info     : Attribute version='*' has multiple values : ['v0', 'v20180720', 'latest']
          info     : Attribute grid='g*' has matching value 'gn'
          info     : Attribute mip='*' has matching value 'CMIP'
          info     : Attribute table='*' has matching value 'Omon'
          info     : Attribute model='*' has multiple values : ['CNRM-ESM2-1', 'CNRM-CM6-1']
          {'institute': 'CNRM-CERFACS', 'period': [1850-2349], 'version': ['v0', 'v20180720', 'latest'], 'grid': 'gn', 'table': 'Omon', 'mip': 'CMIP', 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1']}

        Analyze available periods for each value of a given attribute ::

          >>> rsut=ds(project="CMIP6", model='*', experiment="piControl*", realization="r1i1p1f*", table="Amon", variable="rsut", period="*")
          >>> rsut.explore('choices','model')
          {'institute': 'CNRM-CERFACS', 'period': {'CNRM-ESM2-1': [1850-2349], 'CNRM-CM6-1': [1850-2349]}, 
             'experiment': 'piControl', 'grid': 'gr', 'realization': 'r1i1p1f2', 'mip': 'CMIP', 
             'model': ['CNRM-ESM2-1', 'CNRM-CM6-1']}

          # Could also be written : rsut.explore(option='choices',sort_periods_on='model')

        """
        dic=self.kvp.copy()
        if self.alias : 
            filevar,_,_,_,filenameVar,_=self.alias
            dic["variable"]=string.Template(filevar).safe_substitute(dic)
            if filenameVar : dic["filenameVar"]=filenameVar
        clogger.debug("Looking with dic=%s"%`dic`)
        wildcards=None
        if option != 'check_and_store' : wildcards=dict()
        files=dataloc.selectFiles(return_wildcards=wildcards,sort_periods_on=sort_periods_on,**dic)
        #
        wildcard_attributes_list=[ k for k in dic if type(dic[k]) is str and  "*" in dic[k]]
        if option == 'resolve' :
            clogger.debug("Trying to resolve on attributes %s"%wildcard_attributes_list)
            for kw in wildcards :
                dic[kw]=wildcards[kw][0]
                if len(wildcards[kw]) > 1 :
                    raise Climaf_Classes_Error("Wildcard attribute %s is ambiguous %s"%(kw,wildcards[kw]))
            return ds(**dic)
        elif option == 'choices' :
            clogger.debug("Listing possible values for  %s"%wildcard_attributes_list)
            return wildcards
        elif option == 'ensemble' :
            clogger.debug("Trying to create an ensemble on attributes %s"%wildcard_attributes_list)
            ensemble_kw=None
            for kw in wildcards :
                if len(wildcards[kw]) > 1 :
                    if ensemble_kw is not None :
                        raise \
                            Climaf_Classes_Error("Cannot create an ensemble, because there are at least"+\
                                                 " two possible attributes for defining it : %s and %s"%\
                                                 (ensemble_kw,kw))
                    else: ensemble_kw=kw
                    dic[kw]=wildcards[kw]
                else:
                    dic[kw]=wildcards[kw][0]
            if ensemble_kw is None :
                raise Climaf_Classes_Error("Creating an ensemble does not make sense because all wildcard "+\
                                           "attributes have a single possible value (%s)"%wildcards)
            return eds(**dic)
        elif option == 'check_and_store' :
            self.files=files
        else:
            raise Climaf_Classes_Error("Unknown option %s"%(option))

    def baseFiles(self,force=False):
        """ Returns the (local or remote) files which include the data
        for the dataset, as stored in attribute 'files' by explore()

        The stored value is used as is, except if it is None, or if arg
        force is True and the project is not 'file' : explore() is then
        called first
        """
        refresh=(force and self.project != 'file')
        if refresh or self.files is None :
            self.explore()
        return self.files

[docs]    def listfiles(self,force=False):
        """ Returns the list of (local or remote) files which include the data
        for the dataset
        
        Use cached value unless called with arg force=True
        """
        return self.baseFiles(force=force)

    def hasRawVariable(self) :
        """ Meant to test data files for telling if the dataset variable
        is actually included in files (rather than being a derived,
        virtual variable)

        For the time being, only logs a reminder and returns False, which
        leads to always consider that variables declared as 'derived'
        actually are derived """
        reminder="TBD: actually test variables in files, rather than assuming that variable %s is virtual for dataset %s"
        clogger.debug(reminder%(self.variable,self.crs))
        return False

[docs]    def check(self):
        """
        Check time consistency of first variable of a dataset or ensemble members:
        - check if first data time interval is consistent with dataset frequency
        - check if file data have a gap
        - check if period covered by data files actually includes the whole of dataset period
          
        Returns: True if period of data files included dataset period, False otherwise.
        
        Examples:
    
        >>> # Dataset with monthly frequency  
        >>> tas=ds(project='example', simulation='AMIPV6ALB2G', variable='tas',period='1980-1981')
        >>> res1=tas.check()
        >>>
        >>> # Ensemble with monthly frequency  
        >>> j0=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1980')
        >>> j1=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1981')
        >>> ens=cens({'1980':j0, '1981':j1})
        >>> res2=ens.check()
    
        >>> # Define a new project for 'em' data with 3 hours frequency in particular     
        >>> cproject('em_3h','root','group','realm','frequency',separator='|')
        >>> path='/cnrm/cmip/cnrm/simulations/${group}/${realm}/Regu/${frequency}/${simulation}/${variable}_??_YYYY.nc' 
        >>> dataloc(project='em_3h', organization='generic', url=path)
    
        >>> # Dataset with 3h frequency for 'tas' variable (instant) 
        >>> tas_3h=ds(project='em_3h',variable='tas',group='AR4',realm='Atmos',frequency='3Hourly', simulation='A1B',period='2050-2100')
        >>> res3=tas_3h.check()
     
        >>> # Dataset with 3h frequency for 'pr' variable (time mean) 
        >>> pr_3h=ds(project='em_3h',variable='pr',group='AR4',realm='Atmos',frequency='3Hourly', simulation='A1B',period='2050-2100')
        >>> res4=pr_3h.check()   
        
        """
        from anynetcdf import ncf
        from datetime import datetime, timedelta
        from netCDF4 import num2date
        import numpy as np
        
        # Returns the list of files which include the data for the dataset
        # or for each member of the ensemble
        if isinstance(self,cdataset):
            if self.isLocal() or self.isCached() :
                files=self.baseFiles()
            else:
                files=self.local_copies_of_remote_files
            if not files:
                clogger.error('No file found for: %s'%self)
                if not ( self.isLocal() or self.isCached() ):
                    clogger.warning('For remote data, you have to do at first "cfile(%s)"'%self)                
                return(False)
        else :
            clogger.error("Cannot handle %s" %self)
            return
        #
        if files:
            filedate=[]
            clogger.debug("List of selected files: %s"%files)
            
            var=str.split(varOf(self),',')[0] 
            # Concatenate all data files
            for filename in str.split(files,' '):
                fileobj=ncf(filename)
                #
                if self.project in aliases and var in aliases[self.project]:
                    var=aliases[self.project][var][0]
                #
                dimname=''
                for dim in fileobj.variables[var].dimensions:  
                    if 'time' in dim: dimname=dim
                if not dimname: clogger.error('No time dimension for variable %s'%var)
                time_obj=fileobj.variables[dimname]  
                filedate=np.concatenate((filedate,num2date(time_obj.getValue(),\
                                        units=time_obj.units,calendar=time_obj.calendar)))
            
            clogger.debug('Time data of selected files: %s'%filedate)

            # Check if first data time interval is consistent with dataset frequency
            if len(filedate) > 1 :
                filedate_delta=(filedate[1]-filedate[0]).total_seconds()
            else:
                clogger.error('Time dimension is degenerated.')
                return

            if ( (self.frequency == 'monthly' or not self.frequency) and \
                 (filedate_delta > 31.*24.*3600 or filedate_delta <= 29.*24.*3600.) ) \
               or ( self.frequency == 'yearly' and \
                    (filedate_delta > 366.*24.*3600. or filedate_delta < 365.*24.*3600.) ) \
               or ( self.frequency == 'decadal' and \
                    (filedate_delta > 3653.*24.*3600. or filedate_delta < 3651.*24.*3600.) ):
        
                clogger.warning('First data time interval (= %.1f days) is not consistent with dataset frequency (i.e. %s)'\
                                %(filedate_delta/(24.*3600.),self.frequency))

            elif self.frequency == 'daily' and filedate_delta != 86400.:
                clogger.warning('First data time interval (= %.2f hours) is not consistent with dataset frequency (i.e. %s)'\
                                %(filedate_delta/3600.,self.frequency))

            elif (self.frequency == '6h'or self.frequency == '3h' or self.frequency == '1h' \
                  or self.frequency == '3Hourly' or self.frequency == '6Hourly') \
                  and filedate_delta != float(self.frequency[0])*3600.:
                clogger.warning('First data time interval (= %.2f hours) is different to dataset frequency (i.e. %.2f)'\
                                %(filedate_delta/3600.,float(self.frequency[0])))

            # Check if file data have a gap
            i=0
            cpt=0
            while i < len(filedate)-2:
                i+=1
                if (filedate[i+1]-filedate[i]).total_seconds() != filedate_delta:
                    cpt+=1
                    if cpt < 5:
                        if self.frequency == 'monthly' or not self.frequency or \
                           self.frequency == 'yearly' or self.frequency == 'decadal':
                            clogger.error('File data have a gap between indexes %i and %i: delta = %.0f days '\
                                          %(i,i+1,(filedate[i+1]-filedate[i]).total_seconds()/(24.*3600.)) +\
                                          'instead of %.0f days (<=> 1st data interval)'\
                                          %(filedate_delta/(24.*3600.)))
                        elif self.frequency == 'daily' or self.frequency == '6h'or \
                             self.frequency == '3h' or self.frequency == '1h' or \
                             self.frequency == '3Hourly' or self.frequency == '6Hourly':
                            clogger.error('File data have a gap between indexes %i and %i: '%(i,i+1) +\
                                          'delta = %.0f hours instead of %.0f hours (<=> 1st data interval)' \
                                          %((filedate[i+1]-filedate[i]).total_seconds()/3600.,filedate_delta/3600.))
            #
            # Compute period covered by data files
            if self.frequency == 'monthly' or not self.frequency:  
                filedate[0]=filedate[0].replace(day=01)
                if filedate[-1].month > 11 :
                    filedate[-1]=filedate[-1].replace(year=filedate[-1].year+1)
                    filedate[-1]=filedate[-1].replace(month=01)
                    filedate[-1]=filedate[-1].replace(day=01)
                else:
                    filedate[-1]=filedate[-1].replace(month=filedate[-1].month+1)
                    filedate[-1]=filedate[-1].replace(day=01)

            elif self.frequency == 'daily':
                filedate[0]=filedate[0].replace(hour=00)
                filedate[-1]=filedate[-1].replace(hour=00)
                filedate[-1]=filedate[-1] + timedelta(days=1)
              
            elif self.frequency == '6h'or self.frequency == '3h' or self.frequency == '1h' \
                      or self.frequency == '3Hourly' or self.frequency == '6Hourly':
           
                if 'cell_methods' in fileobj.variables[var].__dict__ : # time mean 

                    regex=re.compile('.*time *: *mean *\(? *interval *: *([0-9]+.?[0-9]+?) ([a-zA-Z]+) *\)')
                    cell_meth_att = regex.search(fileobj.variables[var].cell_methods)
                    if cell_meth_att:
                        if cell_meth_att.group(2) == 'hours': freq=float(cell_meth_att.group(1))
                        elif cell_meth_att.group(2) == 'minutes': freq=float(cell_meth_att.group(1))/60.
                    else: # 'cell_methods' attribute defined with the value 'time: mean'
                        freq=filedate_delta/3600.

                    filedate[0] = filedate[0] - timedelta( minutes=(freq/2.)*60 + \
                                    ((filedate[0].hour*60 + filedate[0].minute)-(freq/2.)*60)%(freq*60) )
                    filedate[-1] = filedate[-1] - timedelta( minutes=(freq/2.)*60 + \
                                    ((filedate[-1].hour*60 + filedate[-1].minute)-(freq/2.)*60)%(freq*60) - freq*60 ) 

                else: # assume it is instant data
                    freq=filedate_delta/3600.
                    filedate[-1] = filedate[-1] - timedelta( minutes=(freq/2.)*60 + \
                        ((filedate[-1].hour*60 + filedate[-1].minute)-(freq/2.)*60)%(freq*60) - 2*freq*60 )

            elif self.frequency == 'yearly' or self.frequency == 'decadal':
                filedate[0]=filedate[0].replace(month=01)
                filedate[0]=filedate[0].replace(day=01)
                filedate[-1]=filedate[-1].replace(month=01)
                filedate[-1]=filedate[-1].replace(day=01)
                filedate[-1]=filedate[-1] + timedelta(years=1)
   
            elif self.frequency == 'fx' or self.frequency == 'annual_cycle':
                clogger.error('Check time consistency with a frequency equal to %s has no sense' %self.frequency)      

            else:
                clogger.error('Dataset frequency is non-standard: frequency = %s. ' %self.frequency +\
                              'Normalized frequency values are: decadal, yearly, monthly, '+\
                              'daily, 6h, 3h, fx and annual_cycle')
            #
            # Check period of datafiles vs dataset period
            clogger.debug('Period covered by selected files: %s'%filedate)
            file_period=cperiod(start=filedate[0],end=filedate[-1])
            #
            if file_period.includes(self.period):
                clogger.info("Time data in datafiles (i.e. %s) includes time data of " %file_period +\
                             "dataset (i.e. %s) => dataset are consistent." %self.period)
                return(True)
            else:
                clogger.info("Time data in datafiles (i.e. %s) don't include time data of " %file_period +\
                             "dataset (i.e. %s) => dataset are not consistent." %self.period)
                return(False)



class cens(cobject, dict):
    def __init__(self, dic=None, order=None, sortfunc=None):
        """Function cens creates a CliMAF object of class ``cens`` ,
        i.e. a dict of objects, which keys are member labels, and
        which members are ordered, using method ``set_order``

        In some cases, ensembles of datasets from the same project
        can also be built easily using :py:func:`~climaf.classes.eds()`

        When applying an operator to an ensemble, CliMAF will know,
        from operator's declaration (see
        :py:func:`~climaf.operators.cscript()`), whether the operator
        'wishes' to get the ensemble or, on the reverse, is not
        'ensemble-capable' :

         - if the operator is ensemble-capable it will deliver it :

           - if it is a script : with a string composed  by
             concatenating the corresponding input files; it will
             also provide the labels list to the script if its
             declaration calls for it with keyword ${labels}
             (see :py:func:`~climaf.operators.cscript()`)
           - if it is a Python function : with the dict of
             corresponding objects

         - if the operator is 'ensemble-dumb', CliMAF will loop
           applying it on each member, and will form a new ensemble
           with the results.

        The dict keys must be label strings, which describe what is
        basically different among members. They are usually used by
        plot scripts to provide a caption allowing to identify each
        dataset/object e.g using various colors.

        Args:
          dic (dict, optional): maps member labels (strings) to CliMAF
           objects; defaults to an empty ensemble
          order (list, optional): the member labels, in the wished
           order; it must hold exactly the keys of ``dic``
          sortfunc (function, optional): called with the list of labels,
           it returns the ordered list of labels; used at creation only
           if ``order`` is not provided, and applied again each time
           new members are added

        Raises:
          Climaf_Classes_Error: if a key is not a string, if a member
           is not a CliMAF object, or if ``order`` does not match the
           keys of ``dic``

        Examples (see also :download:`../examples/ensemble.py`) :

        >>> cdef('project','example'); cdef('simulation',"AMIPV6ALB2G");
        >>> cdef('variable','tas');cdef('frequency','monthly')
        >>> #
        >>> ds1980=ds(period="1980")
        >>> ds1981=ds(period="1981")
        >>> #
        >>> myens=cens({'1980':ds1980 , '1981':ds1981 })
        >>> ncview(myens)  # will launch ncview once per member
        >>>
        >>> myens=cens({'1980':ds1980 , '1981':ds1981 }, order=['1981','1980'])
        >>> myens.set_order(['1981','1980'])
        >>>
        >>> # Add a member
        >>> myens['abcd']=ds(period="1982")

        Limitations : Even if an ensemble is a dict, some dict methods
        are not properly implemented (popitem, fromkeys) and function
        iteritems does not use member order

        You can write an ensemble to a file using function
        :py:func:`~climaf.cache.efile`

        """
        if dic is None:
            dic = {}
        self._check_content(dic.keys(), dic.values())
        self.sortfunc = sortfunc
        #
        dict.update(self, dic)
        #
        keylist = sorted(self.keys())
        if order:
            self.order = self._checked_order(order, keylist)
        elif sortfunc:
            self.order = sortfunc(keylist)
        else:
            self.order = keylist
        #
        self._refresh_crs()

    @staticmethod
    def _check_content(labels, members):
        """Raise Climaf_Classes_Error unless all LABELS are strings and
        all MEMBERS are CliMAF objects"""
        if not all(isinstance(label, str) for label in labels):
            raise Climaf_Classes_Error("Ensemble keys/labels must be strings")
        if not all(isinstance(member, cobject) for member in members):
            raise Climaf_Classes_Error("Ensemble members must be CliMAF objects")

    def _checked_order(self, order, ordered_keylist=None):
        """Return a private copy of ORDER (as a list), after checking
        that it holds exactly the labels of the ensemble"""
        order = list(order)
        if ordered_keylist is None:
            ordered_keylist = sorted(self.keys())
        ordered_list = sorted(order)
        if ordered_list != ordered_keylist:
            raise Climaf_Classes_Error(
                "Order list does not match dict keys list : %s   and %s" %
                (repr(ordered_list), repr(ordered_keylist)))
        return order

    def _refresh_crs(self):
        """Rebuild the CRS from current members and order, and register
        the object (same sequence as done when setting a member)"""
        self.crs = self.buildcrs()
        self.register()

    def set_order(self, order, ordered_keylist=None):
        """Set the members order to the labels list ORDER, which must
        hold exactly the ensemble labels (else Climaf_Classes_Error is
        raised). ORDERED_KEYLIST, if provided, is the sorted list of the
        ensemble labels. The CRS is updated accordingly"""
        # A copy is stored, so that adding members later on does not
        # modify the caller's list
        self.order = self._checked_order(order, ordered_keylist)
        self._refresh_crs()

    def __setitem__(self, k, v):
        """Set member V with label K; a new label is appended to the
        order, which is then re-computed by ``sortfunc`` if any"""
        self._check_content([k], [v])
        dict.__setitem__(self, k, v)
        if k not in self.order:
            self.order.append(k)
            if self.sortfunc:
                self.order = self.sortfunc(list(self.keys()))
        self._refresh_crs()

    def items(self):
        """Return the list of pairs (label, member), in members order"""
        return [(label, self[label]) for label in self.order]

    def copy(self):
        """Return a new ensemble with same members, order and sortfunc"""
        return cens(self, order=list(self.order), sortfunc=self.sortfunc)

    def pop(self, key, default=None):
        """Remove member KEY and return it; return DEFAULT (and change
        nothing) if there is no such member"""
        if key not in self:
            return default
        self.order.remove(key)
        member = dict.pop(self, key)
        self._refresh_crs()
        return member

    def clear(self):
        """Remove all members"""
        dict.clear(self)
        self.order = []
        self._refresh_crs()

    def update(self, it):
        """Add or replace members from IT, which is either a dict or an
        iterable of pairs (label, member). Labels which are new to the
        ensemble are appended to the order, in the order they show in
        IT; then ``sortfunc``, if any, is applied

        Raises Climaf_Classes_Error if a label is not a string or a
        member is not a CliMAF object
        """
        # Materialize pairs once : IT may be an iterator
        pairs = list(it.items()) if isinstance(it, dict) else list(it)
        self._check_content([label for label, _ in pairs],
                            [member for _, member in pairs])
        dict.update(self, pairs)
        for label, _ in pairs:
            # A label already known must not show twice in the order
            if label not in self.order:
                self.order.append(label)
        if self.sortfunc:
            self.order = self.sortfunc(list(self.keys()))
        self._refresh_crs()

    def buildcrs(self, crsrewrite=None, period=None):
        """Return the CRS expression for the ensemble, which lists the
        members in members order; CRSREWRITE and PERIOD are forwarded to
        each member's ``buildcrs``"""
        members = ["'" + label + "'" + ":" +
                   self[label].buildcrs(crsrewrite=crsrewrite, period=period)
                   for label in self.order]
        # Joining avoids any trailing comma, without altering members' CRS
        return "cens({" + ",".join(members) + "})"

    def check(self):
        """
        Check time consistency of first variable for each member of the ensemble :
        - check if first data time interval is consistent with dataset frequency
        - check if file data have a gap
        - check if period covered by data files actually includes the whole of dataset period

        Returns: True if period of data files included dataset period, False otherwise.

        Example:

        >>> # Ensemble with monthly frequency
        >>> j0=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1980')
        >>> j1=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1981')
        >>> ens=cens({'1980':j0, '1981':j1})
        >>> res=ens.check()

        """

        # Call method 'check' of each member of the ensemble; every
        # member is checked, even once one of them has failed
        rep = True
        for memb in self:
            rep = self[memb].check() and rep
        return rep
        


def eds(**kwargs):
    """
    Create a dataset ensemble using the same calling sequence as
    :py:func:`~climaf.classes.cdataset`, except that one of the facets
    is a list, which defines the ensemble members; this facet must be among
    the facets authorized for ensemble in the (single) project involved

    Returns : a :py:class:`~climaf.classes.cens` object, with one member
    per value in the list; labels are these values, in the same order

    Raises Climaf_Classes_Error if a list-valued facet (except 'domain')
    is not allowed for ensembles in the project, or if the number of
    such facets is not exactly one

    Example::

    >>> cdef("frequency","monthly") ;  cdef("project","CMIP5"); cdef("model","CNRM-CM5")
    >>> cdef("variable","tas"); cdef("period","1860")
    >>> ens=eds(experiment="historical", simulation=["r1i1p1","r2i1p1"])

    """
    facets = processDatasetArgs(**kwargs)
    #
    # Collect the facets provided as a list (a list is the regular type
    # for facet 'domain', which hence does not count), checking on the
    # fly that each one is allowed for ensembles in the project
    ensemble_facets = []
    for name in facets:
        clogger.debug("Looking at attr %s for ensemble"%name)
        if name == "domain" or not isinstance(facets[name], list):
            continue
        if name not in cprojects[facets["project"]].attributes_for_ensemble:
            raise Climaf_Classes_Error("Attribute %s cannot be used for ensemble"%name)
        clogger.debug("Attr %s is used for an ensemble"%name)
        ensemble_facets.append(name)
    if len(ensemble_facets) != 1:
        raise Climaf_Classes_Error("Must ask for an ensemble on exactly one attribute")
    listattr = ensemble_facets[0]
    #
    # Build one dataset per value of the ensemble facet
    members = dict()
    for label in facets[listattr]:
        member_facets = facets.copy()
        member_facets[listattr] = label
        members[label] = cdataset(**member_facets)
    return cens(members, order=facets[listattr])

def fds(filename, simulation=None, variable=None, period=None, model=None):
    """
    fds stands for FileDataSet; it allows to create a dataset simply
    by providing a filename and optionally a simulation name , a
    variable name, a period and a model name.

    For dataset attributes which are not provided, these defaults apply :

    - simulation : the filename basename, without its extension
      (e.g. without suffix '.nc')
    - variable : the set of variables in the data file
    - period : the period actually covered by the data file (if it has time_bnds)
    - model : the 'model_id' attribute if it exists, otherwise : 'no_model'
    - project  : 'file' (with separator = '|')

    The following restriction apply to such datasets :

    - functions :py:func:`~climaf.classes.calias` and
      :py:func:`~climaf.operators.derive` cannot be used for project
      'file'

    Results are unforeseen if all variables do not have the same time axis

    Args:
      filename (str): path of the data file; a leading '~' is expanded
      simulation (str, optional): simulation name
      variable (str, optional): variable name, or comma-separated list
       of variable names; each one must exist in the file
      period (str, optional): period; if the file period can be
       derived, it must include this period
      model (str, optional): model name

    Returns : the dataset, with attribute ``files`` set to the filename

    Raises Climaf_Classes_Error if the file does not exist, has no
    variable, lacks one of the requested variables, if its period cannot
    be derived while none is provided, or if the provided period is not
    included in the file period

    Examples : See :download:`data_file.py <../examples/data_file.py>`

    """
    filename = os.path.expanduser(filename)
    if not os.path.exists(filename):
        raise Climaf_Classes_Error("File %s does not exist"%filename)
    #
    if model is None:
        model = model_id(filename)
    if simulation is None:
        # Drop the actual extension, rather than blindly dropping the
        # last 3 characters (which was right only for suffix '.nc')
        simulation = os.path.splitext(os.path.basename(filename))[0]
    #
    if variable is None:
        # Work on a copy, in order not to alter the list we were returned
        lvars = list(varsOfFile(filename))
        if len(lvars) == 0:
            raise Climaf_Classes_Error("No variable in file %s"%filename)
        # Historical ordering : last variable first, then the other ones
        variable = ",".join(lvars[-1:] + lvars[:-1])
    else:
        for v in variable.split(','):
            if not fileHasVar(filename, v):
                raise Climaf_Classes_Error("No variable %s in file %s"%(v, filename))
    #
    fperiod = timeLimits(filename)
    if period is None:
        if fperiod is None:
            raise Climaf_Classes_Error("Must provide a period for file %s "\
                                           %(filename))
        period = repr(fperiod)
    elif fperiod and not fperiod.includes(init_period(period)):
        raise Climaf_Classes_Error("Max period from file %s is %s"\
                                       %(filename, repr(fperiod)))
    #
    d = ds(project='file', model=model, simulation=simulation,
           variable=variable, period=period, path=filename)
    d.files = filename
    return d


class ctree(cobject):
    def __init__(self, climaf_operator, script, *operands, **parameters):
        """ Builds the tree of a composed object, including a dict for outputs.

        Args:
          climaf_operator : the operator; it shows first in the CRS
          script : the object describing the script; its attribute
           ``flags`` is copied to the tree
          operands : the objects the operator applies to; an operand
           which evaluates to False (e.g. None) is accepted, and is
           ignored when building the CRS
          parameters : the operator's parameters; a ``period`` provided
           as a cperiod is replaced by its string representation

        Raises Climaf_Classes_Error if an operand is not a CliMAF object
        """
        self.operator = climaf_operator
        self.script = script
        import copy
        self.flags = copy.copy(script.flags)
        self.operands = operands
        if "period" in parameters:
            p = parameters["period"]
            if isinstance(p, cperiod):
                parameters["period"] = repr(p)
        self.parameters = parameters
        for o in operands:
            if o and not isinstance(o, cobject):
                raise Climaf_Classes_Error("operand " + repr(o) + " is not a CliMAF object")
        self.crs = self.buildcrs()
        self.outputs = dict()
        self.register()

    def buildcrs(self, crsrewrite=None, period=None):
        """ Builds the CRS expression representing applying OPERATOR on OPERANDS with PARAMETERS.
        Forces period downtree if provided
        A function for rewriting operand's CRS may be provided
        """
        # Operands are listed in call order, then parameters in
        # alphabetical order (except 'member_label', which is left out)
        items = []
        for op in self.operands:
            if op:
                opcrs = op.buildcrs(crsrewrite=crsrewrite, period=period)
                if crsrewrite:
                    opcrs = crsrewrite(opcrs)
                items.append(opcrs)
        #
        for par in sorted(self.parameters):
            if par != 'member_label':
                items.append(par + "=" + repr(self.parameters[par]))
        # Joining avoids a trailing comma without touching the items :
        # a global replace of ",)" would alter e.g. the repr of tuple (1,)
        return self.operator + "(" + ",".join(items) + ")"

    def setperiod(self, period):
        """ modifies the period for all datasets of a tree"""
        self.erase()
        for op in self.operands:
            # Same tolerance for void operands as when building the CRS
            if op:
                op.setperiod(period)
        self.crs = self.buildcrs(period=period)
        self.register()

class scriptChild(cobject):
    def __init__(self, cobject, varname):
        """
        Builds one of the child of a script call, which represents one output

        Args:
          cobject : the father object, i.e. the script call
          varname : the name of the output; it is appended to the
           father's CRS, after a dot, for forming the child's CRS
        """
        self.father = cobject
        self.varname = varname
        self.crs = self.buildcrs()
        self.file = None
        self.register()

    def setperiod(self, period):
        """ Re-compute the CRS for the given period """
        self.erase()
        # The CRS must be built from the father object : attribute 'crs'
        # is a string, which has no method 'buildcrs'
        self.crs = self.buildcrs(period=period)
        self.register()

    def buildcrs(self, period=None, crsrewrite=None):
        """ Return the CRS, i.e. the father's CRS (built for PERIOD, and
        rewritten by function CRSREWRITE if provided), followed by a dot
        and the output name
        """
        tmp = self.father.buildcrs(period=period)
        if crsrewrite:
            tmp = crsrewrite(tmp)
        return tmp + "." + self.varname

def compare_trees(tree1, tree2, func, filter_on_operator=None):
    """
    Recursively compares TREE1 and TREE2.

    For the nodes : compares operator, parameters and number of
    operands; ensures that FILTER_ON_OPERATOR(operator) is not true

    For the leaves (datasets) : ensure that string representations of
    applying function FUNC to the pair of datasets returns the same
    value for all datasets pairs in the (parallel) trees

    Returns that common value : func(leave1,leave2)) or None

    None is also returned when trees are not of matching types, and
    for nodes without operands

    FUNC cannot return None as a valid value
    """
    if isinstance(tree1, cdataset) and isinstance(tree2, cdataset):
        return func(tree1, tree2)
    #
    if isinstance(tree1, ctree) and isinstance(tree2, ctree):
        if tree1.operator != tree2.operator:
            return None
        if filter_on_operator and filter_on_operator(tree1.operator):
            return None
        if tree1.parameters != tree2.parameters:
            return None
        # Trees with distinct numbers of operands are not parallel
        if len(tree1.operands) != len(tree2.operands):
            return None
        results = [compare_trees(op1, op2, func, filter_on_operator)
                   for op1, op2 in zip(tree1.operands, tree2.operands)]
        if not results:
            return None
        # All pairs must agree (in terms of string representation)
        common = results[0]
        for other in results[1:]:
            if repr(other) != repr(common):
                return None
        return common
    #
    if isinstance(tree1, scriptChild) and isinstance(tree2, scriptChild):
        if tree1.varname == tree2.varname:
            return compare_trees(tree1.father, tree2.father,
                                 func, filter_on_operator)
    return None

# Tells function ds() what to do when a CRS string does not match
# exactly one project : if True, only log a message; if False, raise
# a Climaf_Classes_Error
allow_errors_on_ds_call = True


def allow_error_on_ds(allow=True):
    """ Set the module-level flag ``allow_errors_on_ds_call`` to ALLOW """
    global allow_errors_on_ds_call
    allow_errors_on_ds_call = allow

def select_projects(**kwargs):
    """
    If kwargs['project'] is a list (has multiple values), select_projects loops on the projects
    until it finds a file containing the aliased variable name.

    Returns : a copy of kwargs where 'project' is the first project of
    the list for which data files are found and the first of these
    files includes the variable (or the file variable it is aliased
    to). kwargs is returned unchanged if there is no such project, and
    also if 'project' is missing or is not a list
    """
    p_list = kwargs.get('project')
    if not isinstance(p_list, list):
        return kwargs
    for project in p_list:
        wkwargs = kwargs.copy()
        wkwargs['project'] = project
        dat = cdataset(**wkwargs)
        files = dat.baseFiles()
        if not files:
            clogger.info('-- No file found for project '+project+ ' and '+repr(wkwargs))
            continue
        clogger.info('-- File found for project '+project+ ' and '+repr(wkwargs))
        variable = wkwargs['variable']
        try:
            # varIsAliased returns None when no alias is declared, and
            # indexing None raises TypeError
            tmpVarInFile = varIsAliased(project, variable)[0]
        except TypeError:
            tmpVarInFile = variable
        # Only the first file is checked for the variable
        first_file = files.split(" ")[0]
        if fileHasVar(first_file, tmpVarInFile):
            clogger.info('-- Variable '+tmpVarInFile+' (aliased to variable '+
                         variable+') found in '+first_file)
            return wkwargs
        clogger.info('-- Variable '+tmpVarInFile+
                     ' (aliased to variable '+variable+') was not found in '+first_file)
    return kwargs


def ds(*args, **kwargs):
    """
    Returns a dataset from its full Climate Reference Syntax string. Example ::

     >>> ds('CMIP5.historical.pr.[1980].global.monthly.CNRM-CM5.r1i1p1.mon.Amon.atmos.last')

    Also a shortcut for :py:meth:`~climaf.classes.cdataset`,
    when used with only keywords arguments. Example ::

     >>> cdataset(project='CMIP5', model='CNRM-CM5', experiment='historical', frequency='monthly',
     ...          simulation='r2i3p9', domain=[40,60,-10,20], variable='tas', period='1980-1989', version='last')

    In that case, 'project' may be a list of projects, among which the
    first one providing the variable is selected.

    When the CRS string is not valid for exactly one of the declared
    projects, a Climaf_Classes_Error is raised if errors on ds calls
    are not allowed (see ``allow_error_on_ds``); otherwise a message
    is logged and None is returned.

    You must refer to doc at : :py:meth:`~climaf.classes.cdataset`
    """
    if len(args) > 1:
        raise Climaf_Classes_Error("Must provide either only a string or only keyword arguments")
    if len(args) == 0:
        # Front-end to cdataset
        return cdataset(**select_projects(**kwargs))
    #
    # Try to interpret the CRS string with each declared project
    crs = args[0]
    results = []
    for cproj in cprojects:
        try:
            dataset = cprojects[cproj].crs2ds(crs)
        except Climaf_Classes_Error:
            dataset = None
        if dataset:
            results.append(dataset)
    #
    if len(results) == 1:
        rep = results[0]
        if rep.project == 'file':
            rep.files = rep.kvp["path"]
        return rep
    #
    if results:
        # Tell which projects actually match, rather than all projects
        matching = [dataset.project for dataset in results]
        e = "CRS expression %s is ambiguous among projects %s"%(crs, repr(matching))
        log = clogger.info
    else:
        e = "CRS expression %s is not valid for any project in %s"%(crs, repr(list(cprojects.keys())))
        log = clogger.debug
    if not allow_errors_on_ds_call:
        raise Climaf_Classes_Error(e)
    log(e)
    return None

def cfreqs(project, dic):
    """
    Declare, for ``project``, the dictionary which translates
    ``normalized`` frequency values to the frequency values which are
    specific to the project

    Normalized frequency values are :
      decadal, yearly, monthly, daily, 6h, 3h, fx and annual_cycle

    When defining a dataset, any reference to a non-standard
    frequency will be left unchanged both in the dataset's CRS and
    when trying to access corresponding datafiles

    Args:
      project (str): project name
      dic (dict): keys are normalized values, values are the project's
       ones; it replaces any dictionary formerly declared for the project

    Examples::

    >>> cfreqs('CMIP5',{'monthly':'mon' , 'daily':'day' })
    """
    # Recorded in the module-level dictionary of frequency dictionaries
    frequencies[project] = dic


def crealms(project, dic):
    """
    Declare, for ``project``, the dictionary which translates
    ``normalized`` realm names to the realm names which are specific
    to the project

    Normalized realm names are :
      atmos, ocean, land, seaice

    When defining a dataset, any reference to a non-standard
    realm will be left unchanged both in the dataset's CRS and
    when trying to access corresponding datafiles

    Args:
      project (str): project name
      dic (dict): keys are normalized names, values are the project's
       ones; it replaces any dictionary formerly declared for the project

    Examples::

    >>> crealms('CMIP5',{'atmos':'ATM' , 'ocean':'OCE' })
    """
    # Recorded in the module-level dictionary of realms dictionaries
    realms[project] = dic


def calias(project, variable, fileVariable=None, scale=1., offset=0., units=None, missing=None, filenameVar=None):
    """ Declare that in ``project``, ``variable`` is to be computed by
    reading ``fileVariable``, and applying ``scale`` and ``offset``;

    Arg ``filenameVar`` allows to tell which fake variable name should be
    used when computing the filename for this variable in this project
    (for optimisation purpose);

    Can tell that a given constant must be interpreted as a missing value

    ``variable`` may be a list. In that case, ``fileVariable``,
    ``units`` and ``filenameVar``, if provided, should either be
    parallel lists (i.e. with the same length) or single values, which
    then apply to all variables; ``fileVariable`` defaults to ``variable``

    ``variable`` can be a comma separated list of variables, in which
    case this tells how variables are grouped in files (it make sense
    to use filenameVar in that case, as this is a way to provide the
    label which is unique to this grouping of variable; scale, offset
    and missing args must be the same for all variables in that case

    Raises Climaf_Classes_Error if ``project`` is not a declared
    project, or if a list argument does not have the same length as
    ``variable``

    Example ::

    >>> calias('erai','tas','t2m',filenameVar='2T')
    >>> calias('erai','tas_degC','t2m',scale=1., offset=-273.15)  # scale and offset may be provided
    >>> calias('EM',[ 'sic', 'sit', 'sim', 'snd', 'ialb', 'tsice'], missing=1.e+20)
    >>> calias('data_CNRM','so,thetao',filenameVar='grid_T_table2.2')

    NB: A wrapper with same name of this function is defined in
    :py:func:`climaf.driver.calias` and it is the one which is
    exported by module climaf.api. It allows to use a list of
    variable.

    """
    if project not in cprojects:
        raise Climaf_Classes_Error("project %s is not known"%project)
    if not fileVariable:
        fileVariable = variable
    if not filenameVar:
        filenameVar = None
    if not isinstance(variable, list):
        variable = [variable]
    nvars = len(variable)

    def parallel(value, argname):
        # A single value applies to all variables. Without replicating
        # it, zip() below would stop after the first variable
        if not isinstance(value, list):
            return [value] * nvars
        if len(value) != nvars:
            raise Climaf_Classes_Error(
                "Arg %s should be a list with same length as arg variable (%d)"
                %(argname, nvars))
        return value

    fileVariable = parallel(fileVariable, 'fileVariable')
    filenameVar = parallel(filenameVar, 'filenameVar')
    units = parallel(units, 'units')
    #
    project_aliases = aliases.setdefault(project, dict())
    for v, u, fv, fnv in zip(variable, units, fileVariable, filenameVar):
        project_aliases[v] = (fv, scale, offset, u, fnv, missing)

def varIsAliased(project, variable):
    """
    Return the alias declared for 'variable' in 'project', or None if
    there is none

    An alias, as recorded by function calias, is the n-uplet
    (fileVariable, scale, offset, units, filenameVar, missing), which
    defines how to compute a 'variable' which is not in files
    """
    return aliases.get(project, {}).get(variable)

def cmissing(project,missing,*kwargs) :
    """ Declare that in 'project', a given constant must be interpreted
    as a missing value, for a given set of project's attributes values

    Such a declaration must follow all ``calias`` declarations for the
    same project

    Not implemented yet (TBD) : all arguments are ignored and None is
    returned.
    """
    return None



class cpage(cobject):
    def __init__(self, fig_lines=None, widths=None, heights=None, 
                 fig_trim=True, page_trim=True, format="png",
                 orientation=None,
                 page_width=1000., page_height=1500.,title="", x=0, y=26, ybox=50, pt=24,
                 font="Times-New-Roman", gravity="North", background="white"): 
        """
        Builds a CliMAF cpage object, which represents an array of figures (output:
        'png' or 'pdf' figure)
        
        Args:
        
          fig_lines (a list of lists of figure objects or an ensemble of figure objects):
           each sublist of 'fig_lines' represents a line of figures   
          widths (list, optional): the list of figure widths, i.e. the width of each 
           column. By default, if fig_lines is:
           
             - a list of lists: spacing is even
             - an ensemble: one column is used
          heights (list, optional): the list of figure heights, i.e. the
           height of each line. By default spacing is even
          fig_trim (logical, optional): to turn on/off trimming for all figures.
           It removes all the surrounding extra space of figures in the page,
           either True (default) or False
          page_trim (logical, optional): to turn on/off trimming for the page. It
           removes all the surrounding extra space of the page, either True
           (default) or False 
          format (str, optional) : graphic output format, either 'png' (default)
           or 'pdf'(not recommended)
          page_width (float, optional) : width resolution of resultant image;
           CLiMAF default: 1000. 
          page_height (float, optional) : height resolution of resultant image;
           CLiMAF default: 1500. 
          orientation (str,optional): if set, it supersedes page_width and 
           page_height with values 1000*1500 (for portrait) or 1500*1000 (for landscape)
          title (str, optional) : append a label below or above (depending optional
           argument 'gravity') figures in the page.

        If title is activated:
        
            - x, y (int, optional): annotate the page with text.
              x is the offset towards the right from the upper left corner
              of the page, while y is the offset upward or the bottom
              according to the optional argument 'gravity' (i.e. 'South' or 'North'
              respectively); CLiMAF default: x=0, y=26. For more details, see:
              http://www.imagemagick.org/script/command-line-options.php?#annotate ;
              where x and y correspond respectively to tx and ty
              in ``-annotate {+-}tx{+-}ty text``
            - ybox (int, optional): width of the assigned box for title;
              CLiMAF default: 50. For more details, see:
              http://www.imagemagick.org/script/command-line-options.php?#splice
            - pt (int, optional): font size of the title; CLiMAF default: 24
            - font (str, optional): set the font to use when creating title; CLiMAF
              default: 'Times-New-Roman'. To print a complete list of fonts, use:
              'convert -list font'
            - gravity (str, optional): the chosen direction specifies where to position
              title; CLiMAF default: 'North'. For more details, see:
              http://www.imagemagick.org/script/command-line-options.php?#gravity
            - background (str, optional): background color of the assigned box for
              title; default: 'white'. To print a complete list of color names, use:
              'convert -list color'

        Raises Climaf_Classes_Error if fig_lines is missing or ill-shaped, if
        orientation is neither 'portrait' nor 'landscape', or if ybox < y + pt.

        Example:

         Using no default value, to create a page with 2 columns and 3 lines::

          >>> tas_ds=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
          >>> tas_avg=time_average(tas_ds)
          >>> fig=plot(tas_avg,title='title')
          >>> my_page=cpage([[None, fig],[fig, fig],[fig,fig]], widths=[0.2,0.8],
          ... heights=[0.33,0.33,0.33], fig_trim=False, page_trim=False,
          ... format='pdf', title='Page title', x=10, y=20, ybox=45,
          ... pt=20, font='Utopia', gravity='South', background='grey90',
          ... page_width=1600., page_height=2400.)
        """
        if fig_lines is None :
            raise Climaf_Classes_Error("fig_lines must be provided")
        self.fig_trim=fig_trim
        self.page_trim=page_trim
        self.format=format
        # A provided orientation supersedes page_width and page_height
        if orientation is not None :
            if orientation=='portrait' :
                page_width, page_height = 1000., 1500.
            elif orientation=='landscape' :
                page_width, page_height = 1500., 1000.
            else :
                raise Climaf_Classes_Error(
                    "if set, orientation must be 'portrait' or 'landscape'")
        self.page_width=page_width
        self.page_height=page_height
        self.title=title
        self.x=x
        self.y=y
        self.ybox=ybox
        self.pt=pt
        self.font=font
        self.gravity=gravity
        self.background=background
        if ( self.ybox < (self.y + self.pt) ) :
            raise Climaf_Classes_Error("Title exceeds the assigned box: ybox<y+pt")
        if not isinstance(fig_lines,list) and not isinstance(fig_lines,cens) :
            raise Climaf_Classes_Error(
                "fig_lines must be a CliMAF ensemble or a list "
                "of lists (each representing a line of figures)")
        if isinstance(fig_lines,list) :
            # Check the type of each line first, so that a wrong element
            # yields a clear error rather than a TypeError on len()
            for line in fig_lines:
                if not isinstance(line,list) :
                    raise Climaf_Classes_Error(
                        "each element in fig_lines must be a list of figures")
            if not widths :
                if not fig_lines :
                    raise Climaf_Classes_Error(
                        "fig_lines must not be empty when widths is not provided")
                ncols=len(fig_lines[0])
                for line in fig_lines:
                    if len(line)!=ncols :
                        raise Climaf_Classes_Error("each line in fig_lines must have same dimension")
                # Even spacing of columns
                widths=[ round(1./ncols,2) for column in fig_lines[0] ]
            self.widths=widths

            if not heights :
                # Even spacing of lines
                heights=[ round(1./len(fig_lines),2) for line in fig_lines ]
            self.heights=heights

            if len(fig_lines)!=len(self.heights) :
                raise Climaf_Classes_Error(
                    "fig_lines must have same size than heights")
            for line in fig_lines:
                if len(line)!=len(self.widths) :
                    raise Climaf_Classes_Error(
                        "each line in fig_lines must have same dimension as "
                        "widths; pb for sublist "+repr(line))
            self.fig_lines=fig_lines
        else: # case of an ensemble (cens) 
            if not widths and not heights :
                self.scatter_on_page([ fig_lines[label] for label in fig_lines.order])
            else:
                figs=list(fig_lines.order)
                if not widths: widths=[1.]
                self.widths=widths
                if not heights :
                    heights=[ round(1./len(figs),2) for memb in figs ]
                self.heights=heights

                # Fill the page line by line, in ensemble order; cells left
                # once all members are placed are set to None
                self.fig_lines=[]
                for l in heights :
                    line=[]
                    for c in widths :
                        if len(figs) > 0 : line.append(fig_lines[figs.pop(0)])
                        else : line.append(None)
                    self.fig_lines.append(line)
        #
        self.crs=self.buildcrs()

    def scatter_on_page(self,figs) :
        """ Try to optimize nb of columns and lines, based on figs 
        list length; sets attributes fig_lines, widths and heights.

        Raises Climaf_Classes_Error if figs is empty or has more 
        than 20 elements
        """
        n=len(figs)
        if n == 0 : raise Climaf_Classes_Error("No figure to put in page")
        if n >= 21 : raise Climaf_Classes_Error("Too many figures in page")
        if   n <= 3  : nx,ny=1,n
        elif n == 4  : nx,ny=2,2
        elif n <= 6  : nx,ny=2,3
        elif n <= 8  : nx,ny=2,4
        elif n <= 12 : nx,ny=3,4
        elif n <= 15 : nx,ny=3,5
        else         : nx,ny=4,5
        # Cut the figures list in lines of nx figures, and complement 
        # the last line with None
        lines=[ list(figs[i:i+nx]) for i in range(0,n,nx) ]
        lines[-1].extend([None]*(nx-len(lines[-1])))
        # NOTE: for n == 9 or n == 16, figures fill one line less than ny,
        # so fig_lines is then shorter than heights
        self.fig_lines=lines
        self.widths =[ round(1./nx,2) for i in range(nx) ]
        self.heights=[ round(1./ny,2) for i in range(ny) ]

    def buildcrs(self,crsrewrite=None,period=None):
        """ Returns the CRS string for the page, built from the CRS of 
        each figure (computed with arg 'crsrewrite') and from the page 
        attributes. Arg 'period' is not used. Title-related attributes 
        are included only if title is not empty
        """
        rep="cpage(["
        for line in self.fig_lines :
            rep+="["
            for f in line :
                if f : rep+=f.buildcrs(crsrewrite=crsrewrite)+","
                else : rep+=repr(None)+","
            rep+=" ],"
        rep+="],"+repr(self.widths)+","+repr(self.heights)
        # '%' formatting is applied only to the fixed patterns, never to a
        # string which already includes user-provided text (format, title,
        # font ...), which may include a '%'.
        # NOTE: booleans are written as quoted strings (e.g. fig_trim='True');
        # this is kept for compatibility with already built CRS strings
        rep+=", fig_trim='%s', page_trim='%s'"%(self.fig_trim,self.page_trim)
        rep+=", format='"+self.format+"'"
        rep+=", page_width=%d, page_height=%d"%(self.page_width,self.page_height)
        if self.title :
            rep+=", title='"+self.title+"'"
            rep+=", x=%d, y=%d, ybox=%d, pt=%d"%(self.x,self.y,self.ybox,self.pt)
            rep+=", font='"+self.font+"', gravity='"+self.gravity+\
                "', background='"+self.background+"'"
        rep+=")"

        # Remove the trailing commas left by the loops above
        rep=rep.replace(",]","]")
        rep=rep.replace(", ]","]")
        
        return rep


class cpage_pdf(cobject):
    def __init__(self, fig_lines=None, widths=None, heights=None,
                 orientation=None, page_width=1000., page_height=1500.,
                 scale=1., openright=False,
                 title="", x=0, y=2, titlebox=False, pt="Huge",
                 font="\\familydefault", background="white"):
        """
        Builds a CliMAF cpage_pdf object, which represents an array of figures (output:
        'pdf' figure). Figures are automatically centered in the page using 'pdfjam' tool; see
        http://www2.warwick.ac.uk/fac/sci/statistics/staff/academic-research/firth/software/pdfjam   

        Args:
          fig_lines (a list of lists of figure objects or an ensemble of figure objects):
           each sublist of 'fig_lines' represents a line of figures   
          widths (list, optional): the list of figure widths, i.e. the width of each 
           column. By default, if fig_lines is:

             - a list of lists: spacing is even
             - an ensemble: one column is used
          heights (list, optional): the list of figure heights, i.e. the
           height of each line. By default spacing is even        
          page_width (float, optional): width resolution of resultant image;
           CLiMAF default: 1000. 
          page_height (float, optional): height resolution of resultant image;
           CLiMAF default: 1500. 
          orientation (str,optional): if set, it supersedes page_width and 
           page_height with values 1000*1500 (for portrait) or 1500*1000 (for landscape)
          scale (float, optional): to scale all input pages; default:1.
          openright (logical, optional): this option puts an empty figure before the
           first figure; default: False. For more details, see: 
           http://ftp.oleane.net/pub/CTAN/macros/latex/contrib/pdfpages/pdfpages.pdf
          title (str, optional): append a label in the page.

        If title is activated, it is by default horizontally centered:
        
            - x (int, optional): title horizontal shift (in cm). 
            - y (int, optional): vertical shift from the top of the page (in cm); 
              only positive (down) values have an effect, default=2 cm 
            - titlebox (logical, optional): set it to True to frame the text in a box,
              frame color is 'black'
            - pt (str, optional): title font size; CLiMAF default: 'Huge'
              (corresponding to 24 pt). You can set or not a backslash before this
              argument. 
            - font (str, optional): font
              abbreviation among available LaTex fonts; default: '\\\\\\\\familydefault'.
            - background (str, optional): frame fill background color; among LaTex 
              'fcolorbox' colors; default: 'white'.

        Left and right margins are set to 2cm.

        Raises Climaf_Classes_Error if fig_lines is missing or ill-shaped, or if
        orientation is neither 'portrait' nor 'landscape'.

        Example:

         Using no default value, to create a PDF page with 2 columns and 3 lines::
         
          >>> tas_ds=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
          >>> tas_avg=time_average(tas_ds)
          >>> fig=plot(tas_avg,title='title',format='pdf')
          >>> crop_fig=cpdfcrop(fig)
          >>> my_pdfpage=cpage_pdf([[crop_fig,crop_fig],[crop_fig, crop_fig],[crop_fig,crop_fig]],
          ... widths=[0.2,0.8], heights=[0.33,0.33,0.33], page_width=800., page_height=1200., 
          ... scale=0.95, openright=True, title='Page title', x=-5, y=10, titlebox=True, 
          ... pt='huge', font='ptm', background='yellow') # Font name is 'Times'
        """
        if fig_lines is None :
            raise Climaf_Classes_Error("fig_lines must be provided")
        # A provided orientation supersedes page_width and page_height
        if orientation is not None :
            if orientation=='portrait' :
                page_width, page_height = 1000., 1500.
            elif orientation=='landscape' :
                page_width, page_height = 1500., 1000.
            else :
                raise Climaf_Classes_Error(
                    "if set, orientation must be 'portrait' or 'landscape'")
        self.page_width=page_width
        self.page_height=page_height
        self.scale=scale
        self.openright=openright
        self.title=title
        self.x=x
        self.y=y
        self.titlebox=titlebox
        self.pt=pt
        self.font=font
        self.background=background
        if not isinstance(fig_lines,list) and not isinstance(fig_lines,cens) :
            raise Climaf_Classes_Error(
                "fig_lines must be a CliMAF ensemble or a list "
                "of lists (each representing a line of figures)")
        if isinstance(fig_lines,list) :
            # Check the type of each line first, so that a wrong element
            # yields a clear error rather than a TypeError on len()
            for line in fig_lines:
                if not isinstance(line,list) :
                    raise Climaf_Classes_Error(
                        "each element in fig_lines must be a list of figures")
            if not widths :
                if not fig_lines :
                    raise Climaf_Classes_Error(
                        "fig_lines must not be empty when widths is not provided")
                ncols=len(fig_lines[0])
                for line in fig_lines:
                    if len(line)!=ncols :
                        raise Climaf_Classes_Error("each line in fig_lines must have same dimension")
                # Even spacing of columns
                widths=[ round(1./ncols,2) for column in fig_lines[0] ]
            self.widths=widths

            if not heights :
                # Even spacing of lines
                heights=[ round(1./len(fig_lines),2) for line in fig_lines ]
            self.heights=heights

            if len(fig_lines)!=len(self.heights) :
                raise Climaf_Classes_Error(
                    "fig_lines must have same size than heights")
            for line in fig_lines:
                if len(line)!=len(self.widths) :
                    raise Climaf_Classes_Error(
                        "each line in fig_lines must have same dimension as "
                        "widths; pb for sublist "+repr(line))
            self.fig_lines=fig_lines
        else: # case of an ensemble (cens) 
            figs=list(fig_lines.order)

            if not widths: widths=[1.]
            self.widths=widths
            if not heights :
                heights=[ round(1./len(figs),2) for memb in figs ]
            self.heights=heights

            # Fill the page line by line, in ensemble order; cells left
            # once all members are placed are set to None
            self.fig_lines=[]
            for l in heights :
                line=[]
                for c in widths :
                    if len(figs) > 0 : line.append(fig_lines[figs.pop(0)])
                    else : line.append(None)
                self.fig_lines.append(line)
        #
        self.crs=self.buildcrs()

    def buildcrs(self,crsrewrite=None,period=None):
        """ Returns the CRS string for the page, built from the CRS of 
        each figure (computed with arg 'crsrewrite') and from the page 
        attributes. Arg 'period' is not used. Title-related attributes 
        are included only if title is not empty
        """
        rep="cpage_pdf(["
        for line in self.fig_lines :
            rep+="["
            for f in line :
                if f : rep+=f.buildcrs(crsrewrite=crsrewrite)+","
                else : rep+=repr(None)+","
            rep+=" ],"
        # No quote must follow the heights list (a stray one used to be 
        # emitted there, which made the CRS an ill-formed expression)
        rep+="],"+repr(self.widths)+","+repr(self.heights)
        # '%' formatting is applied only to the fixed patterns, never to a
        # string which already includes user-provided text (title, pt,
        # font ...), which may include a '%'.
        # NOTE: booleans are written as quoted strings (e.g. openright='True');
        # this is kept for compatibility with already built CRS strings
        rep+=", page_width=%d, page_height=%d, scale=%.2f, openright='%s'"\
            %(self.page_width,self.page_height,self.scale,self.openright)
        if self.title :
            rep+=", title='"+self.title+"'"
            rep+=", x=%d, y=%d, titlebox='%s'"%(self.x,self.y,self.titlebox)
            rep+=", pt='"+self.pt+"', font='"+self.font+\
                "', background='"+self.background+"'"
        rep+=")"

        # Remove the trailing commas left by the loops above
        rep=rep.replace(",]","]")
        rep=rep.replace(", ]","]")
        
        return rep
            

def guess_projects(crs) :
    """
    Return the list of projects involved in the datasets involved in a 
    CRS expression. 

    There is one list element per occurrence of ``ds(...)`` in 'crs', 
    in order of appearance (hence possibly with duplicates)
    """
    # Candidate separators; on equal counts, the first one in this list wins
    separators=[r'.',r'_',r'£',r'$',r'@',r'|',r'&',r"-",r"=",r"^",
                r";",r":",r"!",r'§',r'/',r'ø',r'+',r'°']

    def guess_project(crs) :
        """
        Guess which is the project name for a dataset's crs, with minimum 
        assumption on the separator used in the project
        """
        # Assume that the highest count gives the right separator
        sep=max(separators,key=crs.count)
        # The project name lies between the opening quote (first 
        # character) and the first occurrence of the separator
        return crs[1:crs.find(sep)]

    # A list comprehension returns a true list, whatever the Python version
    return [ guess_project(args) for args in re.findall(r"ds\(([^)]*)",crs) ]
    
def browse_tree(cobj,func,results):
    """ Browse a CliMAF object's tree, accumulating in list 'results' 
    the values returned by 'func' on each tree node or leave (if they 
    evaluate to True)

    'func' is applied to datasets and dummies themselves, and to the 
    operator of a ctree; operands of a ctree, father of a scriptChild 
    and figures of a page are browsed recursively. None is skipped, and
    an error is logged for any other object. Returns None
    """
    if isinstance(cobj,cdataset) or isinstance(cobj,cdummy) :
        res=func(cobj)
        if res : results.append(res)
    elif isinstance(cobj,ctree) :
        res=func(cobj.operator)
        if res : results.append(res)
        for op in cobj.operands : browse_tree(op,func,results)
    elif isinstance(cobj,scriptChild) :
        browse_tree(cobj.father,func,results)
    elif isinstance(cobj,cpage) or isinstance(cobj,cpage_pdf) :
        # Both kinds of page store their figures in attribute fig_lines
        for line in cobj.fig_lines :
            for fig in line : browse_tree(fig,func,results)
    elif cobj is None : return 
    else :
        clogger.error("Cannot yet handle object :%s", repr(cobj))
        return


def domainOf(cobject) :
    """ Returns a domain for a CliMAF object : if object is a dataset, returns
    its domain, otherwise returns domain of first operand

    For a dataset, a domain which is a list is returned as a string of 
    comma-separated integers (each value is formatted with '%d'), domain 
    "global" is returned as an empty string, and any other value is returned 
    as is. Returns "none" for None; for an object of unknown class, an 
    error is logged and None is returned
    """
    if isinstance(cobject,cdataset) : 
        if isinstance(cobject.domain,list) :
            # join() also copes with an empty list
            return ",".join([ "%d"%coord for coord in cobject.domain ])
        if cobject.domain == "global" : return ""
        return cobject.domain
    elif isinstance(cobject,ctree) :
        clogger.debug("For now, domainOf logic for scripts output is basic (1st operand) - TBD")
        return domainOf(cobject.operands[0])
    elif isinstance(cobject,scriptChild) :
        clogger.debug("For now, domainOf logic for scriptChilds is basic - TBD")
        return domainOf(cobject.father)
    elif isinstance(cobject,cens) :
        clogger.debug("for now, domainOf logic for 'cens' objet is basic (1st member)- TBD")
        return domainOf(cobject.values()[0])
    elif cobject is None : return "none"
    else : clogger.error("Unkown class for argument "+repr(cobject))
             
def varOf(cobject) :
    """ Returns attribute 'variable' for a CliMAF object """
    return attributeOf(cobject,"variable")

def modelOf(cobject) :
    """ Returns attribute 'model' for a CliMAF object """
    return attributeOf(cobject,"model")

def simulationOf(cobject) :
    """ Returns attribute 'simulation' for a CliMAF object """
    return attributeOf(cobject,"simulation")

def projectOf(cobject) :
    """ Returns attribute 'project' for a CliMAF object """
    return attributeOf(cobject,"project")

def realmOf(cobject) :
    """ Returns attribute 'realm' for a CliMAF object """
    return attributeOf(cobject,"realm")

def gridOf(cobject) :
    """ Returns attribute 'grid' for a CliMAF object """
    return attributeOf(cobject,"grid")

def attributeOf(cobject,attrib) :
    """ Returns the value of attribute 'attrib' for a CliMAF object

    - for a dataset : the object attribute if it is not None, otherwise
      the value found in its 'kvp' dictionary
    - for an ensemble : the attribute of its first member
    - for any other object having a non-empty attribute : that attribute
    - for a ctree : the attribute of its first operand
    - for a dummy : "dummy" ; for a page : None ; for None : ''

    Raises Climaf_Classes_Error for any other kind of object
    """
    if isinstance(cobject,cdataset) : 
        direct=getattr(cobject,attrib,None) 
        if direct is None : return cobject.kvp.get(attrib)
        return direct
    if isinstance(cobject,cens) :
        return attributeOf(cobject.values()[0],attrib)
    # An object which carries a non-empty value for the attribute has 
    # precedence on the class-based logic below
    own=getattr(cobject,attrib,None)
    if own : return own
    if isinstance(cobject,ctree) :
        clogger.debug("for now, varOf logic is basic (1st operand) - TBD")
        return attributeOf(cobject.operands[0],attrib)
    if isinstance(cobject,cdummy) :
        return "dummy"
    if isinstance(cobject,(cpage,cpage_pdf)) :
        return None
    if cobject is None :
        return ''
    raise Climaf_Classes_Error("Unknown class for argument "+repr(cobject))

        
class Climaf_Classes_Error(Exception):
    """ Exception raised by module classes; creating an instance logs
    the error with clogger.error and calls dedent(100)
    """
    def __init__(self, valeur):
        # Let the base class record the value in 'args', so that generic 
        # handling of exceptions (args, repr, pickle) works as usual
        Exception.__init__(self, valeur)
        self.valeur = valeur
        clogger.error(self.__str__())
        dedent(100)
    def __str__(self):
        """ Returns the repr of the value provided at creation """
        return repr(self.valeur)

class Climaf_Error(Exception):
    """ General CliMAF exception; creating an instance logs the error
    with clogger.error and calls dedent(100)
    """
    def __init__(self, valeur):
        # Let the base class record the value in 'args', so that generic 
        # handling of exceptions (args, repr, pickle) works as usual
        Exception.__init__(self, valeur)
        self.valeur = valeur
        clogger.error(self.__str__())
        dedent(100)
    def __str__(self):
        """ Returns the repr of the value provided at creation """
        return repr(self.valeur)


def test():
    """ Basic smoke test : calls cdef for a series of facets, then 
    builds a dataset and a ctree applying an operator to it
    """
    facet_values=[ ("project","CMIP5"),
                   ("model","CNRM-CM5"),
                   ("experiment","historical"),
                   ("simulation","r1i1p1"),
                   ("period","197901-198012"),
                   ("domain","global") ]
    for facet,value in facet_values :
        cdef(facet,value)
    #
    dataset=cdataset(experiment="rcp85", variable="tos", period="19790101-19790102")
    # The tree is only built, not used further
    tree=ctree("operator", dataset, para1="val1",para2="val2")

def t2() :
    """ Basic test : builds a period from string "1984-1984" """
    # NOTE(review): name 'period' is not among the names imported at the
    # top of the file (only init_period and cperiod are) - confirm it is
    # defined somewhere, otherwise this call raises NameError
    one_year=period("1984-1984")
    

# When the module is run as a script, execute the basic smoke test
if __name__ == "__main__":
    test()
Navigation

Source code for climaf.classes

Quick search

Navigation