# -*- coding: iso-8859-1 -*-
"""
Basic types and syntax for a CLIMAF Reference Syntax interpreter and driver
This is a first prototype, where the interpreter is Python itself
"""
# Created : S.Senesi - 2014
import re, string, copy, os.path
import dataloc
from period import init_period, cperiod
from clogging import clogger, dedent
from netcdfbasics import fileHasVar, varsOfFile, timeLimits, model_id
from decimal import Decimal
# Module-level registries. They start empty and are filled at run time.
#: Declared projects, keyed by project name (values are cproject instances)
cprojects = {}
#: Dictionary of aliases dictionaries (looked up by project name)
aliases = {}
#: Dictionary of frequency names dictionaries (looked up by project name)
frequencies = {}
#: Dictionary of realms names dictionaries
realms = {}
class cproject():
    def __init__(self, name, *args, **kwargs):
        """
        Declare a project and its facets/attributes in CliMAF (see below)

        Args:
          name (string) : project name;
           do not use the chosen separator in it (see below)
          args (strings) : attribute names;
           they are free; do not use the chosen separator in it (see below); **CliMAF
           anyway will add attributes :
           project, simulation, variable, period, and domain**
          kwargs (dict) :
           can only be used with keywords :

            - ``sep`` or ``separator`` for indicating the symbol separating
              facets in the dataset syntax. Defaults to ".".
            - ``ensemble`` for declaring a list of attribute
              names which are allowed for defining an ensemble in
              this project ('simulation' is automatically allowed)

        Returns : a cproject object, which string representation is
        the pattern later used in CliMAF Reference Syntax for
        representing datasets in this project

        Raises : Climaf_Classes_Error if the separator is ','

        A 'cproject' is the definition of a set of attributes, or
        facets, which values will completely define a 'dataset' as
        managed by CliMAF. Its name is one of the possible keys
        for describing data locations (see
        :py:class:`~climaf.dataloc.dataloc`)

        For instance, cproject CMIP5, after its Data Reference Syntax,
        has attributes :
        model, simulation (used for rip), experiment, variable, frequency, realm, table, version

        **A number of projects are built-in**. See :py:mod:`~climaf.projects`

        A dataset in a cproject declared as ::

        >>> cproject('MINE','myfreq','myfacet',sep='_')

        will return ::

          ${project}_${simulation}_${variable}_${period}_${domain}_${myfreq}_${myfacet}

        and will have datasets represented as e.g.::

          'MINE_hist_tas_[1980-1999]_global_decadal_gabu'

        while an example for built-in cproject CMIP5 will be::

          'CMIP5.historical.pr.[1980].global.monthly.CNRM-CM5.r1i1p1.mon.Amon.atmos.last'

        The attributes list should include all facets which are useful
        for distinguishing datasets from each other, and for computing
        datafile pathnames in the 'generic' organization (see
        :py:class:`~climaf.dataloc.dataloc`)

        A default value for a given facet can be specified, by providing a tuple
        (facet_name,default_value) instead of the facet name. This default value is
        however of lower priority than the value set using :py:func:`~climaf.classes.cdef`

        A project can be declared as having non-standard variable
        names in datafiles, or variables that should undergo re-scaling; see
        :py:func:`~climaf.classes.calias`

        A project can be declared as having non-standard frequency names (this is
        used when accessing datafiles); see :py:func:`~climaf.classes.cfreqs`)
        """
        if name in cprojects:
            clogger.warning("Redefining project %s" % name)
        self.project = name
        #
        # Facets imposed by CliMAF always come first, in this order
        forced = ['project', 'simulation', 'variable', 'period', 'domain']
        self.facets = list(forced)
        self.facet_defaults = dict()
        for a in args:
            if isinstance(a, tuple):
                # (facet_name, default_value) form
                facet_name, facet_default = a
                self.facet_defaults[facet_name] = facet_default
            else:
                facet_name = a
            if facet_name not in forced:
                self.facets.append(facet_name)
        #
        # 'sep' takes precedence over 'separator' when both are provided
        self.separator = "."
        if "separator" in kwargs:
            self.separator = kwargs['separator']
        if "sep" in kwargs:
            self.separator = kwargs['sep']
        if self.separator == ",":
            raise Climaf_Classes_Error("Character ',' is forbidden as a project separator")
        cprojects[name] = self
        # Build the pattern for the datasets CRS for this cproject
        self.crs = self._build_crs_pattern()
        # Create an attribute holding the list of facets which are allowed
        # for defining an ensemble, and put a first facet there
        self.attributes_for_ensemble = ['simulation']
        if 'ensemble' in kwargs:
            self.attributes_for_ensemble.extend(kwargs["ensemble"])

    def _build_crs_pattern(self):
        """
        Return the CRS pattern : one ``${facet}`` placeholder per facet,
        joined by the project separator.

        Joining (rather than appending the separator and then stripping one
        trailing character) keeps the pattern right whatever the separator
        length is.
        """
        return self.separator.join(["${%s}" % f for f in self.facets])

    def __repr__(self):
        return self.crs

    def crs2ds(self, crs):
        """
        Try to interpret string ``crs`` as the CRS of a dataset for
        the cproject. Return the dataset if OK, and None otherwise
        """
        fields = crs.split(self.separator)
        if len(fields) != len(self.facets):
            return None
        if fields[0] != self.project:
            return None
        kvp = dict()
        for i, f in enumerate(self.facets):
            kvp[f] = fields[i]
        return cdataset(**kvp)
def cdef(attribute, value=None, project=None):
    """
    Set or get the default value for a CliMAF dataset attribute
    or facet (such as e.g. 'model', 'simulation' ...), for use by
    next calls to :py:class:`~climaf.classes.cdataset()` or to
    :py:func:`~climaf.classes.ds`

    Argument 'project' allows to restrict the use/query of the default
    value to the context of the given 'project'. One can also set the
    (global) default value for attribute 'project'

    When 'value' is None, the default is returned : the one set for the
    project if any, otherwise the global one, otherwise an empty string.
    When 'value' is provided, it is stored and nothing is returned.

    There is no actual check that 'attribute' is a valid keyword for
    a call to ``ds`` or ``cdataset``

    Raises Climaf_Classes_Error if 'project' was not declared, or if it
    does not use facet 'attribute'

    Example::

    >>> cdef('project','OCMPI5')
    >>> cdef('frequency','monthly',project='OCMPI5')

    """
    if project not in cprojects:
        raise Climaf_Classes_Error("project '%s' has not yet been declared"%project)
    # The default for attribute 'project' always lives in the global
    # (None) context
    context = None if attribute == 'project' else project
    #
    if context and attribute not in cprojects[context].facets:
        raise Climaf_Classes_Error("project '%s' doesn't use facet '%s'"%(context,attribute))
    defaults = cprojects[context].facet_defaults
    if value is not None:
        defaults[attribute] = value
        return None
    found = defaults.get(attribute, None)
    if found:
        return found
    # Fall back on the global default, and on "" when there is none
    return cprojects[None].facet_defaults.get(attribute, "")
# Declare the project named None : cdef() stores and reads global
# default values in it
cproject(None)
cdef("domain", "global")
# Dictionary meant for registering all cobject instances
cobjects = {}
class cobject():
    """
    Base class for CliMAF objects. Both string representations are
    delegated to method ``buildcrs``, which this class does not define.
    """

    def __init__(self):
        # crs is the string expression defining the object
        # in the CLIMAF Reference Syntax
        self.crs = "void"

    def __repr__(self):
        return self.buildcrs()

    # str() and repr() give the same result
    __str__ = __repr__

    def register(self):
        # Does nothing for now
        return None

    def erase(self):
        # Does nothing for now
        return None
class cdummy(cobject):
    """
    cdummy class represents dummy arguments in the CRS
    """

    def __init__(self):
        """
        Nothing to initialize; cobject.__init__ is not called, so no
        ``crs`` attribute gets set
        """
        pass

    def buildcrs(self, period=None, crsrewrite=None):
        # Both arguments are ignored : the CRS is always the same string
        return 'ARG'
def processDatasetArgs(**kwargs):
    """
    Perform basic checks on kwargs for functions cdataset and eds
    regarding the project where the dataset is defined
    Also complement with default values as handled by the
    project's definition and by cdef()

    Returns a dict of facet values for the dataset's project (plus key
    'project'), where 'period' is either a cperiod or the string "*"

    Raises Climaf_Classes_Error if no project can be identified, if the
    project was not declared, if a facet value includes the project
    separator, if 'simulation' is used with CMIP6, or if the period
    cannot be interpreted
    """
    if 'project' in kwargs:
        project = kwargs['project']
    else:
        project = cdef("project")
    # cdef() returns "" (and not None) when no default project was set
    if project is None or project == "":
        raise Climaf_Classes_Error("Must provide a project (Can use cdef)")
    elif project not in cprojects:
        raise Climaf_Classes_Error(
            "Dataset's project '%s' has not "
            "been described by a call to cproject()"%project)
    attval = dict()
    attval["project"] = project
    sep = cprojects[project].separator
    #
    # Register facets values
    for facet in cprojects[project].facets:
        # A missing or empty value means : use the default
        if facet in kwargs and kwargs[facet]:
            val = kwargs[facet]
        else:
            val = cdef(facet, project=project)
        attval[facet] = val
        if val:
            if isinstance(val, list):
                listval = val
            else:
                listval = [val]
            for lval in listval:
                if isinstance(lval, str) and sep in lval:
                    raise Climaf_Classes_Error(
                        "You cannot use character '%s' when setting '%s=%s' because "
                        "it is the declared separator for project '%s'. "
                        "See help(cproject) for changing it, if needed"%(sep,facet,val,project))
    if attval['project'] == 'CMIP5':
        # Allow for a synonym for 'simulation' in CMIP5 : 'member'
        if 'member' in kwargs and kwargs['member'] not in [None, '']:
            attval['simulation'] = kwargs['member']
            clogger.info('Attribute "member" in project CMIP5 has been translated to "simulation"')
        # Special processing for CMIP5 fixed fields : handling redundancy in facets
        if (attval['table'] == 'fx' or attval['period'] == 'fx' or
                attval['simulation'] == 'r0i0p0' or attval['frequency'] == 'fx'):
            attval['table'] = 'fx'
            attval['period'] = 'fx'
            attval['simulation'] = 'r0i0p0'
            attval['frequency'] = 'fx'
    # Special processing for CMIP6 : facet 'simulation' is forbidden (must use 'realization')
    # Compare by value : identity ('is not') with a literal is not reliable
    if attval['project'] == 'CMIP6' and 'simulation' in kwargs and kwargs['simulation'] != '':
        raise Climaf_Classes_Error("You cannot use attribute 'simulation' in CMIP6; please use 'realization'. This if for kwargs=%s"%repr(kwargs))
    errmsg = ""
    for facet in cprojects[project].facets:
        if attval[facet] is None:
            e = "Project '%s' needs facet '%s'. You may use cdef() for setting a default value"\
                %(project,facet)
            errmsg += " "+e
    if errmsg != "":
        raise Climaf_Classes_Error(errmsg)
    #
    for facet in attval:
        # Facet specific processing
        if facet == 'period':
            if not isinstance(attval['period'], cperiod) and attval['period'] != "*":
                try:
                    attval['period'] = init_period(attval['period'])
                except Exception:
                    # Do not trap KeyboardInterrupt nor SystemExit here
                    raise Climaf_Classes_Error("Cannot interpret period for %s"%repr(attval['period']))
        # Check for typing or user's logic errors
        if facet not in cprojects[project].facets:
            e = "Project %s doesn't have facet %s"%(project,facet)
            errmsg += " "+e
    if errmsg != "":
        raise Climaf_Classes_Error(errmsg)
    if 'period' in attval and not isinstance(attval['period'], cperiod) and attval['period'] != "*":
        # The exception used to be built without being raised
        raise Climaf_Classes_Error("at end of process.. : period is not a cperiod")
    return attval
class cdataset(cobject):
    def __init__(self, **kwargs):
        """
        Create a CLIMAF dataset.

        A CLIMAF dataset is a description of what the data is (rather than
        the data itself or a file). It is basically a set of pairs
        attribute-value. The list of attributes actually used to
        describe a dataset is defined by the project it refers
        to.

        To display the attributes you may use for a given project, type e.g.:

        >>> cprojects["CMIP5"]

        For further details on projects , see
        :py:class:`~climaf.classes.cproject`

        None of the project's attributes are mandatory arguments, because
        all attributes default to the value set by
        :py:func:`~climaf.classes.cdef` (which also applies if
        providing a None value for an attribute)

        Some attributes have a special format or processing :

        - period : see :py:func:`~climaf.period.init_period`

        - domain : allowed values are either 'global' or a list for
          latlon corners ordered as in : [ latmin, latmax, lonmin,
          lonmax ]

        - variable : name of the geophysical variable ; this should be :

           - either a variable actually included in the datafiles,

           - or a 'derived' variable (see :py:func:`~climaf.operators.derive` ),

           - or, an aliased variable name (see :py:func:`~climaf.classes.alias` )

        - in project CMIP5 , for facets (frequency, simulation, period, table ) :
          if any is 'fx' (or 'r0i0p0' for simulation), the others are forced to
          'fx' (resp. 'r0i0p0') too.

        Example, using no default value, and addressing some CMIP5 data ::

          >>> cdataset(project='CMIP5', model='CNRM-CM5', experiment='historical', frequency='monthly',
          >>>      simulation='r2i3p9', domain=[40,60,-10,20], variable='tas', period='1980-1989', version='last')

        You may use wildcard ('*') in attribute values, and use :py:meth:`~climaf.classes.cdataset.explore`
        for having CliMAF doing something sensible matching such attributes with available data

        """
        #
        attval = processDatasetArgs(**kwargs)
        #
        # TBD : Next lines for backward compatibility, but should re-engineer
        self.project = attval["project"]
        self.simulation = attval['simulation']
        self.variable = attval['variable']
        self.period = attval['period']
        self.domain = attval['domain']
        #
        self.model = attval.get('model', "*")
        self.frequency = attval.get('frequency', "*")
        # A wildcard period is the plain string "*", which cannot carry
        # the 'fx' flag; only flag actual cperiod objects
        can_flag_period = isinstance(self.period, cperiod)
        # Normalized name is annual_cycle, but allow also for 'seasonal' for the time being
        if can_flag_period and (self.frequency == 'seasonal' or self.frequency == 'annual_cycle'):
            self.period.fx = True
        # Also handle the project's own name for frequency 'annual_cycle'
        freqs_dic = frequencies.get(self.project, None)
        if freqs_dic:
            for k in freqs_dic:
                if can_flag_period and freqs_dic[k] == self.frequency and k == 'annual_cycle':
                    self.period.fx = True
        #
        self.kvp = attval
        # alias is a n-plet : filevar, scale, offset, units, filenameVar, missing
        self.alias = varIsAliased(self.project, self.variable)
        #
        if "," in self.variable and self.alias:
            filevar, scale, offset, units, filenameVar, missing = self.alias
            if filevar != self.variable or scale != 1. or offset != 0 or missing:
                raise Climaf_Classes_Error("Cannot alias/scale/setmiss on group variable")
        # Build CliMAF Ref Syntax for the dataset
        self.crs = self.buildcrs()
        #
        self.files = None
        self.local_copies_of_remote_files = None
        self.register()

    def setperiod(self, period):
        """ Change the dataset period, and update its CRS accordingly """
        self.erase()
        self.period = period
        self.kvp['period'] = period
        self.crs = self.buildcrs()
        self.register()

    def buildcrs(self, period=None, crsrewrite=None):
        """ Return the CRS expression of the dataset, possibly using
        another ``period`` than the dataset's one.
        Argument ``crsrewrite`` is not used here
        """
        crs_template = string.Template(cprojects[self.project].crs)
        dic = self.kvp.copy()
        if period is not None:
            dic['period'] = period
        # A domain provided as a list of corners shows as its repr
        if isinstance(dic['domain'], list):
            dic['domain'] = repr(dic['domain'])
        rep = "ds('%s')" % crs_template.safe_substitute(dic)
        return rep

    def isLocal(self):
        """ Return what dataloc.isLocal tells for the dataset's project,
        model, simulation and frequency """
        model = getattr(self, "model", "*")
        return dataloc.isLocal(project=self.project, model=model,
                               simulation=self.simulation, frequency=self.frequency)

    def isCached(self):
        """ TBD : analyze if a remote dataset is locally cached
        """
        rep = False
        return rep

    def oneVarPerFile(self):
        locs = dataloc.getlocs(project=self.project, model=self.model, simulation=self.simulation,
                               frequency=self.frequency)
        return all([org for org, freq, url in locs])

    def periodIsFine(self):
        clogger.debug("always returns False, yet - TBD")
        return False

    def domainIsFine(self):
        clogger.debug("a bit too simple yet (domain=='global')- TBD")
        return self.domain == 'global'

    def periodHasOneFile(self):
        # baseFiles() is handled here as a string of space-separated names
        return len(self.baseFiles().split(" ")) < 2

    def hasOneMember(self):
        clogger.debug("always returns True, yet - TBD")
        return True

    def hasExactVariable(self):
        # Assume that group variable do not need aliasing
        if "," in self.variable:
            return True
        clogger.debug("always returns False, yet - TBD")
        return False

    def missingIsOK(self):
        """ Tell if there is no missing value to set, i.e. the variable
        either is not aliased, or its alias declares no missing value """
        # Must test the dataset's own alias (a bare name 'alias' is
        # not defined in this scope)
        if self.alias is None:
            return True
        _, _, _, _, _, missing = self.alias
        return missing is None

    def explore(self, option='check_and_store', sort_periods_on=None):
        """
        Versatile datafile exploration for a dataset which possibly has wildcards (* and ? ) in
        attributes.

        ``option`` can be :

          - 'choices' for returning a dict which keys are wildcard attributes and entries
            are values list
          - 'resolve' for returning a NEW DATASET with instantiated attributes (if uniquely)
          - 'ensemble' for returning AN ENSEMBLE based on multiple possible values of a
            single attribute
          - 'check_and_store' (or missing) for just identifying and storing dataset files list
            (while ensuring non-ambiguity check for wildcard attributes)

        Any other value raises Climaf_Classes_Error

        This feature works only for projects which organization is of type 'generic'

        Attribute 'period' cannot use a * without being == * ; in that case, the period of all
        matching files will be either :

          - aggregated among all instances of all attributes with wildcards (default)
          - or aggregated after being sorted on attribute ``sort_periods_on``, if provided

        Toy example ::

          >>> rst=ds(project="example", simulation="*", variable="rst", period="1980-1981")
          >>> rst
          ds('example|*|rst|1980-1981|global|monthly')

          >>> rst.explore('choices')
          {'simulation': ['AMIPV6ALB2G']}

          >>> instanciated_dataset=rst.explore('resolve')
          >>> instanciated_dataset
          ds('example|AMIPV6ALB2G|rst|1980-1981|global|monthly')

          >>> my_ensemble=rst.explore('ensemble')
          error    : "Creating an ensemble does not make sense because all wildcard attributes have a single possible value ({'simulation': ['AMIPV6ALB2G']})"

        Real life example for options ``choices`` and ``ensemble`` ::

          >>> rst=ds(project="CMIP6", model='*', experiment="*ontrol*", realization="r1i1p1f*", table="Amon", variable="rsut", period="1980-1981")
          >>> clog('info')
          >>> rst.explore('choices')
          info     : Attribute institute has matching value CNRM-CERFACS
          info     : Attribute experiment has multiple values : set(['piClim-control', 'piControl'])
          info     : Attribute grid has matching value gr
          info     : Attribute realization has matching value r1i1p1f2
          info     : Attribute mip has multiple values : set(['CMIP', 'RFMIP'])
          info     : Attribute model has multiple values : set(['CNRM-ESM2-1', 'CNRM-CM6-1'])
          {'institute': ['CNRM-CERFACS'], 'experiment': ['piClim-control', 'piControl'], 'grid': ['gr'],
          'realization': ['r1i1p1f2'], 'mip': ['CMIP', 'RFMIP'], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1']}

          # Let us further select by setting experiment=piControl
          >>> mrst=ds(project="CMIP6", model='*', experiment="piControl", realization="r1i1p1f*", table="Amon", variable="rsut", period="1980-1981")
          >>> mrst.explore('choices')
          {'institute': ['CNRM-CERFACS'], 'mip': ['CMIP'], 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1'], 'grid': ['gr'], 'realization': ['r1i1p1f2']}
          >>> small_ensemble=mrst.explore('ensemble')
          >>> small_ensemble
          cens({
                'CNRM-ESM2-1':ds('CMIP6%%rsut%1980-1981%global%/cnrm/cmip%CNRM-ESM2-1%CNRM-CERFACS%CMIP%Amon%piControl%r1i1p1f2%gr%latest'),
                'CNRM-CM6-1' :ds('CMIP6%%rsut%1980-1981%global%/cnrm/cmip%CNRM-CM6-1%CNRM-CERFACS%CMIP%Amon%piControl%r1i1p1f2%gr%latest')
               })

        Identify period covered by data, and versions ::

          >>> d=ds(project="CMIP6",experiment="piControl", realization='r1i1p1f2', variable="so",
          ...      table="*", period="*" , model="*",version="*")
          >>> clog('info')
          >>> d.explore('choices')
          info     : Attribute institute='*' has matching value 'CNRM-CERFACS'
          info     : Attribute period='*' has matching value [1850-2349]
          info     : Attribute version='*' has multiple values : ['v0', 'v20180720', 'latest']
          info     : Attribute grid='g*' has matching value 'gn'
          info     : Attribute mip='*' has matching value 'CMIP'
          info     : Attribute table='*' has matching value 'Omon'
          info     : Attribute model='*' has multiple values : ['CNRM-ESM2-1', 'CNRM-CM6-1']
          {'institute': 'CNRM-CERFACS', 'period': [1850-2349], 'version': ['v0', 'v20180720', 'latest'], 'grid': 'gn', 'table': 'Omon', 'mip': 'CMIP', 'model': ['CNRM-ESM2-1', 'CNRM-CM6-1']}

        Analyze available periods for each value of a given attribute ::

          >>> rsut=ds(project="CMIP6", model='*', experiment="piControl*", realization="r1i1p1f*", table="Amon", variable="rsut", period="*")
          >>> rsut.explore('choices','model')
          {'institute': 'CNRM-CERFACS', 'period': {'CNRM-ESM2-1': [1850-2349], 'CNRM-CM6-1': [1850-2349]},
           'experiment': 'piControl', 'grid': 'gr', 'realization': 'r1i1p1f2', 'mip': 'CMIP',
           'model': ['CNRM-ESM2-1', 'CNRM-CM6-1']}
          # Could also be written : rsut.explore(option='choices',sort_periods_on='model')

        """
        dic = self.kvp.copy()
        if self.alias:
            # Search datafiles using the variable name used in files
            filevar, _, _, _, filenameVar, _ = self.alias
            dic["variable"] = string.Template(filevar).safe_substitute(dic)
            if filenameVar:
                dic["filenameVar"] = filenameVar
        clogger.debug("Looking with dic=%s"%repr(dic))
        # selectFiles is handed a dict for wildcard values, except for
        # the mere 'check_and_store'
        wildcards = None
        if option != 'check_and_store':
            wildcards = dict()
        files = dataloc.selectFiles(return_wildcards=wildcards, sort_periods_on=sort_periods_on, **dic)
        #
        wildcard_attributes_list = [k for k in dic if isinstance(dic[k], str) and "*" in dic[k]]
        if option == 'resolve':
            clogger.debug("Trying to resolve on attributes %s"%wildcard_attributes_list)
            for kw in wildcards:
                if len(wildcards[kw]) > 1:
                    raise Climaf_Classes_Error("Wildcard attribute %s is ambiguous %s"%(kw,wildcards[kw]))
                dic[kw] = wildcards[kw][0]
            return ds(**dic)
        elif option == 'choices':
            clogger.debug("Listing possible values for %s"%wildcard_attributes_list)
            return wildcards
        elif option == 'ensemble':
            clogger.debug("Trying to create an ensemble on attributes %s"%wildcard_attributes_list)
            # Exactly one attribute must have multiple values
            ensemble_kw = None
            for kw in wildcards:
                if len(wildcards[kw]) > 1:
                    if ensemble_kw is not None:
                        raise Climaf_Classes_Error(
                            "Cannot create an ensemble, because there are at least" +
                            " two possible attributes for defining it : %s and %s"%
                            (ensemble_kw,kw))
                    ensemble_kw = kw
                    dic[kw] = wildcards[kw]
                else:
                    dic[kw] = wildcards[kw][0]
            if ensemble_kw is None:
                raise Climaf_Classes_Error(
                    "Creating an ensemble does not make sense because all wildcard " +
                    "attributes have a single possible value (%s)"%wildcards)
            return eds(**dic)
        elif option == 'check_and_store':
            self.files = files
        else:
            raise Climaf_Classes_Error("Unknown option %s"%(option))

    def baseFiles(self, force=False):
        """ Returns the list of (local or remote) files which include the data
        for the dataset

        Use cached value (i.e. attribute 'files') unless called with arg force=True
        """
        if (force and self.project != 'file') or self.files is None:
            self.explore()
        return self.files

    def listfiles(self, force=False):
        """ Returns the list of (local or remote) files which include the data
        for the dataset

        Use cached value unless called with arg force=True
        """
        return self.baseFiles(force=force)

    def hasRawVariable(self):
        """ Test local data files to tell if a dataset variable is actually included
        in files (rather than being a derived, virtual variable)

        For the time being, returns False, which leads to always consider that variables
        declared as 'derived' actually are derived """
        clogger.debug("TBD: actually test variables in files, rather than assuming that variable %s is virtual for dataset %s"\
                      %(self.variable,self.crs))
        return False

    def check(self):
        """
        Check time consistency of first variable of a dataset or ensemble members:

        - check if first data time interval is consistent with dataset frequency
        - check if file data have a gap
        - check if period covered by data files actually includes the whole of dataset period

        Returns: True if period of data files included dataset period, False otherwise
        (including when no file is found). Returns None when the check cannot be
        performed (no time dimension, or a single time step)

        Examples:

        >>> # Dataset with monthly frequency
        >>> tas=ds(project='example', simulation='AMIPV6ALB2G', variable='tas',period='1980-1981')
        >>> res1=tas.check()
        >>>
        >>> # Ensemble with monthly frequency
        >>> j0=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1980')
        >>> j1=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1981')
        >>> ens=cens({'1980':j0, '1981':j1})
        >>> res2=ens.check()

        >>> # Define a new project for 'em' data with 3 hours frequency in particular
        >>> cproject('em_3h','root','group','realm','frequency',separator='|')
        >>> path='/cnrm/cmip/cnrm/simulations/${group}/${realm}/Regu/${frequency}/${simulation}/${variable}_??_YYYY.nc'
        >>> dataloc(project='em_3h', organization='generic', url=path)

        >>> # Dataset with 3h frequency for 'tas' variable (instant)
        >>> tas_3h=ds(project='em_3h',variable='tas',group='AR4',realm='Atmos',frequency='3Hourly', simulation='A1B',period='2050-2100')
        >>> res3=tas_3h.check()

        >>> # Dataset with 3h frequency for 'pr' variable (time mean)
        >>> pr_3h=ds(project='em_3h',variable='pr',group='AR4',realm='Atmos',frequency='3Hourly', simulation='A1B',period='2050-2100')
        >>> res4=pr_3h.check()

        """
        from anynetcdf import ncf
        from datetime import timedelta
        from netCDF4 import num2date
        import numpy as np

        # Get the list of files which include the data for the dataset
        if isinstance(self, cdataset):
            if self.isLocal() or self.isCached():
                files = self.baseFiles()
            else:
                files = self.local_copies_of_remote_files
            if not files:
                clogger.error('No file found for: %s'%self)
                if not (self.isLocal() or self.isCached()):
                    clogger.warning('For remote data, you have to do at first "cfile(%s)"'%self)
                return False
        else:
            clogger.error("Cannot handle %s" %self)
            return
        #
        day = 24.*3600.
        hourly_names = ('6h', '3h', '1h', '3Hourly', '6Hourly')
        is_monthly = (self.frequency == 'monthly' or not self.frequency)
        is_hourly = self.frequency in hourly_names
        #
        filedate = []
        clogger.debug("List of selected files: %s"%files)
        # Only the first variable of a group of variables is checked
        var = varOf(self).split(',')[0]
        # Concatenate time values of all data files
        for filename in files.split(' '):
            fileobj = ncf(filename)
            #
            if self.project in aliases and var in aliases[self.project]:
                var = aliases[self.project][var][0]
            #
            dimname = ''
            for dim in fileobj.variables[var].dimensions:
                if 'time' in dim:
                    dimname = dim
            if not dimname:
                # Going further would fail when looking for variable ''
                clogger.error('No time dimension for variable %s'%var)
                return
            time_obj = fileobj.variables[dimname]
            filedate = np.concatenate((filedate, num2date(time_obj.getValue(),
                                       units=time_obj.units, calendar=time_obj.calendar)))
        clogger.debug('Time data of selected files: %s'%filedate)

        # Check if first data time interval is consistent with dataset frequency
        if len(filedate) > 1:
            filedate_delta = (filedate[1]-filedate[0]).total_seconds()
        else:
            clogger.error('Time dimension is degenerated.')
            return

        if (is_monthly and (filedate_delta > 31.*day or filedate_delta <= 29.*day)) \
           or (self.frequency == 'yearly' and
               (filedate_delta > 366.*day or filedate_delta < 365.*day)) \
           or (self.frequency == 'decadal' and
               (filedate_delta > 3653.*day or filedate_delta < 3651.*day)):
            clogger.warning('First data time interval (= %.1f days) is not consistent with dataset frequency (i.e. %s)'\
                            %(filedate_delta/(24.*3600.),self.frequency))
        elif self.frequency == 'daily' and filedate_delta != 86400.:
            clogger.warning('First data time interval (= %.2f hours) is not consistent with dataset frequency (i.e. %s)'\
                            %(filedate_delta/3600.,self.frequency))
        elif is_hourly and filedate_delta != float(self.frequency[0])*3600.:
            # The number of hours is the first character of the frequency name
            clogger.warning('First data time interval (= %.2f hours) is different to dataset frequency (i.e. %.2f)'\
                            %(filedate_delta/3600.,float(self.frequency[0])))

        # Check if file data have a gap : compare each further interval
        # with the first one, and report the first gaps only
        cpt = 0
        for i in range(1, len(filedate)-1):
            step = (filedate[i+1]-filedate[i]).total_seconds()
            if step != filedate_delta:
                cpt += 1
                if cpt < 5:
                    if is_monthly or self.frequency == 'yearly' or self.frequency == 'decadal':
                        clogger.error('File data have a gap between indexes %i and %i: delta = %.0f days '\
                                      %(i,i+1,step/(24.*3600.)) +\
                                      'instead of %.0f days (<=> 1st data interval)'\
                                      %(filedate_delta/(24.*3600.)))
                    elif self.frequency == 'daily' or is_hourly:
                        clogger.error('File data have a gap between indexes %i and %i: '%(i,i+1) +\
                                      'delta = %.0f hours instead of %.0f hours (<=> 1st data interval)' \
                                      %(step/3600.,filedate_delta/3600.))
        #
        # Compute period covered by data files
        if is_monthly:
            filedate[0] = filedate[0].replace(day=1)
            last = filedate[-1]
            # Set month and day together : moving a day 29 to 31 to a
            # shorter month before resetting the day would fail
            if last.month > 11:
                filedate[-1] = last.replace(year=last.year+1, month=1, day=1)
            else:
                filedate[-1] = last.replace(month=last.month+1, day=1)
        elif self.frequency == 'daily':
            filedate[0] = filedate[0].replace(hour=0)
            filedate[-1] = filedate[-1].replace(hour=0)
            filedate[-1] = filedate[-1] + timedelta(days=1)
        elif is_hourly:
            if 'cell_methods' in fileobj.variables[var].__dict__:  # time mean
                # Default, used when 'cell_methods' tells no interval
                # (e.g. its value is just 'time: mean') or uses another unit
                freq = filedate_delta/3600.
                # The interval is an integer or decimal number, then a unit
                regex = re.compile(r'.*time *: *mean *\(? *interval *: *([0-9]+(?:\.[0-9]+)?) ([a-zA-Z]+) *\)')
                cell_meth_att = regex.search(fileobj.variables[var].cell_methods)
                if cell_meth_att:
                    if cell_meth_att.group(2) == 'hours':
                        freq = float(cell_meth_att.group(1))
                    elif cell_meth_att.group(2) == 'minutes':
                        freq = float(cell_meth_att.group(1))/60.
                filedate[0] = filedate[0] - timedelta(minutes=(freq/2.)*60 + \
                    ((filedate[0].hour*60 + filedate[0].minute)-(freq/2.)*60)%(freq*60))
                filedate[-1] = filedate[-1] - timedelta(minutes=(freq/2.)*60 + \
                    ((filedate[-1].hour*60 + filedate[-1].minute)-(freq/2.)*60)%(freq*60) - freq*60)
            else:  # assume it is instant data
                freq = filedate_delta/3600.
                filedate[-1] = filedate[-1] - timedelta(minutes=(freq/2.)*60 + \
                    ((filedate[-1].hour*60 + filedate[-1].minute)-(freq/2.)*60)%(freq*60) - 2*freq*60)
        elif self.frequency == 'yearly' or self.frequency == 'decadal':
            filedate[0] = filedate[0].replace(month=1, day=1)
            # timedelta has no 'years' argument : add the year using replace()
            # NOTE(review): one year is added for 'decadal' too - confirm
            filedate[-1] = filedate[-1].replace(year=filedate[-1].year+1, month=1, day=1)
        elif self.frequency == 'fx' or self.frequency == 'annual_cycle':
            clogger.error('Check time consistency with a frequency equal to %s has no sense' %self.frequency)
        else:
            clogger.error('Dataset frequency is non-standard: frequency = %s. ' %self.frequency +\
                          'Normalized frequency values are: decadal, yearly, monthly, '+\
                          'daily, 6h, 3h, fx and annual_cycle')
        #
        # Check period of datafiles vs dataset period
        clogger.debug('Period covered by selected files: %s'%filedate)
        file_period = cperiod(start=filedate[0], end=filedate[-1])
        #
        if file_period.includes(self.period):
            clogger.info("Time data in datafiles (i.e. %s) includes time data of " %file_period +\
                         "dataset (i.e. %s) => dataset are consistent." %self.period)
            return True
        else:
            clogger.info("Time data in datafiles (i.e. %s) don't include time data of " %file_period +\
                         "dataset (i.e. %s) => dataset are not consistent." %self.period)
            return False
[docs]class cens(cobject,dict):
def __init__(self, dic=None, order=None, sortfunc=None):
    """Function cens creates a CliMAF object of class ``cens`` ,
    i.e. a dict of objects, which keys are member labels, and
    which members are ordered, using method ``set_order``

    In some cases, ensembles of datasets from the same project
    can also be built easily using :py:func:`~climaf.classes.eds()`

    When applying an operator to an ensemble, CliMAF will know,
    from operator's declaration (see
    :py:func:`~climaf.operators.cscript()`), whether the operator
    'wishes' to get the ensemble or, on the reverse, is not
    'ensemble-capable' :

     - if the operator is ensemble-capable it will deliver it :

       - if it is a script : with a string composed by
         concatenating the corresponding input files; it will
         also provide the labels list to the script if its
         declaration calls for it with keyword ${labels}
         (see :py:func:`~climaf.operators.cscript()`)

       - if it is a Python function : with the dict of
         corresponding objects

     - if the operator is 'ensemble-dumb', CliMAF will loop
       applying it on each member, and will form a new ensemble
       with the results.

    The dict keys must be label strings, which describe what is
    basically different among members. They are usually used by
    plot scripts to provide a caption allowing to identify each
    dataset/object e.g using various colors.

    Args:
      dic (dict) : members, indexed by their label; defaults to
       an empty ensemble
      order (list) : the labels, in the wished order; if missing,
       labels are sorted (by ``sortfunc`` if provided)
      sortfunc (function) : gets a list of labels and returns it
       ordered

    Raises Climaf_Classes_Error if a key is not a string or if a
    member is not a CliMAF object

    Examples (see also :download:`../examples/ensemble.py`) :

    >>> cdef('project','example'); cdef('simulation',"AMIPV6ALB2G");
    >>> cdef('variable','tas');cdef('frequency','monthly')
    >>> #
    >>> ds1980=ds(period="1980")
    >>> ds1981=ds(period="1981")
    >>> #
    >>> myens=cens({'1980':ds1980 , '1981':ds1981 })
    >>> ncview(myens)  # will launch ncview once per member
    >>>
    >>> myens=cens({'1980':ds1980 , '1981':ds1981 }, order=['1981','1980'])
    >>> myens.set_order(['1981','1980'])
    >>>
    >>> # Add a member
    >>> myens['abcd']=ds(period="1982")

    Limitations : Even if an ensemble is a dict, some dict methods
    are not properly implemented (popitem, fromkeys) and function
    iteritems does not use member order

    You can write an ensemble to a file using function
    :py:func:`~climaf.cache.efile`
    """
    # Avoid a mutable default value : use a fresh dict at each call
    if dic is None:
        dic = {}
    if not all(isinstance(label, str) for label in dic.keys()):
        raise Climaf_Classes_Error("Ensemble keys/labels must be strings")
    if not all(isinstance(member, cobject) for member in dic.values()):
        raise Climaf_Classes_Error("Ensemble members must be CliMAF objects")
    self.sortfunc = sortfunc
    #
    dict.update(self, dic)
    #
    # sorted() gives a list whatever the type returned by keys()
    keylist = sorted(self.keys())
    if order:
        self.set_order(order, keylist)
    elif sortfunc:
        self.order = sortfunc(keylist)
    else:
        self.order = keylist
    #
    self.crs = self.buildcrs()
    self.register()
def set_order(self, order, ordered_keylist=None):
    """
    Set the order in which members are listed.

    Args:
      order (list of strings) : the labels, in the wanted order; it
        must hold exactly the same labels as the ensemble keys
      ordered_keylist (list, optional) : the ensemble keys, already
        sorted; when not provided, it is computed here

    Raises Climaf_Classes_Error if the labels of ``order`` do not
    match the keys. The ``order`` object itself (not a copy) is
    stored as attribute ``order``.
    """
    requested = sorted(order)
    if ordered_keylist is None:
        ordered_keylist = sorted(self.keys())
    if requested != ordered_keylist:
        raise Climaf_Classes_Error(
            "Order list does not match dict keys list : %s and %s" %
            (repr(requested), repr(ordered_keylist)))
    self.order = order
def __setitem__(self, k, v):
    """
    Add or replace the member labelled ``k`` with CliMAF object ``v``.

    A new label is appended at the end of the members order, unless
    a sort function was provided, in which case the whole order is
    recomputed. The ensemble CRS is then rebuilt and the ensemble
    registered again.

    Raises Climaf_Classes_Error if ``k`` is not a string or if ``v``
    is not a CliMAF object.
    """
    if not isinstance(k, str):
        raise Climaf_Classes_Error("Ensemble keys/labels must be strings")
    if not isinstance(v, cobject):
        raise Climaf_Classes_Error("Ensemble members must be CliMAF objects")
    dict.__setitem__(self, k, v)
    #
    if k not in self.order:
        self.order.append(k)
    # The sort function, if any, has the last word on members order
    if self.sortfunc:
        self.order = self.sortfunc(self.keys())
    #
    self.crs = self.buildcrs()
    self.register()
def items(self):
    """
    Return the list of (label, member) pairs, following members order
    (attribute ``order``) rather than the dict's own order.
    """
    pairs = []
    for label in self.order:
        pairs.append((label, self[label]))
    return pairs
def copy(self):
    """
    Return a new ensemble with the same members and the same sort
    function; its order list is a copy of the current one, so
    reordering one ensemble does not affect the other.
    """
    duplicated_order = list(self.order)
    return cens(self, order=duplicated_order, sortfunc=self.sortfunc)
def pop(self, key, default=None):
    """
    Remove member ``key`` from the ensemble (and from members order)
    and return it. If there is no such member, return ``default``
    (no KeyError is raised, contrary to dict.pop without default).
    """
    if key not in self:
        return default
    self.order.remove(key)
    return dict.pop(self, key, default)
def clear(self):
    """ Remove all members and reset members order to an empty list """
    self.order = []
    dict.clear(self)
def update(self, it):
    """
    Add to the ensemble the members provided by ``it``, which is
    either a dict (or an ensemble) or an iterable of (label, member)
    pairs.

    Labels which are new are appended to members order, in the order
    they are provided; labels already known keep their rank (they
    are not duplicated in the order list). If a sort function was
    provided, the whole order is recomputed.

    NOTE(review): contrary to __setitem__, this method does not
    check labels and members types, and does not rebuild the
    ensemble CRS nor register it again - to be confirmed as intended
    """
    # Materialize the pairs only once : a one-shot iterator would
    # otherwise be exhausted by dict.update before labels get recorded
    if isinstance(it, dict):
        pairs = list(it.items())
    else:
        pairs = list(it)
    dict.update(self, pairs)
    for label, _ in pairs:
        if label not in self.order:
            self.order.append(label)
    if self.sortfunc:
        self.order = self.sortfunc(self.keys())
def buildcrs(self, crsrewrite=None, period=None):
    """
    Build the CRS expression of the ensemble : a call to ``cens`` on
    a dict literal which lists members following members order; both
    arguments are forwarded to each member's own buildcrs
    """
    entries = ["'" + label + "'" + ":" +
               self[label].buildcrs(crsrewrite=crsrewrite, period=period) + ","
               for label in self.order]
    text = "cens({" + "".join(entries) + "}"
    # Get rid of the comma which follows the last member
    text = text.replace(",}", "}")
    return text + ")"
def check(self):
    """
    Check time consistency of first variable for each member of the ensemble :

    - check if first data time interval is consistent with dataset frequency
    - check if file data have a gap
    - check if period covered by data files actually includes the whole of dataset period

    Every member is checked, even after a first failure.

    Returns: True if the check succeeds for all members; otherwise, the
    (false) value returned by the last failing member.

    Example:

    >>> # Ensemble with monthly frequency
    >>> j0=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1980')
    >>> j1=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', frequency='monthly', period='1981')
    >>> ens=cens({'1980':j0, '1981':j1})
    >>> res=ens.check()

    """
    # Delegate to the 'check' method of each member of the ensemble
    verdict = True
    for label in self:
        outcome = self[label].check()
        if not outcome:
            verdict = outcome
    return verdict
def eds(**kwargs):
    """
    Create a dataset ensemble using the same calling sequence as
    :py:func:`~climaf.classes.cdataset`, except that one of the facets
    is a list, which defines the ensemble members; this facet must be among
    the facets authorized for ensemble in the (single) project involved

    Raises Climaf_Classes_Error if a list is provided for a facet which
    is not allowed for ensembles, or if the number of facets provided as
    a list (not counting 'domain') is not exactly one

    Example::

    >>> cdef("frequency","monthly") ; cdef("project","CMIP5"); cdef("model","CNRM-CM5")
    >>> cdef("variable","tas"); cdef("period","1860")
    >>> ens=eds(experiment="historical", simulation=["r1i1p1","r2i1p1"])

    """
    attval = processDatasetArgs(**kwargs)
    # Collect facets/attributes of type 'list' (i.e. defining an
    # ensemble), checking that each one is OK for the project;
    # 'domain' is skipped, as a list is a regular value for it
    ensemble_attrs = []
    for attr in attval:
        clogger.debug("Looking at attr %s for ensemble" % attr)
        if attr == "domain" or not isinstance(attval[attr], list):
            continue
        if attr not in cprojects[attval["project"]].attributes_for_ensemble:
            raise Climaf_Classes_Error("Attribute %s cannot be used for ensemble" % attr)
        clogger.debug("Attr %s is used for an ensemble" % attr)
        ensemble_attrs.append(attr)
    if len(ensemble_attrs) != 1:
        raise Climaf_Classes_Error("Must ask for an ensemble on exactly one attribute")
    listattr = ensemble_attrs[0]
    #
    # Create one dataset per value of the list facet; this value is
    # also the member label
    members = dict()
    for member in attval[listattr]:
        facets = attval.copy()
        facets[listattr] = member
        members[member] = cdataset(**facets)
    return cens(members, order=attval[listattr])
def fds(filename, simulation=None, variable=None, period=None, model=None):
    """
    fds stands for FileDataSet; it allows to create a dataset simply
    by providing a filename and optionally a simulation name , a
    variable name, a period and a model name.

    For dataset attributes which are not provided, these defaults apply :

    - simulation : the filename basename (without suffix '.nc', if any)
    - variable : the set of variables in the data file
    - period : the period actually covered by the data file (if it has time_bnds)
    - model : the 'model_id' attribute if it exists, otherwise : 'no_model'
    - project  : 'file' (with separator = '|')

    The following restriction apply to such datasets :

    - functions :py:func:`~climaf.classes.calias` and
      :py:func:`~climaf.operators.derive` cannot be used for project
      'file'

    Results are unforeseen if all variables do not have the same time axis

    Raises Climaf_Classes_Error if the file does not exist, if it has
    no variable or lacks one of the requested variables, if no period
    can be worked out, or if the requested period is not included in
    the period covered by the file

    Examples : See :download:`data_file.py <../examples/data_file.py>`

    """
    filename = os.path.expanduser(filename)
    if not os.path.exists(filename):
        raise Climaf_Classes_Error("File %s does no exist" % filename)
    #
    if model is None:
        model = model_id(filename)
    if simulation is None:
        # Only strip the documented '.nc' suffix : blindly dropping the
        # last three characters garbles other names (e.g. 'tas.nc4')
        simulation = os.path.basename(filename)
        if simulation.endswith(".nc"):
            simulation = simulation[:-len(".nc")]
    #
    if variable is None:
        lvars = varsOfFile(filename)
        if len(lvars) == 0:
            raise Climaf_Classes_Error("No variable in file %s" % filename)
        # Comma-separated list of all variables; the popped one comes first
        variable = lvars.pop()
        for v in lvars:
            variable += "," + v
    else:
        lvars = variable.split(',')
        for v in lvars:
            if not fileHasVar(filename, v):
                raise Climaf_Classes_Error("No variable %s in file %s" % (v, filename))
    #
    fperiod = timeLimits(filename)
    if period is None:
        if fperiod is None:
            raise Climaf_Classes_Error("Must provide a period for file %s "
                                       % (filename))
        period = repr(fperiod)
    elif fperiod and not fperiod.includes(init_period(period)):
        raise Climaf_Classes_Error("Max period from file %s is %s"
                                   % (filename, repr(fperiod)))
    #
    d = ds(project='file', model=model, simulation=simulation,
           variable=variable, period=period, path=filename)
    d.files = filename
    return d
class ctree(cobject):
    def __init__(self, climaf_operator, script, *operands, **parameters):
        """ Builds the tree of a composed object, including a dict for outputs.

        Args:
          climaf_operator (string) : name of the operator applied
          script : the object describing the operator; its attribute
            ``flags`` is (shallow) copied to the tree
          operands : the objects the operator applies to; a false
            value (e.g. None) is accepted, any other one must be a
            CliMAF object
          parameters : the operator's keyword parameters; a 'period'
            provided as a cperiod is replaced by its string
            representation

        Raises Climaf_Classes_Error for an operand which is neither
        false nor a CliMAF object
        """
        self.operator = climaf_operator
        self.script = script
        import copy
        self.flags = copy.copy(script.flags)
        self.operands = operands
        if "period" in parameters:
            p = parameters["period"]
            if isinstance(p, cperiod):
                parameters["period"] = repr(p)
        self.parameters = parameters
        for o in operands:
            if o and not isinstance(o, cobject):
                raise Climaf_Classes_Error("operand " + repr(o) + " is not a CliMAF object")
        self.crs = self.buildcrs()
        self.outputs = dict()
        self.register()

    def buildcrs(self, crsrewrite=None, period=None):
        """ Builds the CRS expression representing applying OPERATOR on OPERANDS with PARAMETERS.
        Forces period downtree if provided
        A function for rewriting operand's CRS may be provided
        """
        rep = self.operator + "("
        #
        # Operands are listed in the order they were provided; false
        # ones (e.g. None) are skipped
        for op in self.operands:
            if op:
                opcrs = op.buildcrs(crsrewrite=crsrewrite, period=period)
                if crsrewrite:
                    opcrs = crsrewrite(opcrs)
                rep += opcrs + ","
        #
        # Parameters are listed in alphabetical order, so that the CRS
        # does not depend on the order used in the call; parameter
        # 'member_label' is not part of the CRS
        for par in sorted(self.parameters.keys()):
            if par != 'member_label':
                rep += par + "=" + repr(self.parameters[par]) + ","
        rep += ")"
        rep = rep.replace(",)", ")")
        return rep

    def setperiod(self, period):
        """ modifies the period for all datasets of a tree"""
        self.erase()
        for op in self.operands:
            # False operands (e.g. None) are legit (see __init__ and
            # buildcrs) and have no period to set
            if op:
                op.setperiod(period)
        self.crs = self.buildcrs(period=period)
        self.register()
class scriptChild(cobject):
    def __init__(self, cobject, varname):
        """
        Builds one of the child of a script call, which represents one output

        Args:
          cobject : the object representing the script call (stored as
            attribute ``father``)
          varname (string) : name of the output represented
        """
        self.father = cobject
        self.varname = varname
        self.crs = self.buildcrs()
        self.file = None
        self.register()

    def setperiod(self, period):
        """ Changes the CRS for matching PERIOD, and registers the object again """
        self.erase()
        # The CRS must be rebuilt from the father object : attribute
        # 'crs' of the father is a mere string, which has no 'buildcrs'
        self.crs = self.buildcrs(period=period)
        self.register()

    def buildcrs(self, period=None, crsrewrite=None):
        """ Builds the CRS expression : the father's CRS (for PERIOD, and
        possibly rewritten by function CRSREWRITE), followed by a dot
        and the output name
        """
        tmp = self.father.buildcrs(period=period)
        if crsrewrite:
            tmp = crsrewrite(tmp)
        return tmp + "." + self.varname
def compare_trees(tree1, tree2, func, filter_on_operator=None):
    """
    Recursively compares TREE1 and TREE2.

    For the nodes : compares operator and parameters; ensures
    that FILTER_ON_OPERATOR(operator) is not true

    For the leaves (datasets) : ensure that string representations of
    applying function FUNC to the pair of datasets returns the same
    value for all datasets pairs in the (parallel) trees

    Returns that common value : func(leave1,leave2)) or None

    None is also returned when the trees are not parallel (nodes of
    different kinds, different numbers of operands) or when a node
    has no operand at all

    FUNC cannot return None as a valid value
    """
    if isinstance(tree1, cdataset) and isinstance(tree2, cdataset):
        return func(tree1, tree2)
    if isinstance(tree1, ctree) and isinstance(tree2, ctree):
        if tree1.operator != tree2.operator:
            return None
        if filter_on_operator and filter_on_operator(tree1.operator):
            return None
        if tree1.parameters != tree2.parameters:
            return None
        # zip() would silently drop unpaired operands, and there is
        # nothing to compare (hence no common value) without operands
        if len(tree1.operands) != len(tree2.operands) or not tree1.operands:
            return None
        values = [compare_trees(op1, op2, func, filter_on_operator)
                  for op1, op2 in zip(tree1.operands, tree2.operands)]
        # The value is kept only if all operands pairs agree on it
        common = values[0]
        for value in values[1:]:
            if repr(common) != repr(value):
                common = None
        return common
    if isinstance(tree1, scriptChild) and isinstance(tree2, scriptChild):
        if tree1.varname == tree2.varname:
            return compare_trees(tree1.father, tree2.father,
                                 func, filter_on_operator)
    return None
#: Tells if function ds() should merely log a message, rather than raise an
#: error, when a CRS string is ambiguous or is not valid for any project
allow_errors_on_ds_call = True  # False


def allow_error_on_ds(allow=True):
    """
    Set the module-level flag ``allow_errors_on_ds_call`` to ``allow``
    """
    global allow_errors_on_ds_call
    allow_errors_on_ds_call = allow
def select_projects(**kwargs):
    """
    If kwargs['project'] is a list (has multiple values), select_projects loops on the projects
    until it finds a file containing the aliased variable name.

    Returns the keyword arguments with 'project' set to the first
    project which fits; the keyword arguments are returned unchanged
    if 'project' is missing or is not a list, or if no project fits.
    """
    p_list = kwargs.get('project')
    if not isinstance(p_list, list):
        return kwargs
    for project in p_list:
        wkwargs = kwargs.copy()
        wkwargs['project'] = project
        dat = cdataset(**wkwargs)
        files = dat.baseFiles()
        if not files:
            clogger.info('-- No file found for project ' + project + ' and ' + repr(wkwargs))
            continue
        clogger.info('-- File found for project ' + project + ' and ' + repr(wkwargs))
        # Only the first file is probed
        first_file = files.split(" ")[0]
        # The name of the variable in the file is the requested name,
        # except if an alias is declared (varIsAliased returns None
        # otherwise)
        alias = varIsAliased(project, wkwargs['variable'])
        if alias is not None:
            tmpVarInFile = alias[0]
        else:
            tmpVarInFile = wkwargs['variable']
        if fileHasVar(first_file, tmpVarInFile):
            clogger.info('-- Variable ' + tmpVarInFile + ' (aliased to variable ' +
                         wkwargs['variable'] + ') found in ' + first_file)
            return wkwargs
        clogger.info('-- Variable ' + tmpVarInFile +
                     ' (aliased to variable ' + wkwargs['variable'] + ') was not found in ' + first_file)
    return kwargs
def ds(*args, **kwargs):
    """
    Returns a dataset from its full Climate Reference Syntax string. Example ::

     >>> ds('CMIP5.historical.pr.[1980].global.monthly.CNRM-CM5.r1i1p1.mon.Amon.atmos.last')

    Also a shortcut for :py:meth:`~climaf.classes.cdataset`,
    when used with with only keywords arguments. Example ::

     >>> cdataset(project='CMIP5', model='CNRM-CM5', experiment='historical', frequency='monthly',\
              simulation='r2i3p9', domain=[40,60,-10,20], variable='tas', period='1980-1989', version='last')

    When the string is valid for no project, or for more than one,
    the outcome depends on the setting done with allow_error_on_ds() :
    either a message is logged and None is returned, or
    Climaf_Classes_Error is raised

    You must refer to doc at : :py:meth:`~climaf.classes.cdataset`

    """
    if len(args) > 1:
        raise Climaf_Classes_Error("Must provide either only a string or only keyword arguments")
    if not args:
        # Keywords only : act as a front-end to cdataset
        return cdataset(**select_projects(**kwargs))
    #
    # Collect the datasets which every declared project can build from the string
    crs = args[0]
    matches = []
    for cproj in cprojects:
        try:
            candidate = cprojects[cproj].crs2ds(crs)
        except Climaf_Classes_Error:
            candidate = None
        if candidate:
            matches.append(candidate)
    #
    if len(matches) == 1:
        rep = matches[0]
        if rep.project == 'file':
            rep.files = rep.kvp["path"]
        return rep
    if matches:
        e = "CRS expression %s is ambiguous among projects %s" % (crs, repr(cprojects.keys()))
        if not allow_errors_on_ds_call:
            raise Climaf_Classes_Error(e)
        clogger.info(e)
    else:
        e = "CRS expression %s is not valid for any project in %s" % (crs, repr(cprojects.keys()))
        if not allow_errors_on_ds_call:
            raise Climaf_Classes_Error(e)
        clogger.debug(e)
def cfreqs(project, dic):
    """
    Allow to declare a dictionary specific to ``project`` for matching
    ``normalized`` frequency values to project-specific frequency values

    Normalized frequency values are :
      decadal, yearly, monthly, daily, 6h, 3h, fx and annual_cycle

    When defining a dataset, any reference to a non-standard
    frequency will be left unchanged both in the dataset's CRS and
    when trying to access corresponding datafiles

    A further declaration for the same project replaces the former one

    Examples::

    >>> cfreqs('CMIP5',{'monthly':'mon' , 'daily':'day' })
    """
    # The dict is stored as is (not copied) in the module-level registry
    frequencies[project] = dic
def crealms(project, dic):
    """
    Allow to declare a dictionary specific to ``project`` for matching
    ``normalized`` realm names to project-specific realm names

    Normalized realm names are :
      atmos, ocean, land, seaice

    When defining a dataset, any reference to a non-standard
    realm will be left unchanged both in the dataset's CRS and
    when trying to access corresponding datafiles

    A further declaration for the same project replaces the former one

    Examples::

    >>> crealms('CMIP5',{'atmos':'ATM' , 'ocean':'OCE' })
    """
    # The dict is stored as is (not copied) in the module-level registry
    realms[project] = dic
[docs]def calias(project,variable,fileVariable=None,scale=1.,offset=0.,units=None,missing=None,filenameVar=None) :
""" Declare that in ``project``, ``variable`` is to be computed by
reading ``filevariable``, and applying ``scale`` and ``offset``;
Arg ``filenameVar`` allows to tell which fake variable name should be
used when computing the filename for this variable in this project
(for optimisation purpose);
Can tell that a given constant must be interpreted as a missing value
``variable`` may be a list. In that case, ``fileVariable`` and
``filenameVar``, if provided, should be parallel lists
`` variable`` can be a comma separated list of variables, in which
case this tells how variables are grouped in files (it make sense
to use filenameVar in that case, as this is a xway to provide the
label which is unique to this grouping of variable; scale, offset
and missing args must be the same for all variables in that case
Example ::
>>> calias('erai','tas','t2m',filenameVar='2T')
>>> calias('erai','tas_degC','t2m',scale=1., offset=-273.15) # scale and offset may be provided
>>> calias('EM',[ 'sic', 'sit', 'sim', 'snd', 'ialb', 'tsice'], missing=1.e+20)
>>> calias('data_CNRM','so,thetao',filenameVar='grid_T_table2.2')
NB: A wrapper with same name of this function is defined in
:py:func:`climaf.driver.calias` and it is the one which is
exported by module climaf.api. It allows to use a list of
variable.
"""
if not fileVariable : fileVariable = variable
if not filenameVar : filenameVar = None
if project not in cprojects :
raise Climaf_Classes_Error("project %s is not known"%project)
if project not in aliases : aliases[project]=dict()
if type(variable) is not list : variable = [variable]
if type(filenameVar) is not list : filenameVar = [filenameVar]
if type(fileVariable) is not list : fileVariable= [fileVariable]
if type(units) is not list : units = [units]
for v,u,fv,fnv in zip(variable,units,fileVariable,filenameVar) :
aliases[project][v]=(fv,scale,offset,u,fnv,missing)
def varIsAliased(project, variable):
    """
    Return the n-uplet (fileVariable, scale, offset, units,
    filenameVar, missing) which was declared using calias, and which
    defines how to compute a 'variable' which is not in files, for
    the 'project'

    Return None if there is no such declaration
    """
    if project not in aliases:
        return None
    return aliases[project].get(variable)
def cmissing(project, missing, *kwargs):
    """ Declare that in 'project', a given constant must be interpreted
    as a missing value, for a given set of project's attributes values

    Such a declaration must follow all ``calias`` declarations for the
    same project

    Not implemented yet : for now, this function does nothing
    """
    return None  # TBD
class cpage(cobject):
    def __init__(self, fig_lines=None, widths=None, heights=None,
                 fig_trim=True, page_trim=True, format="png",
                 orientation=None,
                 page_width=1000., page_height=1500., title="", x=0, y=26, ybox=50, pt=24,
                 font="Times-New-Roman", gravity="North", background="white"):
        """
        Builds a CliMAF cpage object, which represents an array of figures (output:
        'png' or 'pdf' figure)

        Args:
          fig_lines (a list of lists of figure objects or an ensemble of figure objects):
           each sublist of 'fig_lines' represents a line of figures
          widths (list, optional): the list of figure widths, i.e. the width of each
           column. By default, if fig_lines is:

             - a list of lists: spacing is even
             - an ensemble: one column is used
          heights (list, optional): the list of figure heights, i.e. the
           height of each line. By default spacing is even

           If fig_lines is an ensemble and neither widths nor heights is
           provided, the numbers of columns and lines are chosen according
           to the number of figures (20 at most)
          fig_trim (logical, optional): to turn on/off triming for all figures.
           It removes all the surrounding extra space of figures in the page,
           either True (default) or False
          page_trim (logical, optional): to turn on/off triming for the page. It
           removes all the surrounding extra space of the page, either True
           (default) or False
          format (str, optional) : graphic output format, either 'png' (default)
           or 'pdf'(not recommended)
          page_width (float, optional) : width resolution of resultant image;
           CLiMAF default: 1000.
          page_height (float, optional) : height resolution of resultant image;
           CLiMAF default: 1500.
          orientation (str,optional): if set, it supersedes page_width and
           page_height with values 1000*1500 (for portrait) or 1500*1000 (for landscape)
          title (str, optional) : append a label below or above (depending optional
           argument 'gravity') figures in the page.

           If title is activated:

            - x, y (int, optional): annotate the page with text.
              x is the offset towards the right from the upper left corner
              of the page, while y is the offset upward or the bottom
              according to the optional argument 'gravity' (i.e. 'South' or 'North'
              respectively); CLiMAF default: x=0, y=26. For more details, see:
              http://www.imagemagick.org/script/command-line-options.php?#annotate ;
              where x and y correspond respectively to tx and ty
              in ``-annotate {+-}tx{+-}ty text``
            - ybox (int, optional): width of the assigned box for title;
              CLiMAF default: 50. For more details, see:
              http://www.imagemagick.org/script/command-line-options.php?#splice
            - pt (int, optional): font size of the title; CLiMAF default: 24
            - font (str, optional): set the font to use when creating title; CLiMAF
              default: 'Times-New-Roman'. To print a complete list of fonts, use:
              'convert -list font'
            - gravity (str, optional): the choosen direction specifies where to position
              title; CLiMAF default: 'North'. For more details, see:
              http://www.imagemagick.org/script/command-line-options.php?#gravity
            - background (str, optional): background color of the assigned box for
              title; default: 'white'. To print a complete list of color names, use:
              'convert -list color'

        Raises Climaf_Classes_Error if arguments are not consistent
        (see the error messages)

        Example:

         Using no default value, to create a page with 2 columns and 3 lines::

          >>> tas_ds=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
          >>> tas_avg=time_average(tas_ds)
          >>> fig=plot(tas_avg,title='title')
          >>> my_page=cpage([[None, fig],[fig, fig],[fig,fig]], widths=[0.2,0.8],
          ... heights=[0.33,0.33,0.33], fig_trim=False, page_trim=False,
          ... format='pdf', title='Page title', x=10, y=20, ybox=45,
          ... pt=20, font='Utopia', gravity='South', background='grey90',
          ... page_width=1600., page_height=2400.)

        """
        if fig_lines is None:
            raise Climaf_Classes_Error("fig_lines must be provided")
        self.fig_trim = fig_trim
        self.page_trim = page_trim
        self.format = format
        if orientation is not None:
            if orientation == 'portrait':
                page_width = 1000.
                page_height = 1500.
            elif orientation == 'landscape':
                page_width = 1500.
                page_height = 1000.
            else:
                raise Climaf_Classes_Error(
                    "if set, orientation must be 'portrait' or 'landscape'")
        self.page_width = page_width
        self.page_height = page_height
        self.title = title
        self.x = x
        self.y = y
        self.ybox = ybox
        self.pt = pt
        self.font = font
        self.gravity = gravity
        self.background = background
        if self.ybox < (self.y + self.pt):
            raise Climaf_Classes_Error("Title exceeds the assigned box: ybox<y+pt")
        if not isinstance(fig_lines, list) and not isinstance(fig_lines, cens):
            raise Climaf_Classes_Error(
                "fig_lines must be a CliMAF ensemble or a list "
                "of lists (each representing a line of figures)")
        if isinstance(fig_lines, list):
            if not widths:
                # Even spacing of columns, their number being the
                # length of the first line
                for line in fig_lines:
                    if len(line) != len(fig_lines[0]):
                        raise Climaf_Classes_Error("each line in fig_lines must have same dimension")
                ncols = len(fig_lines[0])
                widths = [round(1. / ncols, 2) for _ in range(ncols)]
            self.widths = widths
            if not heights:
                # Even spacing of lines
                heights = [round(1. / len(fig_lines), 2) for _ in fig_lines]
            self.heights = heights
            if len(fig_lines) != len(self.heights):
                raise Climaf_Classes_Error(
                    "fig_lines must have same size than heights")
            for line in fig_lines:
                if not isinstance(line, list):
                    raise Climaf_Classes_Error(
                        "each element in fig_lines must be a list of figures")
                if len(line) != len(self.widths):
                    raise Climaf_Classes_Error(
                        "each line in fig_lines must have same dimension as "
                        "widths; pb for sublist " + repr(line))
            self.fig_lines = fig_lines
        else:  # case of an ensemble (cens)
            if not widths and not heights:
                self.scatter_on_page([fig_lines[label] for label in fig_lines.order])
            else:
                figs = [fig for fig in fig_lines.order]
                if not widths:
                    widths = [1.]
                self.widths = widths
                if not heights:
                    heights = [round(1. / len(figs), 2) for _ in figs]
                self.heights = heights
                # Fill the array line after line, following members
                # order, and complete it with None when members are
                # exhausted
                self.fig_lines = []
                for l in heights:
                    line = []
                    for c in widths:
                        if len(figs) > 0:
                            line.append(fig_lines[figs.pop(0)])
                        else:
                            line.append(None)
                    self.fig_lines.append(line)
        #
        self.crs = self.buildcrs()

    def scatter_on_page(self, figs):
        """ Try to optimize nb of columns and lines, based on figs
        list length; sets attributes fig_lines, widths and heights

        Raises Climaf_Classes_Error if there is no figure, or more
        than 20 figures
        """
        n = len(figs)
        if n == 0:
            raise Climaf_Classes_Error("No figure in page")
        if n >= 21:
            raise Climaf_Classes_Error("Too many figures in page")
        if n <= 3:
            nx, ny = 1, n
        elif n == 4:
            nx, ny = 2, 2
        elif n <= 6:
            nx, ny = 2, 3
        elif n <= 8:
            nx, ny = 2, 4
        elif n <= 12:
            nx, ny = 3, 4
        elif n <= 15:
            nx, ny = 3, 5
        else:
            nx, ny = 4, 5
        # Lines of nx figures; the last one is completed with None
        # NOTE(review): for 9 and for 16 figures, one line less than
        # ny is actually built, while heights has ny values
        lines = [figs[i:i + nx] for i in range(0, n, nx)]
        lines[-1].extend([None] * (nx - len(lines[-1])))
        self.fig_lines = lines
        self.widths = [round(1. / nx, 2) for i in range(nx)]
        self.heights = [round(1. / ny, 2) for i in range(ny)]

    def buildcrs(self, crsrewrite=None, period=None):
        """ Builds the CRS expression of the page : a call to ``cpage``
        with the array of figures CRS, and with the page parameters
        (those dealing with the title are included only if there is
        a title)

        NOTE(review): argument 'period' is not forwarded to the
        figures - to be confirmed as intended
        """
        rep = "cpage(["
        for line in self.fig_lines:
            rep += "["
            for f in line:
                if f:
                    rep += f.buildcrs(crsrewrite=crsrewrite) + ","
                else:
                    rep += repr(None) + ","
            rep += " ],"
        rep += "]," + repr(self.widths) + "," + repr(self.heights)
        # User-provided strings (format, title, font ...) are merely
        # concatenated : they may include '%', which would garble a
        # %-formatting of the whole expression
        rep += ", fig_trim='%s', page_trim='%s'" % (self.fig_trim, self.page_trim)
        rep += ", format='" + self.format + "'"
        rep += ", page_width=%d, page_height=%d" % (self.page_width, self.page_height)
        if self.title != "":
            rep += ", title='" + self.title + "'"
            rep += ", x=%d, y=%d, ybox=%d, pt=%d" % (self.x, self.y, self.ybox, self.pt)
            rep += ", font='" + self.font + "'"
            rep += ", gravity='" + self.gravity + "'"
            rep += ", background='" + self.background + "'"
        rep += ")"
        # Get rid of the commas (and space) which come before closing brackets
        rep = rep.replace(",]", "]")
        rep = rep.replace(", ]", "]")
        return rep
class cpage_pdf(cobject):
    def __init__(self, fig_lines=None, widths=None, heights=None,
                 orientation=None, page_width=1000., page_height=1500.,
                 scale=1., openright=False,
                 title="", x=0, y=2, titlebox=False, pt="Huge",
                 font="\\familydefault", background="white"):
        """
        Builds a CliMAF cpage_pdf object, which represents an array of figures (output:
        'pdf' figure). Figures are automatically centered in the page using 'pdfjam' tool; see
        http://www2.warwick.ac.uk/fac/sci/statistics/staff/academic-research/firth/software/pdfjam

        Args:
          fig_lines (a list of lists of figure objects or an ensemble of figure objects):
           each sublist of 'fig_lines' represents a line of figures
          widths (list, optional): the list of figure widths, i.e. the width of each
           column. By default, if fig_lines is:

             - a list of lists: spacing is even
             - an ensemble: one column is used
          heights (list, optional): the list of figure heights, i.e. the
           height of each line. By default spacing is even
          page_width (float, optional): width resolution of resultant image;
           CLiMAF default: 1000.
          page_height (float, optional): height resolution of resultant image;
           CLiMAF default: 1500.
          orientation (str,optional): if set, it supersedes page_width and
           page_height with values 1000*1500 (for portrait) or 1500*1000 (for landscape)
          scale (float, optional): to scale all input pages; default:1.
          openright (logical, optional): this option puts an empty figure before the
           first figure; default: False. For more details, see:
           http://ftp.oleane.net/pub/CTAN/macros/latex/contrib/pdfpages/pdfpages.pdf
          title (str, optional): append a label in the page.

           If title is activated, it is by default horizontally centered:

            - x (int, optional): title horizontal shift (in cm).
            - y (int, optional): vertical shift from the top of the page (in cm);
              only positive (down) values have an effect, default=2 cm
            - titlebox (logical, optional): set it to True to frame the text in a box,
              frame color is 'black'
            - pt (str, optional): title font size; CLiMAF default: 'Huge'
              (corresponding to 24 pt). You can set or not a backslash before this
              argument.
            - font (str, optional): font
              abbreviation among available LaTex fonts; default: '\\\\\\\\familydefault'.
            - background (str, optional): frame fill background color; among LaTex
              'fcolorbox' colors; default: 'white'.

        Left and right margins are set to 2cm.

        Raises Climaf_Classes_Error if arguments are not consistent
        (see the error messages)

        Example:

         Using no default value, to create a PDF page with 2 columns and 3 lines::

          >>> tas_ds=ds(project='example',simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
          >>> tas_avg=time_average(tas_ds)
          >>> fig=plot(tas_avg,title='title',format='pdf')
          >>> crop_fig=cpdfcrop(fig)
          >>> my_pdfpage=cpage_pdf([[crop_fig,crop_fig],[crop_fig, crop_fig],[crop_fig,crop_fig]],
          ... widths=[0.2,0.8], heights=[0.33,0.33,0.33], page_width=800., page_height=1200.,
          ... scale=0.95, openright=True, title='Page title', x=-5, y=10, titlebox=True,
          ... pt='huge', font='ptm', background='yellow') # Font name is 'Times'

        """
        if fig_lines is None:
            raise Climaf_Classes_Error("fig_lines must be provided")
        if orientation is not None:
            if orientation == 'portrait':
                page_width = 1000.
                page_height = 1500.
            elif orientation == 'landscape':
                page_width = 1500.
                page_height = 1000.
            else:
                raise Climaf_Classes_Error(
                    "if set, orientation must be 'portrait' or 'landscape'")
        self.page_width = page_width
        self.page_height = page_height
        self.scale = scale
        self.openright = openright
        self.title = title
        self.x = x
        self.y = y
        self.titlebox = titlebox
        self.pt = pt
        self.font = font
        self.background = background
        if not isinstance(fig_lines, list) and not isinstance(fig_lines, cens):
            raise Climaf_Classes_Error(
                "fig_lines must be a CliMAF ensemble or a list "
                "of lists (each representing a line of figures)")
        if isinstance(fig_lines, list):
            if not widths:
                # Even spacing of columns, their number being the
                # length of the first line
                for line in fig_lines:
                    if len(line) != len(fig_lines[0]):
                        raise Climaf_Classes_Error("each line in fig_lines must have same dimension")
                ncols = len(fig_lines[0])
                widths = [round(1. / ncols, 2) for _ in range(ncols)]
            self.widths = widths
            if not heights:
                # Even spacing of lines
                heights = [round(1. / len(fig_lines), 2) for _ in fig_lines]
            self.heights = heights
            if len(fig_lines) != len(self.heights):
                raise Climaf_Classes_Error(
                    "fig_lines must have same size than heights")
            for line in fig_lines:
                if not isinstance(line, list):
                    raise Climaf_Classes_Error(
                        "each element in fig_lines must be a list of figures")
                if len(line) != len(self.widths):
                    raise Climaf_Classes_Error(
                        "each line in fig_lines must have same dimension as "
                        "widths; pb for sublist " + repr(line))
            self.fig_lines = fig_lines
        else:  # case of an ensemble (cens)
            figs = [fig for fig in fig_lines.order]
            if not widths:
                widths = [1.]
            self.widths = widths
            if not heights:
                heights = [round(1. / len(figs), 2) for _ in figs]
            self.heights = heights
            # Fill the array line after line, following members order,
            # and complete it with None when members are exhausted
            self.fig_lines = []
            for l in heights:
                line = []
                for c in widths:
                    if len(figs) > 0:
                        line.append(fig_lines[figs.pop(0)])
                    else:
                        line.append(None)
                self.fig_lines.append(line)
        #
        self.crs = self.buildcrs()

    def buildcrs(self, crsrewrite=None, period=None):
        """ Builds the CRS expression of the page : a call to
        ``cpage_pdf`` with the array of figures CRS, and with the page
        parameters (those dealing with the title are included only if
        there is a title)

        NOTE(review): argument 'period' is not forwarded to the
        figures - to be confirmed as intended
        """
        rep = "cpage_pdf(["
        for line in self.fig_lines:
            rep += "["
            for f in line:
                if f:
                    rep += f.buildcrs(crsrewrite=crsrewrite) + ","
                else:
                    rep += repr(None) + ","
            rep += " ],"
        # No quote must follow the heights list : it would make the
        # expression an invalid one
        rep += "]," + repr(self.widths) + "," + repr(self.heights)
        rep += ", page_width=%d, page_height=%d, scale=%.2f, openright='%s'" \
               % (self.page_width, self.page_height, self.scale, self.openright)
        if self.title != "":
            # User-provided strings are merely concatenated : they may
            # include '%', which would garble a %-formatting
            rep += ", title='" + self.title + "'"
            rep += ", x=%d, y=%d, titlebox='%s'" % (self.x, self.y, self.titlebox)
            rep += ", pt='" + self.pt + "'"
            rep += ", font='" + self.font + "'"
            rep += ", background='" + self.background + "'"
        rep += ")"
        # Get rid of the commas (and space) which come before closing brackets
        rep = rep.replace(",]", "]")
        rep = rep.replace(", ]", "]")
        return rep
def guess_projects(crs):
    """
    Return the list of projects involved in the datasets involved in a
    CRS expression.

    There is one entry per call to ``ds(`` found in the expression,
    in the same order (hence possibly with duplicates)
    """

    def guess_project(crs):
        """
        Guess which is the project name for a dataset's crs, with minimum
        assumption on the separator used in the project

        The first character (expected to be the opening quote) is skipped
        """
        separators = [r'.', r'_', r'£', r'$', r'@', r'_', r'|', r'&', r"-", r"=", r"^",
                      r";", r":", r"!", r'§', r'/', r'.', r'ø', r'+', r'°']
        counts = dict()
        for candidate in separators:
            counts[candidate] = crs.count(candidate)
        # Assume that the highest count gives the right separator; in
        # case of a tie, the last candidate browsed wins
        best_count = 0
        for candidate in counts:
            if counts[candidate] >= best_count:
                best_count = counts[candidate]
                sep = candidate
        return crs[1:crs.find(sep)]

    return [guess_project(dataset_crs)
            for dataset_crs in re.findall(r"ds\(([^)]*)", crs)]
def browse_tree(cobj, func, results):
    """ Browse a CliMAF object's tree, accumulating in 'results' the
    values returned by 'func' on each tree node or leave (if they are
    not false values, such as None)

    'func' is called with the object itself for leaves (datasets and
    dummies), and with the operator name for the nodes. List 'results'
    is changed in place; nothing is returned.

    An error is logged for objects of other types (which are not browsed)
    """
    if isinstance(cobj, cdataset) or isinstance(cobj, cdummy):
        res = func(cobj)
        if res:
            results.append(res)
    elif isinstance(cobj, ctree):
        res = func(cobj.operator)
        if res:
            results.append(res)
        for op in cobj.operands:
            browse_tree(op, func, results)
    elif isinstance(cobj, scriptChild):
        browse_tree(cobj.father, func, results)
    elif isinstance(cobj, cpage) or isinstance(cobj, cpage_pdf):
        # Both types of pages store their figures as a list of lines
        for line in cobj.fig_lines:
            for fig in line:
                browse_tree(fig, func, results)
    elif cobj is None:
        return
    else:
        clogger.error("Cannot yet handle object :%s", repr(cobj))
        return
def domainOf(cobject):
    """ Returns a domain for a CliMAF object : if object is a dataset, returns
    its domain, otherwise returns domain of first operand

    For a dataset : a domain provided as a list is returned as a string
    of comma-separated integers, domain 'global' is returned as an
    empty string, and any other domain is returned as is

    Returns "none" for None, and logs an error (returning None) for
    objects of an unknown type
    """
    if isinstance(cobject, cdataset):
        domain = cobject.domain
        if type(domain) is list:
            leading = "".join(["%d," % coord for coord in domain[0:-1]])
            return "%s%d" % (leading, domain[-1])
        if domain == "global":
            return ""
        return domain
    if isinstance(cobject, ctree):
        clogger.debug("For now, domainOf logic for scripts output is basic (1st operand) - TBD")
        return domainOf(cobject.operands[0])
    if isinstance(cobject, scriptChild):
        clogger.debug("For now, domainOf logic for scriptChilds is basic - TBD")
        return domainOf(cobject.father)
    if isinstance(cobject, cens):
        clogger.debug("for now, domainOf logic for 'cens' objet is basic (1st member)- TBD")
        return domainOf(list(cobject.values())[0])
    if cobject is None:
        return "none"
    clogger.error("Unkown class for argument " + repr(cobject))
def varOf(cobject):
    """ Returns attribute 'variable' for a CliMAF object (see attributeOf) """
    return attributeOf(cobject, "variable")


def modelOf(cobject):
    """ Returns attribute 'model' for a CliMAF object (see attributeOf) """
    return attributeOf(cobject, "model")


def simulationOf(cobject):
    """ Returns attribute 'simulation' for a CliMAF object (see attributeOf) """
    return attributeOf(cobject, "simulation")


def projectOf(cobject):
    """ Returns attribute 'project' for a CliMAF object (see attributeOf) """
    return attributeOf(cobject, "project")


def realmOf(cobject):
    """ Returns attribute 'realm' for a CliMAF object (see attributeOf) """
    return attributeOf(cobject, "realm")


def gridOf(cobject):
    """ Returns attribute 'grid' for a CliMAF object (see attributeOf) """
    return attributeOf(cobject, "grid")
def attributeOf(cobject, attrib):
    """ Returns the attribute for a CliMAF object : if object is a dataset, returns
    its attribute property, otherwise returns attribute of first operand

    More precisely, by order of precedence :

    - for a dataset : the object attribute, if it is not None;
      otherwise the entry of the dataset's ``kvp`` dict
    - for an ensemble : the attribute of the first member
    - for any object which has a true value for that attribute : this value
    - for a tree : the attribute of the first operand
    - for a dummy : "dummy"
    - for a page : None
    - for None : an empty string

    Raises Climaf_Classes_Error for any other object
    """
    if isinstance(cobject, cdataset):
        val = getattr(cobject, attrib, None)
        if val is None:
            val = cobject.kvp.get(attrib)
        return val
    if isinstance(cobject, cens):
        return attributeOf(list(cobject.values())[0], attrib)
    own_value = getattr(cobject, attrib, None)
    if own_value:
        return own_value
    if isinstance(cobject, ctree):
        clogger.debug("for now, varOf logic is basic (1st operand) - TBD")
        return attributeOf(cobject.operands[0], attrib)
    if isinstance(cobject, cdummy):
        return "dummy"
    if isinstance(cobject, (cpage, cpage_pdf)):
        return None
    if cobject is None:
        return ''
    raise Climaf_Classes_Error("Unknown class for argument " + repr(cobject))
class Climaf_Classes_Error(Exception):
    """ Error raised by this module; the error is logged (using clogger)
    as soon as it is created, even if it is caught afterwards
    """

    def __init__(self, valeur):
        # valeur : the object (usually a message) describing the error
        self.valeur = valeur
        clogger.error(str(self))
        dedent(100)

    def __str__(self):
        # The string form is the representation of the value (hence,
        # for a message, it includes quotes)
        return repr(self.valeur)
class Climaf_Error(Exception):
    """ Generic CliMAF error; the error is logged (using clogger) as
    soon as it is created, even if it is caught afterwards
    """

    def __init__(self, valeur):
        # valeur : the object (usually a message) describing the error
        self.valeur = valeur
        clogger.error(str(self))
        dedent(100)

    def __str__(self):
        # The string form is the representation of the value (hence,
        # for a message, it includes quotes)
        return repr(self.valeur)
def test():
    """ Basic smoke test : sets default values for some facets, then
    creates a dataset and a tree
    """
    # clogger.basicConfig(level=clogger.DEBUG)
    # clogger.basicConfig(format='"%(asctime)s [%(funcName)s: %(filename)s,%(lineno)d] %(message)s : %(levelname)s', level=clogger.DEBUG)
    defaults = (("project", "CMIP5"),
                # ("project", "PR6"),
                ("model", "CNRM-CM5"),
                ("experiment", "historical"),
                ("simulation", "r1i1p1"),
                ("period", "197901-198012"),
                ("domain", "global"))
    for attribute, value in defaults:
        cdef(attribute, value)
    #
    tos = cdataset(experiment="rcp85", variable="tos", period="19790101-19790102")
    # NOTE(review): ctree's second positional argument is 'script' (its
    # attribute 'flags' is read); here the dataset is passed in that
    # position - to be confirmed
    tr = ctree("operator", tos, para1="val1", para2="val2")
    # tos.pr()
    #
    # ds1=Dataset(period="1850-2012")
    # genericDataSets(ds1.crs)
    # ds2=Dataset(project="CMIP3")
    # ex="toto("+ ds1.crs + "," + ds2.crs
    # print genericDataSets(ex)
    # print firstGenericDataSet(ex)
def t2():
    """ Basic smoke test : builds a period object from a string """
    # Name 'period' is not bound in this module : only init_period and
    # cperiod are imported from module period; init_period is the one
    # which builds a period from a string
    p = init_period("1984-1984")
if __name__ == "__main__":
    # Running this module as a script launches the basic smoke test
    test()