In [1]:
from IPython.display import Image
Image(url='https://vesg.ipsl.upmc.fr/thredds/fileServer/IPSLFS/jservon/CliMAF_Notebooks_html/CliMAF-logo-small.png')

Out[1]:

# Understand how the CliMAF cache works and manage the content¶

In this notebook we will see how the CliMAF cache works and how to:

1. totally erase the content of the cache: craz
1. understand how CliMAF stores its results and knows that those results exist (and are linked with sequences of operations)
1. remove one specific result: cdrop
1. remove results using criteria: pattern, age... => crm
1. VERY IMPORTANT: remove everything but a list of results you want to keep: cprotect
1. csync: synchronize the index with the content of the cache

# ¶

## First, import climaf¶

In [2]:
from climaf.api import *

CliMAF version = 1.2.13

CliMAF install => /ciclad-home/jservon/Evaluation/CliMAF/climaf_installs/climaf_V1.2.13_post
python => /modfs/modtools-phw/miniconda2/envs/analyse_2.7/bin/python
---
Required softwares to run CliMAF => you are using the following versions/installations:
ncl 6.6.2 => /modfs/modtools-phw/miniconda2/envs/analyse_2.7/bin/ncl
cdo 1.9.6 => /opt/nco/1.9/bin/cdo
nco (ncks) 4.5.2 => /opt/nco-4.5.2/bin/ncks
ncdump fichier => /modfs/modtools-phw/miniconda2/envs/analyse_2.7/bin/ncdump
Check stamping requirements
nco (ncatted) found -> /opt/nco-4.5.2/bin/ncatted
convert found -> /usr/bin/convert
pdftk found -> /usr/bin/pdftk
---

Cache directory set to : /data/jservon/climafcache (use $CLIMAF_CACHE if set)
Cache directory for remote data set to : /data/jservon/climafcache/remote_data (use $CLIMAF_REMOTE_CACHE if set)
warning  : When defining temp_penalty : duplicate declaration for input #0
warning  : When defining cquantile : duplicate declaration for input #0
warning  : When defining cquantile : duplicate declaration for input #0
Available macros read from ~/.climaf.macros are : []


### And set verbosity ('critical' -> minimum ; 'debug' -> maximum)¶

In [2]:
clog('critical') # min verbosity = critical < warning < info < debug = max verbosity


# 1. Start with a brand new cache: totally erase the content of the cache¶

### !! Consider using a 'test' cache to do this (simply add 'test' at the end of your CLIMAF_CACHE variable, and re-start)¶

In [3]:
# -- Check which cache you are using
from climaf.cache import cachedirs
cachedirs

Out[3]:
['/data/jservon/climafcache_test']

## If you already have a cache, you can see an index file at its root path:¶

In [4]:
cachedir = cachedirs[0]
!cat $cachedir/index

cat: /data/jservon/climafcache_test/index: Aucun fichier ou dossier de ce type

### clist will retrieve the list of the CRS of the existing results from the index file $CLIMAF_CACHE/index¶

In [5]:
clist()

Content of CliMAF cache

Out[5]:
[]

In [6]:
craz()


## Now check the content of your cache with clist()¶

### clist will retrieve the list of the CRS of the existing results from the index file $CLIMAF_CACHE/index¶

In [7]:
clist()

Content of CliMAF cache

Out[7]:
[]

### Because we've just cleaned it, your cache is now totally empty.¶

# ¶

## 2. How CliMAF stores its results and knows that those results exist (and are linked with sequences of operations)¶

### As an example, we will extract a dataset from the CMIP5 archive and do some pretreatments to add results to the cache¶

In [9]:
# -- Let's get some data as a start
dat_cmip5 = ds(project='CMIP5',
               model='CNRM-CM5',
               variable='tos',
               experiment='historical',
               period='1980-2000',
               frequency='monthly',
               simulation='r1i1p1',
               )
summary(dat_cmip5)

/bdd/CMIP5/output/CNRM-CERFACS/CNRM-CM5/historical/mon/ocean/Omon/r1i1p1/latest/tos/tos_Omon_CNRM-CM5_historical_r1i1p1_198001-198912.nc
/bdd/CMIP5/output/CNRM-CERFACS/CNRM-CM5/historical/mon/ocean/Omon/r1i1p1/latest/tos/tos_Omon_CNRM-CM5_historical_r1i1p1_199001-199912.nc
/bdd/CMIP5/output/CNRM-CERFACS/CNRM-CM5/historical/mon/ocean/Omon/r1i1p1/latest/tos/tos_Omon_CNRM-CM5_historical_r1i1p1_200001-200512.nc

Out[9]:
{'domain': 'global',
'experiment': 'historical',
'frequency': 'monthly',
'model': 'CNRM-CM5',
'period': 1980-2000,
'project': 'CMIP5',
'realization': 'r1i1p1',
'realm': '*',
'root': '/bdd',
'simulation': 'r1i1p1',
'table': '*',
'variable': 'tos',
'version': 'latest'}

## At this stage, dat_cmip5 is only a python object. CliMAF does not load data in memory.¶

In [10]:
dat_cmip5.crs

Out[10]:
"ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest')"

## If I want to get the result of this data request, I use cfile() to return the netcdf file:¶

https://climaf.readthedocs.io/en/master/functions_results_numeric.html?highlight=cfile#cfile-get-the-file-value-of-a-climaf-object

## CliMAF automatically provides a unique name to the output based on:¶

• the root path of the cache ($CLIMAF_CACHE, or the variable cachedirs)
• a hash of the CRS
In [11]:
cfile(dat_cmip5)

Out[11]:
'/data/jservon/climafcache_test/89f3a/1535f/b106c/37247/4f837/36150/4ecdb/20086/2a547/68ba7/cd864/2.nc'

## When you use cfile on your CliMAF object, CliMAF keeps the CRS of the new results of the cache in the memory of the python session¶

In [12]:
clist()

Content of CliMAF cache

Out[12]:
["select(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'))"]

## If you reinvoke cfile on the same object, it will:¶

• scan this list
• see that this sequence of operations has already been executed
• and directly return the file associated with it
In [13]:
cfile(dat_cmip5)

Out[13]:
'/data/jservon/climafcache_test/89f3a/1535f/b106c/37247/4f837/36150/4ecdb/20086/2a547/68ba7/cd864/2.nc'

## Let's add some more stuff in the cache¶

In [18]:
# -- Get a reference dataset: the 'tos' climatology of product WOA13-v2 from the 'ref_climatos' project
ref = ds(project='ref_climatos', variable='tos', product='WOA13-v2',clim_period='195501-201212')
summary(ref)

/data/jservon/Evaluation/ReferenceDatasets/climatos/ocn/mo/tos/WOA13-v2/ac/tos_Omon_WOA13-v2_observations_195501-201212-clim.nc

Out[18]:
{'clim_period': '195501-201212',
'clim_period_begin': '*',
'clim_period_end': '*',
'domain': 'global',
'frequency': 'annual_cycle',
'obs_type': '*',
'period': fx,
'product': 'WOA13-v2',
'project': 'ref_climatos',
'simulation': 'refproduct',
'table': '*',
'variable': 'tos'}
In [19]:
# -- Compute a DJF climatology for the simulation and for the reference
djf_dat = clim_average(dat_cmip5, 'DJF')
djf_ref = clim_average(ref, 'DJF')

# -- Regrid ref and simulation on the same regular grid
rgrd_dat = regridn(djf_dat, cdogrid='r360x180')
rgrd_ref = regridn(djf_ref, cdogrid='r360x180')

# -- And compute the difference (simulation minus reference)
bias = minus(rgrd_dat, rgrd_ref)

# -- Make a simple plot of the bias
myplot = plot(bias)

# -- And execute the final object: cfile() returns the path of the resulting file in the cache
cfile(myplot)

Out[19]:
'/data/jservon/climafcache_test/36dc7/12dea/f1e02/5fb40/ebfcb/12074/1e7e4/0d6ba/ca5af/e5493/c2ce7/d.png'

## When you cfile(myplot), CliMAF will:¶

• analyse the 'tree of operations' that leads to the result
• check if it has the operands of the tree
• use the ones it has
• and compute the ones missing

## The cache now has more results, corresponding to the intermediate results leading to the final plot:¶

In [20]:
clist()

Content of CliMAF cache

Out[20]:
["ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg')",
"select(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'))",
"minus(regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'),regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'))",
"ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg')",
"ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')",
"plot(minus(regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'),regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180')))",
"regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180')",
"regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180')",
"ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')"]

### Let's say that we want to remove the regridded climatologies only. We use cdrop to do this:¶

In [21]:
# -- Remove the two regridded climatologies (simulation and reference) from the cache
cdrop(rgrd_dat)
cdrop(rgrd_ref)

Out[21]:
True

### You can confirm with clist() that the files are removed:¶

In [22]:
clist()

Content of CliMAF cache

Out[22]:
["ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg')",
"select(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'))",
"minus(regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'),regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'))",
"ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg')",
"ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')",
"plot(minus(regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'),regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180')))",
"ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')"]

## 4. Remove results using conditions (pattern, age, size): crm¶

### We use crm(pattern='CNRM-CM5') to remove all the results containing 'CNRM-CM5' in their CRS:¶

In [23]:
crm(pattern='CNRM-CM5')

Removed files:

Out[23]:
["ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg')",
"plot(minus(regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'),regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180')))",
"select(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'))",
"minus(regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'),regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'))",
"ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')"]

### Now you should only have the reference and its climatology¶

In [24]:
clist()

Content of CliMAF cache

Out[24]:
["ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg')",
"ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')"]

### We can also use crm() to remove files based on their age. For instance, let's remove all the results that haven't been touched for more than 20 days:¶

In [25]:
crm(age='+20')


### Or remove files based on their size: remove all the files bigger than 2M¶

In [26]:
crm(size='2M')

Removed files:

Out[26]:
["ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg')"]

### Check the sizes of the elements in the cache with cdu()¶

In [27]:
cdu()

 260.0K : total
260.0K : ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')


### => you can combine those conditions:¶

In [23]:
crm(pattern='CNRM-CM5', age='+20', size='3M')


## 5. VERY IMPORTANT: remove everything but a list of results you want to keep¶

### Let's start by putting some results back in the cache:¶

In [28]:
# -- Execute the final object again to put results back in the cache
cfile(myplot)

Out[28]:
'/data/jservon/climafcache_test/36dc7/12dea/f1e02/5fb40/ebfcb/12074/1e7e4/0d6ba/ca5af/e5493/c2ce7/d.png'
In [29]:
clist()

Content of CliMAF cache

Out[29]:
["ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg')",
"minus(regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'),regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'))",
"ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')",
"plot(minus(regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'),regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180')))",
"regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180')",
"regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180')",
"ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')"]

### cprotect will change the 'write' rights of the files, so that you can't delete them (except with cdrop(..., force=True))¶

In [30]:
# -- Protect the two DJF climatologies and the final plot against deletion
cprotect(djf_dat)
cprotect(djf_ref)
cprotect(myplot)


### Now I just clean the cache with craz(), and it will erase everything but my protected files¶

In [31]:
craz()

In [32]:
clist()

Content of CliMAF cache

Out[32]:
["ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')",
"plot(minus(regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'),regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180')))",
"ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')"]

### If you want to delete a protected file, use cdrop(..., force=True)¶

In [36]:
cdrop(myplot, force=True)

Out[36]:
True
In [37]:
clist()

Content of CliMAF cache

Out[37]:
["ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')",
"ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')"]

## 6. Synchronize the file $CLIMAF_CACHE/index with the actual content of the cache: csync(True)¶

### CliMAF reads the content of the cache at the beginning of the session, and then stores the CRS of the new results in memory (and not in the index file during the session)¶

### This can have drawbacks when:¶

• you have multiple processes working on the same cache => typically when you use parallel processes
• some results of the cache have been manually removed

### You then need to synchronize your index file with the actual content of the cache. To do this, we use csync():

In [33]:
# -- Check the content of the index file:
!cat $cachedir/index

(dp0
.
In [34]:
# -- Synchronize
csync(True)

In [35]:
# -- And check the index file after
!cat \$cachedir/index

(dp0
S"ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')"
p1
S'/data/jservon/climafcache_test/7d3ba/57a5f/418c6/8686d/5cefa/429b1/5d0b3/25fa1/89af1/e487f/476eb/6.nc'
p2
sS"plot(minus(regridn(ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180'),regridn(ccdo(ccdo(ds('ref_climatos%refproduct%tos%fx%global%annual_cycle%WOA13-v2%195501-201212%*%*%*%*'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12'),cdogrid='r360x180')))"
p3
S'/data/jservon/climafcache_test/36dc7/12dea/f1e02/5fb40/ebfcb/12074/1e7e4/0d6ba/ca5af/e5493/c2ce7/d.png'
p4
sS"ccdo(ccdo(ds('CMIP5%r1i1p1%tos%1980-2000%global%/bdd%CNRM-CM5%*%historical%r1i1p1%monthly%*%latest'),operator='ymonavg'),operator='timmean -seltimestep,1,2,12')"
p5
S'/data/jservon/climafcache_test/f97c9/0b1fc/f7160/ac47c/1f338/d17d7/01930/d2d7e/061b9/22f09/ec16b/a.nc'
p6
s.

## ¶

In [ ]: