@jimsrc
Last active November 23, 2016 18:36

Revisions

  1. jimsrc revised this gist Nov 23, 2016. 1 changed file with 394 additions and 1 deletion.
    395 changes: 394 additions & 1 deletion funcs.py
    @@ -1 +1,394 @@
    funcs file
#!/usr/bin/env ipython
# -*- coding: utf-8 -*-
import numpy as np
import os, sys
from scipy.io.netcdf import netcdf_file
from h5py import File as h5
from numpy import (
    loadtxt, size, mean, isnan, array,
    ones, zeros, empty, nanmean, nanmedian, nanstd,
    concatenate,
)
from glob import glob
from os.path import isfile, isdir
from datetime import datetime, timedelta
from scipy.interpolate import (
    splrep,  # Spline interpolation
    splev,   # Spline evaluator
)
import argparse
if 'DISPLAY' in os.environ:
    from pylab import pause, find


#--- globals
HOME = os.environ['HOME']

class arg_to_datetime(argparse.Action):
    """
    argparse action that handles command-line arguments of the
    form "dd/mm/yyyy" (string type) and converts them to
    datetime objects.
    NOTE: can be used even when `nargs` > 1.
    """
    def __init__(self, option_strings, dest, **kwargs):
        #if nargs is not None:
        #    raise ValueError("nargs not allowed")
        super(arg_to_datetime, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        #print '%r %r %r' % (namespace, values, option_string)
        if len(values) == 1:
            # `values` is a one-element list of "dd/mm/yyyy" strings
            dd, mm, yyyy = map(int, values[0].split('/'))
            value = datetime(yyyy, mm, dd)
        elif len(values) > 1:
            value = []
            for val in values:
                dd, mm, yyyy = map(int, val.split('/'))
                value += [datetime(yyyy, mm, dd)]
        setattr(namespace, self.dest, value)


class arg_to_utcsec(argparse.Action):
    """
    argparse action that handles a command-line argument of the
    form "dd/mm/yyyy" (string type) and converts it to UTC seconds.
    """
    def __init__(self, option_strings, dest, nargs=None, **kwargs):
        if nargs is not None:
            raise ValueError("nargs not allowed")
        super(arg_to_utcsec, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        #print '%r %r %r' % (namespace, values, option_string)
        dd, mm, yyyy = map(int, values.split('/'))
        value = (datetime(yyyy, mm, dd) - datetime(1970, 1, 1)).total_seconds()
        setattr(namespace, self.dest, value)
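
#--- illustrative sketch (added; not part of the original gist): how these
#    actions might be attached to a parser. The option names below are
#    hypothetical and only meant to show the intended usage.
def _demo_date_args():
    p = argparse.ArgumentParser()
    p.add_argument('--date', action=arg_to_utcsec,
        help='a single "dd/mm/yyyy" date, stored as UTC seconds')
    p.add_argument('--dates', nargs=2, action=arg_to_datetime,
        help='two "dd/mm/yyyy" dates, stored as a list of datetime objects')
    args = p.parse_args(['--date', '23/11/2016',
                         '--dates', '01/01/2006', '31/12/2013'])
    return args.date, args.dates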

def nans(shape, dtype=np.float32):
    return np.nan*np.ones(shape, dtype=dtype)


def ncep_gh(yyyy, mm, level=None, dir_src=None, sync=True, tsync=None):
    """
    input:
    tsync: in utc-sec
    """
    if dir_src is None:
        dir_src = '../out/out.build_weather/ncep'  #HOME+'/data_gdas'
    if level is None:
        level = 100  # default level in [hPa]
    lname = 'level_%04d' % level
    fname_inp = dir_src + '/test_%04d.h5' % yyyy
    assert isfile(fname_inp), ' --> does not exist: ' + fname_inp
    f = h5(fname_inp, 'r')
    dpath = lname + '/%04d-%02d' % (yyyy, mm)
    #print " ---> ", f[dpath].keys()
    g_t = f[dpath+'/utcsec'].value  # utc sec
    g_h = f[dpath+'/h'].value       # geopotential height
    if sync:
        tck = splrep(g_t, g_h, s=0)
        gh = splev(tsync, tck, der=0)  # der: order of derivative to compute
        return gh
    else:
        return g_h
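
# Note (added for clarity): when `sync=True`, the monthly geopotential-height
# series (g_t, g_h) is fitted with a cubic spline (scipy's splrep/splev) and
# evaluated at the caller's `tsync` times, so the returned profile is sampled
# on the caller's own time grid.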


def equi_days(dini, dend, n):
    """
    returns the most equi-partitioned tuple of numbers of days
    between the date objects 'dini' and 'dend'
    """
    days = (dend - dini).days
    days_part = np.zeros(n, dtype=np.int)
    resid = np.mod(days, n)
    for i in range(n-resid):
        days_part[i] = days/n
    # last positions, where I put the residuals
    last = np.arange(start=-1, stop=-resid-1, step=-1)
    for i in last:
        days_part[i] = days/n + 1

    assert np.sum(days_part) == days, \
        " --> something went wrong! :/ "

    return days_part
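
# Example (illustrative, not from the original gist):
#   equi_days(datetime(2016,1,1), datetime(2016,1,11), 3) -> array([3, 3, 4])
#   i.e. the 10 days are split as evenly as possible, with the remainder
#   absorbed by the last block(s).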

def equi_dates(dini, dend, n):
    """
    returns:
    - inis: numpy array of dates, of size (n+1); where
      - inis[:-1]: the 'n' start dates of the most equi-partitioned
        contiguous blocks of dates between 'dini' and 'dend'
        (date objects).
      - inis[-1]: 'dend' (max date of the whole block)
    """
    days_part = equi_days(dini, dend, n)
    inis = np.empty(n+1, dtype=datetime)
    inis[0] = dini
    inis[-1] = dend
    for i in range(1, inis.size-1):
        inis[i] = inis[i-1] + timedelta(days=days_part[i-1])

    return inis
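
# Example (illustrative): equi_dates(datetime(2016,1,1), datetime(2016,1,11), 3)
#   -> [2016-01-01, 2016-01-04, 2016-01-07, 2016-01-11]
#   (the block start dates, plus the final date)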

def equi_dates_RoundMonth(dini, dend, n):
    """ same as equi_dates(), but with the start dates rounded to the 1st of the month """
    assert dini.day == 1, " ERROR: dini.day must be 1."
    inis = equi_dates(dini, dend, n)
    inis_ = np.empty(n+1, dtype=datetime)
    inis_[-1] = dend
    for i, i_ in zip(inis, range(inis_.size-1)):
        inis_[i_] = datetime(i.year, i.month, 1)
    return inis_

def rebine_mats(a, b):  # original version
    """
    Rebinning of 'b' to the resolution of 'a', where:
    - b[0] is denser than a[0] (IMPORTANT!)
    - a[0], a[1]: x and y components of a
    - b[0], b[1]: x and y components of b
    NOTE: For this to make sense (*), the following should
    be true: (bx[0]-ax[0]) << La && (bx[-1]-ax[-1]) << La,
    where La = (ax[-1]-ax[0]); in other words,
    "the borders of 'bx' must *not* be very different
    from those of 'ax'."
    """
    ax, ay = a[0,:], a[1,:]
    bx, by = b[0,:], b[1,:]
    by_binned = np.zeros(ax.size, dtype=np.float64)
    for j in range(ax.size):
        if j == 0:
            ax_lo = 0.5*(ax[0]+ax[1])    # midpoint of the first two
            cc = bx <= ax_lo             # (*)
        elif j == ax.size-1:
            ax_hi = 0.5*(ax[-2]+ax[-1])  # midpoint of the last two
            cc = bx >= ax_hi             # (*)
        else:
            ax_min = 0.5*(ax[j-1] + ax[j])
            ax_max = 0.5*(ax[j] + ax[j+1])
            cc = (bx > ax_min) & (bx < ax_max)
        by_binned[j] = np.nanmean(by[cc])
    return by_binned


def rebine(ax, bx, by_data, nbef=1, naft=1):
    """
    Rebinning of `by_data` to the resolution of `ax`.
    **IMPORTANT**: `bx` is denser than `ax`!
    inputs:
    ax      : time domain to which `by_data` will be adapted/rebinned/synced.
    bx      : time domain of `by_data`.
    by_data : assumed of shape (:,:) or (:).
    NOTE: For this to make sense (*), the following should
    be true: (bx[0]-ax[0]) << La && (bx[-1]-ax[-1]) << La,
    where La = (ax[-1]-ax[0]); in other words,
    "the borders of `bx` must *not* be very different
    from those of `ax`."
    """
    nbsh = len(by_data.shape)  # should be 1 or 2.
    if nbsh == 2:
        nb_col = by_data.shape[1]
        by_binned = np.zeros((ax.size, nb_col), dtype=np.float32)
        by = by_data
    elif nbsh == 1:
        by_binned = np.zeros((ax.size, 1), dtype=np.float32)
        by = np.reshape(by_data, (by_data.size, 1))
    else:
        print " ERROR: len(by_data.shape) should be 1 or 2."
        raise SystemExit
    #--- first row
    ax_lo = 0.5*(ax[0]+ax[1])    # midpoint of the first two
    cc = bx <= ax_lo             # (*)
    by_binned[0,:] = np.nanmean(by[cc,:], axis=0)  # per-column mean
    #--- last row
    ax_hi = 0.5*(ax[-2]+ax[-1])  # midpoint of the last two
    cc = bx >= ax_hi             # (*)
    by_binned[ax.size-1,:] = np.nanmean(by[cc,:], axis=0)
    #--- rest of the rows
    for j in range(1, ax.size-1):
        ax_min = 0.5*(ax[j-1] + ax[j])
        ax_max = 0.5*(ax[j] + ax[j+1])
        cc = (bx > ax_min) & (bx < ax_max)
        by_binned[j,:] = np.nanmean(by[cc,:], axis=0)
    # the output must have the same number of columns as the input `by_data`
    if nbsh == 1:
        return by_binned[:, 0]
    else:
        return by_binned
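
#--- illustrative sketch (added; not part of the original gist): rebin a
#    densely sampled 1-D signal `by` (on grid `bx`) onto a coarser grid `ax`.
def _demo_rebine():
    ax = np.linspace(0., 10., 11)    # coarse grid (11 points)
    bx = np.linspace(0., 10., 101)   # dense grid (101 points)
    by = np.sin(bx)                  # dense signal
    yb = rebine(ax, bx, by)          # bin-averaged version of `by` on `ax`
    return ax, yb                    # yb has the same size as ax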


def utc2date(t):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    date = date_utc + timedelta(days=(t/86400.))
    return date


def date2utc(date):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    utcsec = (date - date_utc).total_seconds()  # [utc sec]
    return utcsec


def gps2date(t):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    t_utc = t + 315964800  # [utc sec]
    date = date_utc + timedelta(days=(t_utc/86400.))
    return date


def date2gps(date):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    utcsec = (date - date_utc).total_seconds()  # [utc sec]
    gpssec = utcsec - 315964800  # [gps sec]
    return gpssec
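
# Note (added for clarity): 315964800 s is the offset between the Unix epoch
# (1970-01-01 00:00 UTC) and the GPS epoch (1980-01-06 00:00 UTC), i.e.
# (datetime(1980,1,6) - datetime(1970,1,1)).total_seconds() == 315964800.0.
# Leap seconds are not accounted for in these conversions.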


class My2DArray(object):
    """
    wrapper around a numpy array with:
    - a flexible number of rows
    - a record of the maximum number of rows requested so far (`nrow`)
    NOTE:
    This was tested for 1D and 2D arrays.
    TODO:
    try inheriting from `numpy.ndarray` instead of `object` above, to
    make the wrapper more natural.
    """

    def __init__(self, shape, dtype=np.float32):
        self.this = np.empty(shape, dtype=dtype)
        setattr(self, '__array__', self.this.__array__)
        self.nrow = 0

    def resize_rows(self, nx_new=None):
        """ Grow axis=0 (to `nx_new`, or to TWICE its current size if
        `nx_new` is None), **without** losing data.
        """
        sh_new = np.copy(self.this.shape)
        nx = self.this.shape[0]
        if nx_new is None:
            sh_new[0] = 2*sh_new[0]
        elif nx_new <= nx:
            return 0  # nothing to do
        else:
            sh_new[0] = nx_new

        tmp = self.this.copy()
        #print "----> tmp: ", tmp.shape
        new = np.nan*np.ones(sh_new, dtype=self.this.dtype)
        new[:nx] = tmp
        self.this = new
        """
        For some reason (probably due to the numpy implementation),
        if we don't do this, then:
        >>> print self.__array__()
        stays truncated to the original size that was set at
        __init__() time. So we need to tell numpy our new,
        resized shape!
        """
        setattr(self, '__array__', self.this.__array__)


    def __get__(self, instance, owner):
        return self.this  #.__array__()

    def __getitem__(self, i):
        """
        to retrieve only the non-trivial content (the rows used so far):
        >>> print m[...]
        >>> print m[:]
        to retrieve the whole allocated array:
        >>> print m.this[...]
        >>> print m[:-1]
        >>> print m[:-1,:]
        """
        if not (i is Ellipsis or i == slice(None, None, None)):
            return self.this[i]
        else:
            # return only the non-trivial content
            return self.this[:self.nrow]

    def __setitem__(self, i, value):
        """
        We can safely use:
        >>> ma[n:n+m,:] = [...]
        even assuming n+m is greater than our current size in axis=0.
        NOTE: `i` can be:
        - scalar
        - list
        - tuple of (slice,scalar), (slice,slice),
          (scalar,slice), etc.
        - slice
        """
        # we need to determine `stop` to check whether we are
        # running out of rows.
        if type(i) == slice:
            """
            case:
            >>> m[2:5] = ...
            """
            stop = i.stop
            _ind = i  #slice(i.start, i.stop, i.step)
            self.nrow = np.max([stop, self.nrow])

        elif type(i) == tuple and type(i[0]) == slice:
            """
            cases:
            >>> m[n:n+m,:] = ...  # (*)
            >>> m[n:,:]    = ...  # (**)
            >>> m[n:,4]    = ...
            >>> m[:,:]     = ...  # (***)
            >>> m[:,4]     = ...
            """
            if i[0].start is None and \
               i[0].stop is None and \
               i[0].step is None:  # (***)

                _ind = (slice(None, None, None), i[1])
                stop = 0
                self.nrow = np.max([np.size(value, axis=0), self.nrow])

            else:
                stop_aux = i[0].start + np.size(value, axis=0)  # (*)
                stop = stop_aux if i[0].stop is None else i[0].stop  # (**) & (*)
                _ind = (slice(i[0].start, stop, i[0].step), i[1])
                self.nrow = np.max([stop, self.nrow])

        elif type(i) == tuple and type(i[0]) in (list, int):
            """
            cases:
            >>> m[k,:]     = ...  # type=int
            >>> m[[3,4],:] = ...  # type=list
            >>> m[[3,4],k] = ...  # type=list
            """
            stop = np.max(i[0])  # TEST
            _ind = i
            self.nrow = np.max([stop+1, self.nrow])

        else:
            raise IndexError(" --> don't know how to handle %r" % i)

        # check whether we are short of rows; if so, keep doubling
        # the size of axis=0 until the assignment fits.
        if stop >= self.this.shape[0]:
            nx_new = self.this.shape[0]
            while nx_new <= stop:
                nx_new *= 2
            self.resize_rows(nx_new)

        # finally, assign
        self.this[_ind] = value

        #--- register the maximum nrow requested.
        # NOTE: here we refer to a size, and *not* to a row index.
        #self.max_nrow_used = stop+1 if stop+1>self.max_nrow_used else self.max_nrow_used

    def __getattr__(self, attnm):
        return getattr(self.this, attnm)
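
#--- illustrative sketch (added; not part of the original gist): typical use
#    of My2DArray as a grow-as-you-go row buffer.
def _demo_my2darray():
    m = My2DArray((4, 3), dtype=np.float32)  # start with room for 4 rows
    m[0:2, :] = np.ones((2, 3))              # fill the first two rows
    m[6:8, :] = 2*np.ones((2, 3))            # past the end -> rows are doubled until it fits
    # m.this is the whole allocated buffer; m[...] is only the rows touched so far
    return m.this.shape, m[...].shape        # -> ((16, 3), (8, 3))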


    #EOF
  2. jimsrc revised this gist Nov 23, 2016. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions funcs.py
    @@ -0,0 +1 @@
    funcs file
  3. jimsrc revised this gist Nov 23, 2016. 1 changed file with 28 additions and 0 deletions.
    28 changes: 28 additions & 0 deletions README.md
    @@ -0,0 +1,28 @@
    Run for example:
    ```bash
    ./build_long.prof.py -- --years 2006 2015 --fname_inp ./test_iv.h5 --nbin 1200 --obs wAoP_wPrs -fig ./test.png
    ```
    where:

    - `wAoP_wPrs` is one of the available fields. Others are `wAoP` (i.e. only the AoP correction),
    `wAoP_wPrs_wGh` (with AoP, pressure, and geopotential-height corrections), `aop` (AoP values), etc.

    - `-fig` refers to the filename of the output figure.

    ---
    For more info on the arguments,
    ```bash
    ./build_long.prof.py -- -h
    ```

    ---
    ## Important:

    Note that, in order for this to run consistently, you'll need to place these two scripts in the following structure:
    ```bash
    |
    |- build_long.prof.py
    |- shared_funcs/
       |- funcs.py
       |- __init__.py   #<--- this is an empty file
    ```
  4. jimsrc created this gist Nov 23, 2016.
    160 changes: 160 additions & 0 deletions build_long.prof.py
    @@ -0,0 +1,160 @@
#!/usr/bin/env ipython
# -*- coding: utf-8 -*-
import numpy as np
from numpy import (
    nanmean, nanstd,
)
import argparse, os
from h5py import File as h5
from datetime import datetime, timedelta
from shared_funcs.funcs import (
    My2DArray, date2utc, utc2date, rebine, ncep_gh, nans
)
import itertools
#--- we need X to build the figure
if 'DISPLAY' in os.environ:
    from pylab import figure, show, close
else:
    raise SystemExit(
        " >>> ERROR: the DISPLAY environment variable doesn't exist!"
    )

#--- retrieve args
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
    '-y', '--years',
    type=int,
    nargs=2,
    default=[2006, 2013],
    help='years of the data-processing period, so that the profile\
    will be built from YearIni/Jan/01 until YearEnd/Dec/31',
)
parser.add_argument(
    '-fid', '--fname_inp',
    type=str,
    default='./test.h5',
    help='filename of the .h5 input (contains the geopotential-height-corrected Scalers)',
)
parser.add_argument(
    '-obs', '--obs',
    type=str,
    nargs='+',
    default=['wAoP_wPrs_wGh'],  # a list, since nargs='+'
    help='observables to process; there can be more than one, and each must be one\
    of the keys inside the .h5 file. They will be plotted superposed in the\
    same figure.',
)
parser.add_argument(
    '-nb', '--nbin',
    type=int,
    default=100,
    help='total number of bins of the time domain of the long-term profile',
)
parser.add_argument(
    '-fig', '--fname_fig',
    type=str,
    default='test.png',
    help='filename of the output figure, or "screen" for an interactive plot.',
)

pa = parser.parse_args()

#---
finp_d = h5(pa.fname_inp, 'r')
date_ini = datetime(pa.years[0], 1, 1)
date_end = datetime(pa.years[1], 12, 31)
assert date_end > date_ini, '`date_end` must be greater than `date_ini`!!'

#---
nbin = pa.nbin

class d(object):
    """
    data buffer for each bin of the profile
    """
    def __init__(self):
        self.n = 0  # number of original data points contributing
        self.rates = My2DArray(1, dtype=np.float32)
        self.mean = np.nan
        self.std = np.nan

#obs = pa.obs  #'wAoP_wPrs_wGh'
utc_ini = date2utc(datetime(1970, 1, 1, 0, 0, 0))
ini, end = datetime(pa.years[0], 1, 1), datetime(pa.years[1], 12, 31)
Dt = (end - ini).total_seconds()
dt = Dt/nbin
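# e.g. for --years 2006 2015 and --nbin 1200 (as in the README example),
# dt is roughly 3 days per bin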

# list of all pairs (year, month) of interest
YYYY_MM = list(itertools.product(
    range(pa.years[0], pa.years[1]+1, 1),
    range(1, 12+1, 1),
))
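# e.g. for --years 2006 2007 this yields the 24 pairs
# (2006, 1), (2006, 2), ..., (2007, 12)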

dd_ = {}  # one list of bin buffers per observable
for obs in pa.obs:
    # buffers for this observable
    dd = [d() for i in range(nbin)]
    for yyyy, mm in YYYY_MM:
        rpath = '%04d/%02d' % (yyyy, mm)
        tutc = finp_d['t_utc/'+rpath][...]  # [utc sec]
        r = finp_d[obs+'/'+rpath][...]      # rate
        ind = np.array((tutc - date2utc(ini))/dt, dtype=int)  # (*)
        ind_list = np.unique(ind)           # (**)
        print yyyy, mm, ind_list
        if ind_list.size > 1:
            assert ind_list[0] < ind_list[1], "wrong."  # consistency check
        for i in ind_list:
            cc = ind == i   # collect all samples falling in the same time bin
            ri = r[cc]      # rates of this i-block
            n = dd[i].n
            dd[i].n += ri.size
            dd[i].rates.resize_rows(n + ri.size)
            dd[i].rates[n:n+ri.size] = ri

        if len(ind_list) == 1:  # [likely]
            continue
        else:                   # [unlikely]
            # all bins of this month except the last one are complete, so
            # reduce them to their statistics and free the raw rates
            for j in ind_list[:-1]:
                n = dd[j].n
                dd[j].rates = dd[j].rates[:n]
                dd[j].mean = nanmean(dd[j].rates)
                dd[j].std = nanstd(dd[j].rates)
                dd[j].rates = None  # delete data
    dd_[obs] = dd
    """
    (*): list of integers corresponding to the nearest integer value
    of the utc-sec discretized in units of `dt` time-windows.
    For example, taking utc-sec="equivalent to 09/nov/2008 05:40",
    `ini=datetime(2008,nov,1,0,0)`, and `dt=1day` results in
    an `ind` value of 9.
    (**): to collect all with similar `ind` values
    """

# build the figure
do = {}
fig = figure(1, figsize=(8, 4))
ax = fig.add_subplot(111)
for obs in pa.obs:
    prof = np.array([dd_[obs][n].mean for n in range(nbin)])
    t, r = np.arange(prof.size)*dt/(86400.*365), 100.*prof
    do[obs] = np.array([t, r]).T
    ax.plot(t, r, '-o', ms=2, alpha=.7, label=obs)
    np.savetxt('test_%s.txt' % obs, do[obs], fmt='%12.2f')

ax.legend(loc='best')
ax.grid(True)
ax.set_xlabel('years since ' + date_ini.strftime('%b/%Y'))
ax.set_ylabel('count rate')
if pa.fname_fig == 'screen':
    print " >>> showing interactively..."
    show()
else:
    fig.savefig(pa.fname_fig, dpi=135, bbox_inches='tight')
    print "\n ---> we generated: " + pa.fname_fig

close(fig)


#EOF