@jimsrc
Last active November 23, 2016 18:36

Revisions

  1. jimsrc revised this gist Nov 23, 2016. 1 changed file with 394 additions and 1 deletion.
    395 changes: 394 additions & 1 deletion funcs.py
    @@ -1 +1,394 @@
    funcs file
#!/usr/bin/env ipython
# -*- coding: utf-8 -*-
import numpy as np
import os, sys
from scipy.io.netcdf import netcdf_file
from h5py import File as h5
from numpy import (
    loadtxt, size, mean, isnan, array,
    ones, zeros, empty, nanmean, nanmedian, nanstd,
    concatenate,
)
from glob import glob
from os.path import isfile, isdir
from datetime import datetime, timedelta
from scipy.interpolate import (
    splrep,  # Spline interpolation
    splev,   # Spline evaluator
)
import argparse
if 'DISPLAY' in os.environ:
    from pylab import pause, find


#--- globals
HOME = os.environ['HOME']

class arg_to_datetime(argparse.Action):
    """
    argparse action that handles command-line arguments of the
    form "dd/mm/yyyy" (string type) and converts them to
    datetime objects.
    NOTE: can be used even when `nargs` > 1.
    """
    def __init__(self, option_strings, dest, **kwargs):
        #if nargs is not None:
        #    raise ValueError("nargs not allowed")
        super(arg_to_datetime, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        #print '%r %r %r' % (namespace, values, option_string)
        if len(values) == 1:
            # `values` is a one-element list of "dd/mm/yyyy" strings
            dd, mm, yyyy = map(int, values[0].split('/'))
            value = datetime(yyyy, mm, dd)
        elif len(values) > 1:
            value = []
            for val in values:
                dd, mm, yyyy = map(int, val.split('/'))
                value += [datetime(yyyy, mm, dd)]
        setattr(namespace, self.dest, value)


class arg_to_utcsec(argparse.Action):
    """
    argparse action that handles a command-line argument of the
    form "dd/mm/yyyy" (string type) and converts it to UTC seconds.
    """
    def __init__(self, option_strings, dest, nargs=None, **kwargs):
        if nargs is not None:
            raise ValueError("nargs not allowed")
        super(arg_to_utcsec, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        #print '%r %r %r' % (namespace, values, option_string)
        dd, mm, yyyy = map(int, values.split('/'))
        value = (datetime(yyyy, mm, dd) - datetime(1970, 1, 1)).total_seconds()
        setattr(namespace, self.dest, value)
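
#--- illustrative sketch (added; not part of the original gist): how these
#    actions might be attached to a parser. The option names below are
#    hypothetical and only meant to show the intended usage.
def _demo_date_args():
    p = argparse.ArgumentParser()
    p.add_argument('--date', action=arg_to_utcsec,
        help='a single "dd/mm/yyyy" date, stored as UTC seconds')
    p.add_argument('--dates', nargs=2, action=arg_to_datetime,
        help='two "dd/mm/yyyy" dates, stored as a list of datetime objects')
    args = p.parse_args(['--date', '23/11/2016',
                         '--dates', '01/01/2006', '31/12/2013'])
    return args.date, args.dates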

def nans(shape, dtype=np.float32):
    return np.nan*np.ones(shape, dtype=dtype)


def ncep_gh(yyyy, mm, level=None, dir_src=None, sync=True, tsync=None):
    """
    input:
    tsync: in utc-sec
    """
    if dir_src is None:
        dir_src = '../out/out.build_weather/ncep'  #HOME+'/data_gdas'
    if level is None:
        level = 100  # default level in [hPa]
    lname = 'level_%04d' % level
    fname_inp = dir_src + '/test_%04d.h5' % yyyy
    assert isfile(fname_inp), ' --> does not exist: ' + fname_inp
    f = h5(fname_inp, 'r')
    dpath = lname + '/%04d-%02d' % (yyyy, mm)
    #print " ---> ", f[dpath].keys()
    g_t = f[dpath+'/utcsec'].value  # utc sec
    g_h = f[dpath+'/h'].value       # geopotential height
    if sync:
        tck = splrep(g_t, g_h, s=0)
        gh = splev(tsync, tck, der=0)  # der: order of derivative to compute
        return gh
    else:
        return g_h
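
# Note (added for clarity): when `sync=True`, the monthly geopotential-height
# series (g_t, g_h) is fitted with a cubic spline (scipy's splrep/splev) and
# evaluated at the caller's `tsync` times, so the returned profile is sampled
# on the caller's own time grid.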


def equi_days(dini, dend, n):
    """
    returns the most equi-partitioned tuple of numbers of days
    between the date objects 'dini' and 'dend'
    """
    days = (dend - dini).days
    days_part = np.zeros(n, dtype=np.int)
    resid = np.mod(days, n)
    for i in range(n-resid):
        days_part[i] = days/n
    # last positions, where I put the residuals
    last = np.arange(start=-1, stop=-resid-1, step=-1)
    for i in last:
        days_part[i] = days/n + 1

    assert np.sum(days_part) == days, \
        " --> something went wrong! :/ "

    return days_part
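
# Example (illustrative, not from the original gist):
#   equi_days(datetime(2016,1,1), datetime(2016,1,11), 3) -> array([3, 3, 4])
#   i.e. the 10 days are split as evenly as possible, with the remainder
#   absorbed by the last block(s).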

def equi_dates(dini, dend, n):
    """
    returns:
    - inis: numpy array of dates, of size (n+1); where
      - inis[:-1]: the 'n' start dates of the most equi-partitioned
        contiguous blocks of dates between 'dini' and 'dend'
        (date objects).
      - inis[-1]: 'dend' (max date of the whole block)
    """
    days_part = equi_days(dini, dend, n)
    inis = np.empty(n+1, dtype=datetime)
    inis[0] = dini
    inis[-1] = dend
    for i in range(1, inis.size-1):
        inis[i] = inis[i-1] + timedelta(days=days_part[i-1])

    return inis
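
# Example (illustrative): equi_dates(datetime(2016,1,1), datetime(2016,1,11), 3)
#   -> [2016-01-01, 2016-01-04, 2016-01-07, 2016-01-11]
#   (the block start dates, plus the final date)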

def equi_dates_RoundMonth(dini, dend, n):
    """ same as equi_dates(), but with the start dates rounded to the 1st of the month """
    assert dini.day == 1, " ERROR: dini.day must be 1."
    inis = equi_dates(dini, dend, n)
    inis_ = np.empty(n+1, dtype=datetime)
    inis_[-1] = dend
    for i, i_ in zip(inis, range(inis_.size-1)):
        inis_[i_] = datetime(i.year, i.month, 1)
    return inis_

def rebine_mats(a, b):  # original version
    """
    Rebinning of 'b' to the resolution of 'a', where:
    - b[0] is denser than a[0] (IMPORTANT!)
    - a[0], a[1]: x and y components of a
    - b[0], b[1]: x and y components of b
    NOTE: For this to make sense (*), the following should
    be true: (bx[0]-ax[0]) << La && (bx[-1]-ax[-1]) << La,
    where La = (ax[-1]-ax[0]); in other words,
    "the borders of 'bx' must *not* be very different
    from those of 'ax'."
    """
    ax, ay = a[0,:], a[1,:]
    bx, by = b[0,:], b[1,:]
    by_binned = np.zeros(ax.size, dtype=np.float64)
    for j in range(ax.size):
        if j == 0:
            ax_lo = 0.5*(ax[0]+ax[1])    # midpoint of the first two
            cc = bx <= ax_lo             # (*)
        elif j == ax.size-1:
            ax_hi = 0.5*(ax[-2]+ax[-1])  # midpoint of the last two
            cc = bx >= ax_hi             # (*)
        else:
            ax_min = 0.5*(ax[j-1] + ax[j])
            ax_max = 0.5*(ax[j] + ax[j+1])
            cc = (bx > ax_min) & (bx < ax_max)
        by_binned[j] = np.nanmean(by[cc])
    return by_binned


def rebine(ax, bx, by_data, nbef=1, naft=1):
    """
    Rebinning of `by_data` to the resolution of `ax`.
    **IMPORTANT**: `bx` is denser than `ax`!
    inputs:
    ax      : time domain to which `by_data` will be adapted/rebinned/synced.
    bx      : time domain of `by_data`.
    by_data : assumed of shape (:,:) or (:).
    NOTE: For this to make sense (*), the following should
    be true: (bx[0]-ax[0]) << La && (bx[-1]-ax[-1]) << La,
    where La = (ax[-1]-ax[0]); in other words,
    "the borders of `bx` must *not* be very different
    from those of `ax`."
    """
    nbsh = len(by_data.shape)  # should be 1 or 2.
    if nbsh == 2:
        nb_col = by_data.shape[1]
        by_binned = np.zeros((ax.size, nb_col), dtype=np.float32)
        by = by_data
    elif nbsh == 1:
        by_binned = np.zeros((ax.size, 1), dtype=np.float32)
        by = np.reshape(by_data, (by_data.size, 1))
    else:
        print " ERROR: len(by_data.shape) should be 1 or 2."
        raise SystemExit
    #--- first row
    ax_lo = 0.5*(ax[0]+ax[1])    # midpoint of the first two
    cc = bx <= ax_lo             # (*)
    by_binned[0,:] = np.nanmean(by[cc,:], axis=0)  # per-column mean
    #--- last row
    ax_hi = 0.5*(ax[-2]+ax[-1])  # midpoint of the last two
    cc = bx >= ax_hi             # (*)
    by_binned[ax.size-1,:] = np.nanmean(by[cc,:], axis=0)
    #--- rest of the rows
    for j in range(1, ax.size-1):
        ax_min = 0.5*(ax[j-1] + ax[j])
        ax_max = 0.5*(ax[j] + ax[j+1])
        cc = (bx > ax_min) & (bx < ax_max)
        by_binned[j,:] = np.nanmean(by[cc,:], axis=0)
    # the output must have the same number of columns as the input `by_data`
    if nbsh == 1:
        return by_binned[:, 0]
    else:
        return by_binned
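
#--- illustrative sketch (added; not part of the original gist): rebin a
#    densely sampled 1-D signal `by` (on grid `bx`) onto a coarser grid `ax`.
def _demo_rebine():
    ax = np.linspace(0., 10., 11)    # coarse grid (11 points)
    bx = np.linspace(0., 10., 101)   # dense grid (101 points)
    by = np.sin(bx)                  # dense signal
    yb = rebine(ax, bx, by)          # bin-averaged version of `by` on `ax`
    return ax, yb                    # yb has the same size as ax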


def utc2date(t):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    date = date_utc + timedelta(days=(t/86400.))
    return date


def date2utc(date):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    utcsec = (date - date_utc).total_seconds()  # [utc sec]
    return utcsec


def gps2date(t):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    t_utc = t + 315964800  # [utc sec]
    date = date_utc + timedelta(days=(t_utc/86400.))
    return date


def date2gps(date):
    date_utc = datetime(1970, 1, 1, 0, 0, 0, 0)
    utcsec = (date - date_utc).total_seconds()  # [utc sec]
    gpssec = utcsec - 315964800  # [gps sec]
    return gpssec
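
# Note (added for clarity): 315964800 s is the offset between the Unix epoch
# (1970-01-01 00:00 UTC) and the GPS epoch (1980-01-06 00:00 UTC), i.e.
# (datetime(1980,1,6) - datetime(1970,1,1)).total_seconds() == 315964800.0.
# Leap seconds are not accounted for in these conversions.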


class My2DArray(object):
    """
    wrapper around a numpy array with:
    - a flexible number of rows
    - a record of the maximum number of rows requested so far (`nrow`)
    NOTE:
    This was tested for 1D and 2D arrays.
    TODO:
    try inheriting from `numpy.ndarray` instead of `object` above, to
    make the wrapper more natural.
    """

    def __init__(self, shape, dtype=np.float32):
        self.this = np.empty(shape, dtype=dtype)
        setattr(self, '__array__', self.this.__array__)
        self.nrow = 0

    def resize_rows(self, nx_new=None):
        """ Grow axis=0 (to `nx_new`, or to TWICE its current size if
        `nx_new` is None), **without** losing data.
        """
        sh_new = np.copy(self.this.shape)
        nx = self.this.shape[0]
        if nx_new is None:
            sh_new[0] = 2*sh_new[0]
        elif nx_new <= nx:
            return 0  # nothing to do
        else:
            sh_new[0] = nx_new

        tmp = self.this.copy()
        #print "----> tmp: ", tmp.shape
        new = np.nan*np.ones(sh_new, dtype=self.this.dtype)
        new[:nx] = tmp
        self.this = new
        """
        For some reason (probably due to the numpy implementation),
        if we don't do this, then:
        >>> print self.__array__()
        stays truncated to the original size that was set at
        __init__() time. So we need to tell numpy our new,
        resized shape!
        """
        setattr(self, '__array__', self.this.__array__)


    def __get__(self, instance, owner):
        return self.this  #.__array__()

    def __getitem__(self, i):
        """
        to retrieve only the non-trivial content (the rows used so far):
        >>> print m[...]
        >>> print m[:]
        to retrieve the whole allocated array:
        >>> print m.this[...]
        >>> print m[:-1]
        >>> print m[:-1,:]
        """
        if not (i is Ellipsis or i == slice(None, None, None)):
            return self.this[i]
        else:
            # return only the non-trivial content
            return self.this[:self.nrow]

    def __setitem__(self, i, value):
        """
        We can safely use:
        >>> ma[n:n+m,:] = [...]
        even assuming n+m is greater than our current size in axis=0.
        NOTE: `i` can be:
        - scalar
        - list
        - tuple of (slice,scalar), (slice,slice),
          (scalar,slice), etc.
        - slice
        """
        # we need to determine `stop` to check whether we are
        # running out of rows.
        if type(i) == slice:
            """
            case:
            >>> m[2:5] = ...
            """
            stop = i.stop
            _ind = i  #slice(i.start, i.stop, i.step)
            self.nrow = np.max([stop, self.nrow])

        elif type(i) == tuple and type(i[0]) == slice:
            """
            cases:
            >>> m[n:n+m,:] = ...  # (*)
            >>> m[n:,:]    = ...  # (**)
            >>> m[n:,4]    = ...
            >>> m[:,:]     = ...  # (***)
            >>> m[:,4]     = ...
            """
            if i[0].start is None and \
               i[0].stop is None and \
               i[0].step is None:  # (***)

                _ind = (slice(None, None, None), i[1])
                stop = 0
                self.nrow = np.max([np.size(value, axis=0), self.nrow])

            else:
                stop_aux = i[0].start + np.size(value, axis=0)  # (*)
                stop = stop_aux if i[0].stop is None else i[0].stop  # (**) & (*)
                _ind = (slice(i[0].start, stop, i[0].step), i[1])
                self.nrow = np.max([stop, self.nrow])

        elif type(i) == tuple and type(i[0]) in (list, int):
            """
            cases:
            >>> m[k,:]     = ...  # type=int
            >>> m[[3,4],:] = ...  # type=list
            >>> m[[3,4],k] = ...  # type=list
            """
            stop = np.max(i[0])  # TEST
            _ind = i
            self.nrow = np.max([stop+1, self.nrow])

        else:
            raise IndexError(" --> don't know how to handle %r" % i)

        # check whether we are short of rows; if so, keep doubling
        # the size of axis=0 until the assignment fits.
        if stop >= self.this.shape[0]:
            nx_new = self.this.shape[0]
            while nx_new <= stop:
                nx_new *= 2
            self.resize_rows(nx_new)

        # finally, assign
        self.this[_ind] = value

        #--- register the maximum nrow requested.
        # NOTE: here we refer to a size, and *not* to a row index.
        #self.max_nrow_used = stop+1 if stop+1>self.max_nrow_used else self.max_nrow_used

    def __getattr__(self, attnm):
        return getattr(self.this, attnm)
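
#--- illustrative sketch (added; not part of the original gist): typical use
#    of My2DArray as a grow-as-you-go row buffer.
def _demo_my2darray():
    m = My2DArray((4, 3), dtype=np.float32)  # start with room for 4 rows
    m[0:2, :] = np.ones((2, 3))              # fill the first two rows
    m[6:8, :] = 2*np.ones((2, 3))            # past the end -> rows are doubled until it fits
    # m.this is the whole allocated buffer; m[...] is only the rows touched so far
    return m.this.shape, m[...].shape        # -> ((16, 3), (8, 3))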


    #EOF
  2. jimsrc revised this gist Nov 23, 2016. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions funcs.py
    @@ -0,0 +1 @@
    funcs file
  3. jimsrc revised this gist Nov 23, 2016. 1 changed file with 28 additions and 0 deletions.
    28 changes: 28 additions & 0 deletions README.md
    @@ -0,0 +1,28 @@
    Run for example:
    ```bash
    ./build_long.prof.py -- --years 2006 2015 --fname_inp ./test_iv.h5 --nbin 1200 --obs wAoP_wPrs -fig ./test.png
    ```
    where:

    - `wAoP_wPrs` is one of the available fields. Others are `wAoP` (i.e. only the AoP correction),
    `wAoP_wPrs_wGh` (with AoP, pressure, and geopotential-height corrections), `aop` (AoP values), etc.

    - `-fig` refers to the filename of the output figure.

    ---
    For more info on the arguments,
    ```bash
    ./build_long.prof.py -- -h
    ```

    ---
    ## Important:

    Note that, in order for this to run consistently, you'll need to place these two scripts in the following structure:
    ```bash
    |
    |- build_long.prof.py
    |- shared_funcs/
       |- funcs.py
       |- __init__.py   #<--- this is an empty file
    ```
  4. jimsrc created this gist Nov 23, 2016.
    160 changes: 160 additions & 0 deletions build_long.prof.py
    @@ -0,0 +1,160 @@
#!/usr/bin/env ipython
# -*- coding: utf-8 -*-
import numpy as np
from numpy import (
    nanmean, nanstd,
)
import argparse, os
from h5py import File as h5
from datetime import datetime, timedelta
from shared_funcs.funcs import (
    My2DArray, date2utc, utc2date, rebine, ncep_gh, nans
)
import itertools
#--- we need X to build the figure
if 'DISPLAY' in os.environ:
    from pylab import figure, show, close
else:
    raise SystemExit(
        " >>> ERROR: the DISPLAY environment variable doesn't exist!"
    )

#--- retrieve args
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
    '-y', '--years',
    type=int,
    nargs=2,
    default=[2006, 2013],
    help='years of the data-processing period, so that the profile\
    will be built from YearIni/Jan/01 until YearEnd/Dec/31',
)
parser.add_argument(
    '-fid', '--fname_inp',
    type=str,
    default='./test.h5',
    help='filename of the .h5 input (contains the geopotential-height-corrected Scalers)',
)
parser.add_argument(
    '-obs', '--obs',
    type=str,
    nargs='+',
    default=['wAoP_wPrs_wGh'],  # a list, since nargs='+'
    help='observables to process; there can be more than one, and each must be one\
    of the keys inside the .h5 file. They will be plotted superposed in the\
    same figure.',
)
parser.add_argument(
    '-nb', '--nbin',
    type=int,
    default=100,
    help='total number of bins of the time domain of the long-term profile',
)
parser.add_argument(
    '-fig', '--fname_fig',
    type=str,
    default='test.png',
    help='filename of the output figure, or "screen" for an interactive plot.',
)

pa = parser.parse_args()

#---
finp_d = h5(pa.fname_inp, 'r')
date_ini = datetime(pa.years[0], 1, 1)
date_end = datetime(pa.years[1], 12, 31)
assert date_end > date_ini, '`date_end` must be greater than `date_ini`!!'

#---
nbin = pa.nbin

class d(object):
    """
    data buffer for each bin of the profile
    """
    def __init__(self):
        self.n = 0  # number of original data points contributing
        self.rates = My2DArray(1, dtype=np.float32)
        self.mean = np.nan
        self.std = np.nan

#obs = pa.obs  #'wAoP_wPrs_wGh'
utc_ini = date2utc(datetime(1970, 1, 1, 0, 0, 0))
ini, end = datetime(pa.years[0], 1, 1), datetime(pa.years[1], 12, 31)
Dt = (end - ini).total_seconds()
dt = Dt/nbin
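# e.g. for --years 2006 2015 and --nbin 1200 (as in the README example),
# dt is roughly 3 days per bin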

# list of all pairs (year, month) of interest
YYYY_MM = list(itertools.product(
    range(pa.years[0], pa.years[1]+1, 1),
    range(1, 12+1, 1),
))
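# e.g. for --years 2006 2007 this yields the 24 pairs
# (2006, 1), (2006, 2), ..., (2007, 12)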

dd_ = {}  # one list of bin buffers per observable
for obs in pa.obs:
    # buffers for this observable
    dd = [d() for i in range(nbin)]
    for yyyy, mm in YYYY_MM:
        rpath = '%04d/%02d' % (yyyy, mm)
        tutc = finp_d['t_utc/'+rpath][...]  # [utc sec]
        r = finp_d[obs+'/'+rpath][...]      # rate
        ind = np.array((tutc - date2utc(ini))/dt, dtype=int)  # (*)
        ind_list = np.unique(ind)           # (**)
        print yyyy, mm, ind_list
        if ind_list.size > 1:
            assert ind_list[0] < ind_list[1], "wrong."  # consistency check
        for i in ind_list:
            cc = ind == i   # collect all samples falling in the same time bin
            ri = r[cc]      # rates of this i-block
            n = dd[i].n
            dd[i].n += ri.size
            dd[i].rates.resize_rows(n + ri.size)
            dd[i].rates[n:n+ri.size] = ri

        if len(ind_list) == 1:  # [likely]
            continue
        else:                   # [unlikely]
            # all bins of this month except the last one are complete, so
            # reduce them to their statistics and free the raw rates
            for j in ind_list[:-1]:
                n = dd[j].n
                dd[j].rates = dd[j].rates[:n]
                dd[j].mean = nanmean(dd[j].rates)
                dd[j].std = nanstd(dd[j].rates)
                dd[j].rates = None  # delete data
    dd_[obs] = dd
    """
    (*): list of integers corresponding to the nearest integer value
    of the utc-sec discretized in units of `dt` time-windows.
    For example, taking utc-sec="equivalent to 09/nov/2008 05:40",
    `ini=datetime(2008,nov,1,0,0)`, and `dt=1day` results in
    an `ind` value of 9.
    (**): to collect all with similar `ind` values
    """

# build the figure
do = {}
fig = figure(1, figsize=(8, 4))
ax = fig.add_subplot(111)
for obs in pa.obs:
    prof = np.array([dd_[obs][n].mean for n in range(nbin)])
    t, r = np.arange(prof.size)*dt/(86400.*365), 100.*prof
    do[obs] = np.array([t, r]).T
    ax.plot(t, r, '-o', ms=2, alpha=.7, label=obs)
    np.savetxt('test_%s.txt' % obs, do[obs], fmt='%12.2f')

ax.legend(loc='best')
ax.grid(True)
ax.set_xlabel('years since ' + date_ini.strftime('%b/%Y'))
ax.set_ylabel('count rate')
if pa.fname_fig == 'screen':
    print " >>> showing interactively..."
    show()
else:
    fig.savefig(pa.fname_fig, dpi=135, bbox_inches='tight')
    print "\n ---> we generated: " + pa.fname_fig

close(fig)


#EOF