Last active
November 23, 2016 18:36
-
-
Save jimsrc/d9c3ba19f9808d002fc9c37fee65a15e to your computer and use it in GitHub Desktop.
Revisions
-
jimsrc revised this gist
Nov 23, 2016 . 1 changed file with 394 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1 +1,394 @@ #!/usr/bin/env ipython # -*- coding: utf-8 -*- import numpy as np import os, sys from scipy.io.netcdf import netcdf_file from h5py import File as h5 from numpy import ( loadtxt, size, mean, isnan, array, ones, zeros, empty, nanmean, nanmedian, nanstd, concatenate, ) from glob import glob from os.path import isfile, isdir from datetime import datetime, timedelta from scipy.interpolate import ( splrep, # Spline interpolation splev) # Spline evaluator import argparse if 'DISPLAY' in os.environ: from pylab import pause, find #--- globals HOME = os.environ['HOME'] class arg_to_datetime(argparse.Action): """ argparse-action to handle command-line arguments of the form "dd/mm/yyyy" (string type), and converts it to datetime object. NOTE: can be used even when `nargs`>1. """ def __init__(self, option_strings, dest, **kwargs): #if nargs is not None: # raise ValueError("nargs not allowed") super(arg_to_datetime, self).__init__(option_strings, dest, **kwargs) def __call__(self, parser, namespace, values, option_string=None): #print '%r %r %r' % (namespace, values, option_string) if len(values)==1: dd,mm,yyyy = map(int, values.split('/')) value = datetime(yyyy,mm,dd) elif len(values)>1: value = [] for val in values: dd,mm,yyyy = map(int, val.split('/')) value += [ datetime(yyyy,mm,dd) ] setattr(namespace, self.dest, value) class arg_to_utcsec(argparse.Action): """ argparse-action to handle command-line arguments of the form "dd/mm/yyyy" (string type), and converts it to UTC-seconds. """ def __init__(self, option_strings, dest, nargs=None, **kwargs): if nargs is not None: raise ValueError("nargs not allowed") super(arg_to_utcsec, self).__init__(option_strings, dest, **kwargs) def __call__(self, parser, namespace, values, option_string=None): #print '%r %r %r' % (namespace, values, option_string) dd,mm,yyyy = map(int, values.split('/')) value = (datetime(yyyy,mm,dd)-datetime(1970,1,1)).total_seconds() setattr(namespace, self.dest, value) def nans(shape, dtype=np.float32): return np.nan*np.ones(shape, dtype=dtype) def ncep_gh(yyyy, mm, level=None, dir_src=None, sync=True, tsync=None): """ input: tsync: in utc-sec """ if dir_src is None: dir_src='../out/out.build_weather/ncep' #HOME+'/data_gdas' if level is None: lname = 'level_%04d'%100 # level in [hPa] fname_inp = dir_src+'/test_%04d.h5'%yyyy assert isfile(fname_inp), ' --> NO EXISTE: '+fname_inp f = h5(fname_inp,'r') dpath = lname + '/%04d-%02d'%(yyyy,mm) #print " ---> ", f[dpath].keys() g_t = f[dpath+'/utcsec'].value # utc sec g_h = f[dpath+'/h'].value # geop height if sync: tck = splrep(g_t, g_h, s=0) gh = splev(tsync, tck, der=0) # der: order of derivative to comp return gh else: return g_h def equi_days(dini, dend, n): """ returns most equi-partitioned tuple of number of days between date-objects 'dini' and 'dend' """ days = (dend - dini).days days_part = np.zeros(n, dtype=np.int) resid = np.mod(days, n) for i in range(n-resid): days_part[i] = days/n # last positions where I put residuals last = np.arange(start=-1,stop=-resid-1,step=-1) for i in last: days_part[i] = days/n+1 assert np.sum(days_part)==days, \ " --> somethng went wrong! :/ " return days_part def equi_dates(dini, dend, n): """ returns: - inis: numpy-array-of-dates, of size (n+1); where, - inis[:-1]: 'n' start-dates of most equi-partitioned contiguous blocks of dates between 'dini' and 'end' (object dates). - inis[-1]: 'dend' (max date of whole block) """ days_part = equi_days(dini, dend, n) inis = np.empty(n+1, dtype=datetime) inis[0] = dini inis[-1] = dend for i in range(1, inis.size-1): inis[i] = inis[i-1] + timedelta(days=days_part[i-1]) return inis def equi_dates_RoundMonth(dini,dend,n): """ same as equi_dates() but rounded at 01/month """ assert dini.day==1, " ERROR: dini.day must be 1." inis = equi_dates(dini,dend,n) inis_ = np.empty(n+1, dtype=datetime) inis_[-1] = dend for i, i_ in zip(inis, range(inis_.size-1)): inis_[i_] = datetime(i.year,i.month,1) return inis_ def rebine_mats(a, b): # original version """ Rebining of 'b' to resolution of 'a'. where: b[0] is denser than a[0] (IMPORTANT!) a[0], a[1]: x and y components of a b[0], b[1]: x and y components of b NOTE: For this to make sense (*), the following should be true: (bx[0]-ax[0])<<La && (bx[-1]-ax[-1])<<La, where La=(ax[-1]-ax[0]); in other words, that "the borders of 'bx' must *not* be very different from those of 'ax'." """ ax, ay = a[0,:], a[1,:] bx, by = b[0,:], b[1,:] by_binned = np.zeros(ax.size, dtype=np.float64) for j in range(ax.size): if(j==0): ax_lo = 0.5*(ax[0]+ax[1]) # semisuma de los 2 primeros cc = bx <= ax_lo # (*) elif(j==ax.size-1): ax_hi = 0.5*(ax[-2]+ax[-1]) # semisuma de los 2 ultimos cc = bx >= ax_hi # (*) else: ax_min = 0.5*(ax[j-1] + ax[j]) ax_max = 0.5*(ax[j] + ax[j+1]) cc = (bx>ax_min) & (bx<ax_max) by_binned[j] = np.nanmean(by[cc]) return by_binned def rebine(ax, bx, by_data, nbef=1, naft=1): """ Rebining of `b` to resolution of `ax`. **IMPORTANT**: b[0] is denser than `ax`! inputs: ax : time domain to which `by_data` will be adapted/rebined/synced. bx : time domain of `by_data`. by_data : is assumed of shape (:,:) or (:). NOTE: For this to make sense (*), the following should be true: (bx[0]-ax[0])<<La && (bx[-1]-ax[-1])<<La, where La=(ax[-1]-ax[0]); in other words, that "the borders of `bx` must *not* be very different from those of `ax`." """ nbsh = len(by_data.shape) # should be 1 or 2. if nbsh==2: nb_col = by_data.shape[1] by_binned = np.zeros((ax.size,nb_col), dtype=np.float32) by = by_data elif nbsh==1: by_binned = np.zeros((ax.size,1), dtype=np.float32) by = np.reshape(by_data, (by_data.size,1)) else: print " ERROR: len(b.shape) should be 1 or 2." raise SystemExit #--- first row ax_lo = 0.5*(ax[0]+ax[1]) # semisuma de los 2 primeros cc = bx <= ax_lo # (*) by_binned[0,:] = np.nanmean(by[cc,:]) #--- last row ax_hi = 0.5*(ax[-2]+ax[-1]) # semisuma de los 2 ultimos cc = bx >= ax_hi # (*) by_binned[ax.size-1,:] = np.nanmean(by[cc,:]) #--- rest of rows for j in range(1,ax.size-1): ax_min = 0.5*(ax[j-1] + ax[j]) ax_max = 0.5*(ax[j] + ax[j+1]) cc = (bx>ax_min) & (bx<ax_max) by_binned[j,:] = np.nanmean(by[cc,:]) # output must have the same no of columns as input 'by' if nbsh==1: return by_binned[:,0] else: return by_binned def utc2date(t): date_utc = datetime(1970, 1, 1, 0, 0, 0, 0) date = date_utc + timedelta(days=(t/86400.)) return date def date2utc(date): date_utc = datetime(1970, 1, 1, 0, 0, 0, 0) utcsec = (date - date_utc).total_seconds() # [utc sec] return utcsec def gps2date(t): date_utc = datetime(1970, 1, 1, 0, 0, 0, 0) t_utc = t + 315964800 # [utc sec] date = date_utc + timedelta(days=(t_utc/86400.)) return date def date2gps(date): date_utc = datetime(1970, 1, 1, 0, 0, 0, 0) utcsec = (date - date_utc).total_seconds() # [utc sec] gpssec = utcsec - 315964800 # [gps sec] return gpssec class My2DArray(object): """ wrapper around numpy array with: - flexible number of rows - records the maximum nrow requested - `nrow`: max number of rows queried. NOTE: This was test for 1D and 2D arrays. TODO: try with `numpy.array` instead of `object` above to make the wrapper more natural. """ def __init__(self, shape, dtype=np.float32): self.this = np.empty(shape, dtype=dtype) setattr(self, '__array__', self.this.__array__) self.nrow = 0 def resize_rows(self, nx_new=None): """ Increment TWICE the size of axis=0, **without** losing data. """ sh_new = np.copy(self.this.shape) nx = self.this.shape[0] if nx_new is None: sh_new[0] = 2*sh_new[0] elif nx_new<=nx: return 0 # nothing to do else: sh_new[0] = nx_new tmp = self.this.copy() #print "----> tmp: ", tmp.shape new = np.nan*np.ones(sh_new,dtype=self.this.dtype) new[:nx] = tmp self.this = new """ for some reason (probably due to numpy implementation), if we don't do this, the: >>> print self.__array__() stucks truncated to the original size that was set in __init__() time. So we need to tell numpy our new resized shape! """ setattr(self, '__array__', self.this.__array__) def __get__(self, instance, owner): return self.this #.__array__() def __getitem__(self, i): """ to retrieve non-trivial content: >>> print m[...] >>> print m[:] to retrieve all the allocated array: >>> print m.this[...] >>> print m[:-1] >>> print m[:-1,:] """ if not(i is Ellipsis or i==slice(None,None,None)): return self.this[i] else: #returns only non-trivial content return self.this[:self.nrow] def __setitem__(self, i, value): """ We can safely use: >>> ma[n:n+m,:] = [...] assuming n+m is greater than our size in axis=0. NOTE: `i` can be: - scalar - list - tuple of (slice,scalar), (slice,slice), (scalar,slice), etc. - slice """ # we need to define `stop` to check if we are # out of rows. if type(i)==slice: """ case: >>> m[2:5] = ... """ stop = i.stop _ind = i #slice(i.start,i.stop,i.step) self.nrow = np.max([stop,self.nrow]) elif type(i)==tuple and type(i[0])==slice: """ cases: >>> m[n:n+m,:] = ... # (*) >>> m[n:,:] = ... # (**) >>> m[n:,4] = ... >>> m[:,:] = ... # (***) >>> m[:,4] = ... """ if i[0].start is None and \ i[0].stop is None and \ i[0].step is None: # (***) _ind = (slice(None,None,None),i[1]) stop = 0 self.nrow = np.max([np.size(value,axis=0), self.nrow]) else: stop_aux = i[0].start + np.size(value,axis=0) # (*) stop = stop_aux if i[0].stop is None else i[0].stop #(**) & (*) _ind = (slice(i[0].start,stop,i[0].step), i[1]) self.nrow = np.max([stop,self.nrow]) elif type(i)==tuple and type(i[0]) in (list,int): """ case: >>> m[k,:] = ... # type=int >>> m[[3,4],:] = ... # type=list >>> m[[3,4],k] = ... # type=list """ stop = np.max(i[0]) # TEST _ind = i self.nrow = np.max([stop+1,self.nrow]) else: raise IndexError(' --> don\'t know how \ handle %r' % i) # check if we are short of rows, in which case, we # will double size in axis=0. if stop >= self.this.shape[0]: nx_new = self.this.shape[0] while nx_new<=stop: nx_new *= 2 self.resize_rows(nx_new) # finnally assign self.this[_ind] = value #--- register the maximum nrow requested. # NOTE here we are referring to size, and *not* row-index. #self.max_nrow_used = stop+1 if stop+1>self.max_nrow_used else self.max_nrow_used def __getattr__(self, attnm): return getattr(self.this, attnm) #EOF -
jimsrc revised this gist
Nov 23, 2016 . 1 changed file with 1 addition and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1 @@ funcs file -
jimsrc revised this gist
Nov 23, 2016 . 1 changed file with 28 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,28 @@ Run for example: ```bash ./build_long.prof.py -- --years 2006 2015 --fname_inp ./test_iv.h5 --nbin 1200 --obs wAoP_wPrs -fig ./test.png ``` where: - `wAoP_wPrs` is one of the available fields. Others are: `wAoP` (i.e. only AoP correction), `wAoP_wPrs_wGh` (with AoP, pressure, and geopotential height) ,`aop` (AoP values), etc. - `-fig` refers to filename of output figure. --- For more info on the arguments, ```bash ./build_long.prof.py -- -h ``` --- ## Important: Note that, in order to run consistenly, you'll need to place these 2 scripts with this structure: ```bash | |- build_long.prof.py |- shared_funcs/ |- funcs.py |- __init__.py #<--- this is an empty file ``` -
jimsrc created this gist
Nov 23, 2016 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,160 @@ #!/usr/bin/env ipython # -*- coding: utf-8 -*- import numpy as np from numpy import ( nanmean, nanstd, ) import argparse, os from h5py import File as h5 from datetime import datetime, timedelta from shared_funcs.funcs import ( My2DArray, date2utc, utc2date, rebine, ncep_gh, nans ) import itertools #--- we need X to build the figure if 'DISPLAY' in os.environ: from pylab import figure, show, close else: raise SystemExit( " >>> ERROR: the DISPLAY environment variable doesn't exist!" ) #--- retrieve args parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument( '-y', '--years', type=int, nargs=2, default=[2006,2013], help='years for period of data-processing. So that the profile\ will be built from YearIni/Jan/01 until YearEnd/Dec/31)', ) parser.add_argument( '-fid', '--fname_inp', type=str, default='./test.h5', help='filename of .h5 input (contains geopotential-height-corrected Scalers)', ) parser.add_argument( '-obs', '--obs', type=str, nargs='+', default='wAoP_wPrs_wGh', help='observables to process; can be more than one, and must be one\ of the keys inside the .h5 file. They will be plotted in the\ same figure in superposed fashion.', ) parser.add_argument( '-nb', '--nbin', type=int, default=100, help='total number of bins for time domain in the long-term profile' ) parser.add_argument( '-fig', '--fname_fig', type=str, default='test.png', help='filename for output figure. Or "screen" for interactive plot.' ) pa = parser.parse_args() #--- finp_d = h5(pa.fname_inp,'r') date_ini = datetime(pa.years[0], 1, 1) date_end = datetime(pa.years[1], 12, 31) assert date_end>date_ini, '`date_end` must be greater than `date_ini`!!' #--- nbin = pa.nbin class d(object): """ data buffer for each bin of the profile """ def __init__(self): self.n = 0 # number of original data points contributing self.rates = My2DArray(1, dtype=np.float32) self.mean = np.nan self.std = np.nan #obs = pa.obs #'wAoP_wPrs_wGh' utc_ini = date2utc(datetime(1970,1,1,0,0,0)) ini,end=datetime(pa.years[0],1,1),datetime(pa.years[1],12,31) Dt = (end-ini).total_seconds() dt = Dt/nbin # list of all pairs (year,month) of interest YYYY_MM = list(itertools.product( range(pa.years[0],pa.years[1]+1,1), range(1,12+1,1), )) dd_ = {} # several dd's for obs in pa.obs: # buffer for each observable dd = [ d() for i in range(nbin) ] for yyyy, mm in YYYY_MM: rpath='%04d/%02d' % (yyyy,mm) tutc = finp_d['t_utc/'+rpath][...] # [utc sec] r = finp_d[obs+'/'+rpath][...] # rate ind = np.array((tutc-date2utc(ini))/dt, dtype=int) # (*) ind_list = np.unique(ind) # (**) print yyyy, mm, ind_list if ind_list.size>1: assert ind_list[0]<ind_list[1],"wrong."# consistency for i in ind_list: cc = ind==i # collect all with similar "time bins" ri = r[cc] # rates of this i-block n = dd[i].n dd[i].n += ri.size dd[i].rates.resize_rows(n+ri.size) dd[i].rates[n:n+ri.size] = ri if len(ind_list)==1: # [likely] continue else: # [unlikely] for j in ind_list[:-1]: n = dd[j].n dd[j].rates = dd[j].rates[:n] dd[j].mean = nanmean(dd[j].rates) dd[j].std = nanstd(dd[j].rates) dd[j].rates = None # delete data dd_[obs] = dd """ (*): list of integers corresponding to the nearest integer value of the utc-sec discretized in units of `dt` time-windows. For example, taking utc-sec="equivalent to 09/nov/2008 05:40", `ini=datetime(2008,nov,1,0,0)`, and `dt=1day` results in an `ind` value of 9. (**): to collect all with similar `ind` values """ # build the figure do = {} fig = figure(1, figsize=(8,4)) ax = fig.add_subplot(111) for obs in pa.obs: prof = np.array([dd_[obs][n].mean for n in range(nbin)]) t, r = np.arange(prof.size)*dt/(86400.*365), 100.*prof do[obs] = np.array([t,r]).T ax.plot(t, r, '-o', ms=2, alpha=.7, label=obs) np.savetxt('test_%s.txt'%obs,do[obs],fmt='%12.2f') ax.legend(loc='best') ax.grid(True) ax.set_xlabel('years since '+date_ini.strftime('%b/%Y')) ax.set_ylabel('count rate') if pa.fname_fig=='screen': print " >>> showing interactively..." show() else: fig.savefig(pa.fname_fig, dpi=135, bbox_inches='tight') print "\n ---> we generated: "+pa.fname_fig close(fig) #EOF