#!/usr/bin/python from IPython.Debugger import Tracer; debug_here = Tracer() # Shit. temp don't have it installed!!! #import rpy2.robjects as robjects import os import re from copy import deepcopy #import matplotlib.pyplot as plt import numpy as np import matplotlib as mpl # Not yet used, may 2010 # WHAT!? matplotlib.pyplot is not the same as pylab!! #import matplotlib.pyplot as plt import pylab as plt #from pylab import * # Why not define these lobally? June 2010 from pylab import figure,plot,ylim,xlim,setp,clf,array,isnan,nan,find,text,isfinite,xlabel,ylabel,title,arange,subplot,gca NaN=nan # Defaults only for colours so far: if 1: #import cpblDefaults #defaults=cpblDefaults.defaults() import cifarColours cifar=cifarColours.defcolours() """ Solutions for bounding box / whitespace in output figures: plt.subplots_adjust(left = 0.05,right=1-tiny,bottom=0.1,top=1-tiny) # BRILLIANT!!! USE subplot_tool() to find values! """ print 'fyi: __See figureFontSetup() for plot settings (cpblUtilMathGraph)' ##################################################################################### def overplotLinFit(x,y,format=None,color=None,xscalelog=False): """ March 2010: upgrading it so that the fit line doesn't extrapolate beyond the x,y range of data. """ if format==None: format='k--' if color==None: color=cifar['grey'] from pylab import plot,array,xlim,any if all(isnan(x)): return() if 0: # Use quick method, no standard errors from rpy2 import r ls_fit = r.lsfit(x,y) gradient = ls_fit['coefficients']['X'] yintercept= ls_fit['coefficients']['Intercept'] xr=array([max(min(x),min(xlim())),min(max(x),max(xlim()))]) yr=[yintercept+gradient*xr[0],yintercept+gradient*xr[1]] plot(xr,yr,format,color=color) return(gradient) if xscalelog: x=np.log(x) if 1: # FIT A LINE TO THE NON-NAN ELEMENTS, OVERPLOT IT from scipy import stats as stats from pylab import where,isfinite,logical_and ii=where(logical_and(isfinite(x),isfinite(y))) ( gradient, yintercept, rrr, twoTailedProb, stderr)=stats.linregress(x[ii],y[ii]) s=stderr else: import rpy2.robjects as robjects rpy = robjects.r #import rpy2 as rpy rpy.set_default_mode(rpy.NO_CONVERSION) if xscalelog: x=np.log(x) linear_model = rpy.r.lm(rpy.r("y ~ x"), data = rpy.r.data_frame(x=x, y=y)) rpy.set_default_mode(rpy.BASIC_CONVERSION) gradient=linear_model.as_py()['coefficients']['x'] yintercept=linear_model.as_py()['coefficients']['(Intercept)'] s=rpy.r.summary(linear_model)['sigma'] print ' b: ',gradient #{'x': 5.3935773611970212, '(Intercept)': -16.281127993087839} print ' sigma: ',s #{'terms': , 'fstatistic': {'dendf': 2.0, 'value': 2.2088097871524752, 'numdf': 1.0}, 'aliased': {'x': False, '(Intercept)': False}, 'df': [2, 2, 2], 'call': , 'residuals': {'1': -9.3064376809571137, '3': -6.9622553363545983, '2': 6.3744808050079511, '4': 9.8942122123037599}, 'adj.r.squared': 0.28720941270437206, 'cov.unscaled': array([[ 2.1286381 , -0.42527178], [-0.42527178, 0.09626979]]), 'r.squared': 0.524806275136248, 'sigma': 11.696414461570097, 'coefficients': array([[-16.28112799, 17.06489672, -0.95407129, 0.44073772], [ 5.39357736, 3.62909012, 1.48620651, 0.27556486]])} """ import rpy2.robjects as robjects r = robjects.r # Create the data by passing a Python list to RPy2, which interprets as an R vector ctl = robjects.FloatVector(x) trt = robjects.FloatVector(y) group = r.gl(2, 10, 20, labels = ["Ctl","Trt"]) weight = ctl + trt # RPy2 uses Python dictionary types to index data robjects.globalEnv["weight"] = weight robjects.globalEnv["group"] = group # Run the models lm_D9 = r.lm("weight ~ group") print(r.anova(lm_D9)) lm_D90 = r.lm("weight ~ group - 1") print(r.summary(lm_D90)) """ # Limit extent of the plotted line to the x-extent of non-nan points xxx=[xx for ix,xx in enumerate(x) if isfinite(xx) and isfinite(y[ix])] lxlim=xlim() if xscalelog: lxlim=log(lxlim) xr=array([max(min(xxx),min(lxlim)),min(max(xxx),max(lxlim))]) yr=[yintercept+gradient*xr[0],yintercept+gradient*xr[1]] if xscalelog: xr=np.exp(xr) aline=plot(xr,yr,format,color=color) return(aline,gradient,s) # The following taken from skipper of pystatsmodels... because it allows for errorbars. so incorporate it into mine, above. def linfit2D(y, x=None, y_unc=None): import numpy as np import scipy.special as ss """ Fits a line to 2D data, optionally with errors in y. The method is robust to roundoff error. Parameters ---------- y: ndarray Ordinates, any shape. x: ndarray Abcissas, same shape. Defaults to np.indices(y.length)) y_unc: ndarray Uncertainties in y. If scalar or 1-element array, applied uniformly to all y values. [NOT IMPLEMENTED YET!] Must be positive. Returns ------- a: scalar 0 Fitted intercept b: scalar 1 Fitted slope a_unc: scalar 2 Uncertainty of fitted intercept b_unc: scalar 3 Uncertainty of fitted slope chisq: scalar 4 Chi-squared prob: scalar 5 Probability of finding worse Chi-squared for this model with these uncertainties. covar: ndarray 6 Covariance matrix: [[a_unc**2, covar_ab], [covar_ab, b_unc**2]] yfit: ndarray 7 Model array calculated for our abcissas Notes ----- If prob > 0.1, you can believe the fit. If prob > 0.001 and the errors are not Gaussian, you could believe the fit. Otherwise do not believe it. See Also -------- Press, et al., Numerical Recipes in C, 2nd ed, section 15.2, or any standard data analysis text. Examples -------- >>> import linfit >>> a = 1. >>> b = 2. >>> nx = 10 >>> x = np.arange(10, dtype='float') >>> y = a + b * x >>> y_unc = numpy.ones(nx) >>> y[::2] += 1 >>> y[1::2] -= 1 >>> a, b, sa, sb, chisq, prob, covar, yfit = linfit.linfit(y, x, y_unc) >>> print(a, b, sa, sb, chisq, prob, covar, yfit) (1.272727272727272, 1.9393939393939394, 0.58775381364525869, 0.11009637651263605, 9.6969696969696937, 0.28694204178663996, array([[ 0.34545455, -0.05454545], [-0.05454545, 0.01212121]]), array([ 1.27272727, 3.21212121, 5.15151515, 7.09090909, 9.03030303, 10.96969697, 12.90909091, 14.84848485, 16.78787879, 18.72727273])) Revisons -------- 2007-09-23 0.1 jh@... Initial version 2007-09-25 0.2 jh@... Fixed bug reported by Kevin Stevenson. 2008-10-09 0.3 jh@... Fixed doc bug. 2009-10-01 0.4 jh@... Updated docstring, imports. """ # standardize and test inputs if x == None: x = np.indices(y.length, dtype=y.dtype) x.shape = y.shape if y_unc == None: y_unc = np.ones(y.shape, dtype=y.dtype) # NR Eq. 15.2.4 ryu2 = 1. / y_unc**2 S = np.sum(1. * ryu2) Sx = np.sum(x * ryu2) Sy = np.sum(y * ryu2) # Sxx = np.sum(x**2 * ryu2) # not used in the robust method # Sxy = np.sum(x * y * ryu2) # not used in the robust method # NR Eq. 15.2.15 - 15.2.18 (i.e., the robust method) t = 1. / y_unc * (x - Sx / S) Stt = np.sum(t**2) b = 1. / Stt * np.sum(t * y / y_unc) a = (Sy - Sx * b) / S covab = -Sx / (S * Stt) # NR Eq. 15.2.21 sa = np.sqrt(1. / S * (1. - Sx * covab)) # NR Eq. 15.2.19 sb = np.sqrt(1. / Stt) # NR Eq. 15.2.20 rab = covab / (sa * sb) # NR Eq. 15.2.22 covar = np.array([[sa**2, covab], [covab, sb**2]]) yfit = a + b * x chisq = np.sum( ((y - yfit) / y_unc)**2 ) prob = 1. - ss.gammainc( (y.size - 2.) / 2., chisq / 2.) return a, b, sa, sb, chisq, prob, covar, yfit ############################################################################## ############################################################################## # def cpblOLS(y,xs,betacoefs=False,rhsOnly=None): ########################################################################## ########################################################################## """ I want to have my OLS routine: - take a list of vectors - get heteroskedasticity-robust errors back - have option to get standardized beta coeffcients rathr than raw b - ignore NaNs - take sample weights. (WLS) So xs can/should be a dict of named vectors. Actually, here's my preferred calling format: yname,xnames, dataDict but dataDict should be able to be a list of dicts or a dict of named vectors. sep 2010: this failed, ie is incompelte, since I can't see how to do weights with it. """ import numpy as np import scikits.statsmodels as sm # Not yet flexi calling format: assert isinstance(y,str) #assert isinstance(x,list) #assert isinstance(x[0],str) assert isinstance(xs,dict) dataDict=xs Y=dataDict.pop(y) # Remove y from the data to be used for x. if not rhsOnly: rhsOnly=dataDict.keys()#list(set(x.keys())-set([y])) # get data if betacoefs: X = sm.tools.add_constant(np.column_stack([(xs[kk]-np.mean(finiteValues(xs[kk])))/np.std(finiteValues(xs[kk])) for kk in rhsOnly])) else: X = sm.tools.add_constant(np.column_stack([xs[kk] for kk in rhsOnly])) #beta = np.array([1, 0.1, 10]) #y = np.dot(X, beta) + np.random.normal(size=nsample) # run the regression results = sm.OLS(Y, X,weights=Y).fit() foiu # look at the results #print results.summary() #and look at `dir(results)` to see some of the results #that are available return(bs,rses) ############################################################################## ############################################################################## # def _obselete_use_savefigall_exportPythonFigToFormats(fileName): ########################################################################## ########################################################################## from pylab import savefig for ff in ['pdf','png','eps']: # Should still develop this to get rid of borders! savefig(fileName+'.'+ff,format=ff,transparent=True) ############################################################################## ############################################################################## # def savefigall(fn,transparent=False,ifany=None,fig=None,skipIfExists=False,pauseForMissing=True,onlyPNG=False,jpeg=False,jpeghi=False,svg=False,pdf=True): ########################################################################## ########################################################################## """ Like savefig, but saves in multiple formats; make this standard April 2010: adding transaprent option for png only. (right now it's automatic for pdf) ifany must be an artist subclass taken by matplotlib.findobj(match=) Do not forget to use cpblStata's saveAndIncludeFig(self,figname=None,caption=None,texwidth=None,title=None,onlyPNG=False) if you're using a cpbl latex object! Sept 2010: skipIfExists is a way to skip saving if the file already exists (ie to save time). Sept 2010: Returns written (or existing) file stems. April 2011: what?! this has evolved to have dissimilar options from my latex class's saveAndIncludeFig() ! # saveAndIncludeFig(self,figname=None,caption=None,texwidth=None,title=None,onlyPNG=False,rcparams=None,transparent=False): # Okay. April 2011: I've hopefully implemented things so can use latex.saveAnd... with the above options. """ if fig==None: fig=plt.gcf() (root,tail)=os.path.split(fn) if not root: try: from cpblDefaults import defaults, paths root=paths['graphics']#'/home/cpbl/rdc/workingData/graphics/'#defaults['workingPath']+'graphics/'#'graphicsPath' except: root='./'#/home/cpbl/rdc/graphicsOuttest/graphics/'#defaults['workingPath']+'graphics/'#'graphicsPath' if root and not root.endswith('/'): root+='/' from cpblUtilities import str2pathname tail=str2pathname(tail) # Get rid of any punctuation in the name. if 0: print 'haha kludge skipping !!!!!!!!!!!!!!!!!!!!!!!! ',root,tail return(root+tail) if skipIfExists and os.path.exists(root+tail+'.png') and os.path.exists(root+tail+'.pdf'): print ' Skipping production of '+root+tail+'(png/pdf) because it already exists [skipIfExists]' return(root+tail) if ifany: if not plt.findobj(match=ifany): print ' savefigall: Empty plot (no %s), so not saving %s.'%(str(ifany),root+tail) plt.savefig(root+tail+'.png.FAILED',format='png') if pauseForMissing: plt.show() from cpblUtilities import cwarning cwarning(' savefigall: Empty plot (no %s), so not saving %s.'%(str(ifany),root+tail)) return(None) print 'Saving graphics: '+root+tail+' (+ext)' if transparent: fig.savefig(root+tail+'.png',transparent=transparent) else: fig.savefig(root+tail+'.png') #savefig(root+tail+'_t.png',transparent=True) #savefig(root+tail+'.pdf') if not onlyPNG and pdf: plt.savefig(root+tail+'.pdf',transparent=transparent) if svg: plt.savefig(root+tail+'.svg',transparent=transparent) if jpeg: # jpeg Not supported by pylab!! #plt.savefig(root+tail+'.jpeg')#transparent=True) os.system('convert '+ root+tail+'.png '+root+tail+'.jpeg') if jpeghi: os.system('convert -quality 100 '+ root+tail+'.png '+root+tail+'.jpeg') return(root+tail) def xTicksLogIncome_deprecated_USE_xTicksIncome(nticks=7,natlog=False,tenlog=False,kticks=None): """ For any plot in which the abscissa is log10(income) [set log10=True in this case] or log(income) , you can use this to make more readable tick points and labels. You can specify the income ticks in k$ in kticks. one of natlog or tenlog should be chose, until 2012 when I can set natlog=True as default, above. """ assert natlog or tenlog assert not natlog or not tenlog base,flog=10.0,np.log10 if natlog: base,flog=np.e,np.log xl=pow(base,plt.array(plt.xlim()))/1000 ones=[ik for ik in range(5,200,1) if ik<=xl[1] and ik>=xl[0]] fives=[ik for ik in range(5,200,5) if ik<=xl[1] and ik>=xl[0]] tens=[ik for ik in range(10,200,10) if ik<=xl[1] and ik>=xl[0]] oo=[ones,fives,tens] nn=[abs(len(ooo)-nticks) for ooo in oo] choice=oo[[inn for inn in range(len(nn)) if nn[inn]==min(nn)][0]] if kticks: choice=kticks plt.setp(plt.gca(),'xticks',[flog(cc*1000.0) for cc in choice]) plt.setp(plt.gca(),'xticklabels',[str(cc)+'k' for cc in choice]) return def xTicksIncome(nticks=7,natlog=False,tenlog=False,kticks=None,dollarsign=True): """ Oct 2011: generalise above to deal with non-log? For any plot in which the abscissa is log10(income) [set log10=True in this case] or log(income) , you can use this to make more readable tick points and labels. You can specify the income ticks in k$ in kticks. one of natlog or tenlog should be chose, until 2012 when I can set natlog=True as default, above. [not anymore, unless I make a new log=False argument] """ #assert natlog or tenlog assert not natlog or not tenlog base,flog=10.0,np.log10 if natlog: base,flog=np.e,np.log if natlog or tenlog: xl=pow(base,plt.array(plt.xlim()))/1000 ones=[ik for ik in range(5,200,1) if ik<=xl[1] and ik>=xl[0]] fives=[ik for ik in range(5,200,5) if ik<=xl[1] and ik>=xl[0]] tens=[ik for ik in range(10,200,10) if ik<=xl[1] and ik>=xl[0]] oo=[ones,fives,tens] nn=[abs(len(ooo)-nticks) for ooo in oo] choice=oo[[inn for inn in range(len(nn)) if nn[inn]==min(nn)][0]] if kticks: choice=kticks plt.setp(plt.gca(),'xticks',[flog(cc*1000.0) for cc in choice]) plt.setp(plt.gca(),'xticklabels',[str(cc)+'k' for cc in choice]) else: #Linear values: just put a dollar sign and take off the thousands.? xl=plt.array(plt.xlim())/1000.0 ones=[ik for ik in range(5,200,1) if ik<=xl[1] and ik>=xl[0]] fives=[ik for ik in range(5,200,5) if ik<=xl[1] and ik>=xl[0]] tens=[ik for ik in range(10,200,10) if ik<=xl[1] and ik>=xl[0]] oo=[ones,fives,tens] nn=[abs(len(ooo)-nticks) for ooo in oo] choice=oo[[inn for inn in range(len(nn)) if nn[inn]==min(nn)][0]] if kticks: choice=kticks plt.setp(plt.gca(),'xticks',[cc*1000.0 for cc in choice]) plt.setp(plt.gca(),'xticklabels',[r'\$'*dollarsign+str(cc)+'k' for cc in choice]) return ############################################################################## ############################################################################## # def stdev(vals): ########################################################################## ########################################################################## """ the available std does not give 0 if there's only one value """ from pylab import std if len(vals)>1: return(std(vals)) elif len(vals)>0: return(0.) else: return (float('nan')) ############################################################################## ############################################################################## # from numpy import mean # This one deals with no values!, so overwrite python's default mean.. #def mean(vals): # ########################################################################## # ########################################################################## # """ the available mean cannot deal with no values # """ # import pylab # if len(vals)>0: # return(np.mean(vals)) # else: # return(float('nan')) ############################################################################## ############################################################################## # def seMean(vals, ses): # A Weighted mean: weighted by standard errors # Takes a simple list of estimates and a simple list of their standard errors. # Returns an estimate of the scalar weighted mean and its standard error. # # - COVARIANCE IS IGNORED (ASSUMED ZERO) # # - NaNs are DROPPED! before taking mean ########################################################################## ########################################################################## #from pylab import mean #import pylab from pylab import sqrt,isnan if vals and any(vals): vals,ses=[v for v in vals if not isnan(v)],[v for v in ses if not isnan(v)] meanSE=sqrt(1.0/sum([1.0/ff/ff for ff in ses])) meanVals=sum([vals[ic]/ses[ic]/ses[ic] for ic in range(len(vals))])*meanSE*meanSE else: meanSE=''#[float('nan')] meanVals=''#[float('nan')] return(meanVals,meanSE) ############################################################################## ############################################################################## # def seSum(x=None, sx=None): # This takes a simple sum across x, i.e. x has length >1. ########################################################################## ########################################################################## #from pylab import mean #import pylab from pylab import sqrt,array,isnan,any f=sum(x) sf=sqrt(sum(array(sx)*array(sx))) assert not any(isnan(sf)) return(f,sf) ############################################################################## ############################################################################## # def seProduct(x=None,y=None,sx=None,sy=None,covs=None): ########################################################################## ########################################################################## """ For f=x/y, with s.e.'s of sx and sy, returns f and approx sf... # COVARIANCE SO FAR IGNORED! They need to be floats at the moment. The arguments are mandatory; I'm making them parameters for easier reading of the call. """ #from matplotlib import * x=array(x) y=array(y) sx=array(sx) sy=array(sy) f=x*y sfoverf=sqrt((sx/x)*(sx/x)+(sy/y)*(sy/y)) sf=sfoverf*abs(f) if x==0.0 and y==0.0: # How do I calculate se for this? sf=sqrt(sx**2+sy**2) # True for any limit of x==y? if (sx==0 and x==0) or (sy==0 and y==0): f,sf=0,0 assert isnan(f) or not isnan(sf) return(f,sf) ############################################################################## ############################################################################## # def seQuotient(x=None,y=None,sx=None,sy=None,covs=None): ########################################################################## ########################################################################## """ For f=x/y, with s.e.'s of sx and sy, returns f and approx sf... # COVARIANCE SO FAR IGNORED! They need to be floats at the moment """ #from matplotlib import * x=array(x) y=array(y) sx=array(sx) sy=array(sy) f=x/y sfoverf=sqrt((sx/x)*(sx/x)+(sy/y)*(sy/y)) sf=sfoverf*f return(f,sf) def categoryBarPlot(labels,y,skipEmpty=True,sortDecreasing=False,demo=False,labelLoc=None,yerr=None,horiz=True,barColour=None,labelColour=None,grouped=False,stacked=False,animate=False,filename='unnamedCategoryBarPlot',xlabel=None,ylabel=None,title=None,width=None): if sortDecreasing: print "CAUTION!!!!!!!!!!!!!!!may 2010: sortdecreasing does not sort the labels properly!!!!!!!!!!!!!!!!!!!!!!!!!1" """ April 2010: Stupid (bug??). barh does not return handles to the error bars -- it only gives you the rectangles. So I'm writing a new captureLines subroutine. Earlier: Obviously not finished. Jeezus. There's no grouped option. Write a group/stacked bar chart with labels .... etc. easy exercise. not sure why not done by others. Sept 2010: Demo now even fails: on grouped bar plot horiz: default bar orientation is horizontal. Set to False for vertical bars. Vertical is not yet implemented. labelLoc modes: 'eitherSideOfZero': put label start or ending on the zero line, depending on whether bar goes pos or neg 'opposite bar or inside': this is the default. 'biggest space': put right/left/inside the bar, whichever has most horizontal space there! 'bestEdge': 'alignLeftAxis': 'eitherSideOfZero': 'fromLeft': 'atRightAxis': 'atRight': 'atLeft': width: same option as taken by bar() function """ if demo: # Should do a bunch of different formats to demonstrate stacked, errorbars, etc, etc,.. tableFromXLS=[ ['', 'USA(SCCBS)', 'Canada (ESC2)', 'Gallup ladder', 'Gallup SWL'], ['log(household income)', '0.13', '0.13', '0.34', '0.24'], ['visits to family', '0.05', '0.05', '', ''], ['number of / contact with close friends', '0.1', '0.06', '', ''], ['talk/contact with neighbours', '0.04', '0.05', '', ''], ['volunteer groups /memberships', '0.03', '0.01', '', ''], ['people can be trusted', '0.08', '0.07', '', ''], ['trust neighbours (to return a wallet)', '0.1', '0.07', '', ''], ['trust in police (to return a wallet)', '', '0.05', '', ''], ['frequency of religious attendance', '0.04', '0.02', '0', '0.03'], ['freedom to choose', '', '', '0.08', '0.1'], ['perception of corruption (neg)', '', '', '0.09', '0.05'], ['donated time', '', '', '0.01', '0.03'], ['donated money', '', '', '0.06', '0.05'], ['helped a stranger', '', '', '0.03', '0.05'], ['safe at night', '', '', '0.03', '0.01'], ['importance of religion', '0.03', '0.03', '0', '0.05'], ['can count on friends', '', '', '0.09', '0.12'], ] sccbs=array([tonumeric(tt[1]) for tt in tableFromXLS[1:]]) esc2=array([tonumeric(tt[2]) for tt in tableFromXLS[1:]]) ladder=array([tonumeric(tt[3]) for tt in tableFromXLS[1:]]) swl=array([tonumeric(tt[4]) for tt in tableFromXLS[1:]]) labels=[tt[0] for tt in tableFromXLS[1:]] # Basic demo single bars: figure(2) categoryBarPlot(labels,swl)#,labelLoc='atRight')#plt.transpose([sccbs,esc2,ladder,swl])) figure(5) if 0: categoryBarPlot(labels,swl,horiz=False)#,labelLoc='atRight')#plt.transpose([sccbs,esc2,ladder,swl])) # Grouped figure(1) categoryBarPlot(labels,plt.transpose([sccbs,esc2,ladder,swl]),grouped=['sccbs','esc2','ladder','swl'])#,labelLoc='atRight')# # Animated for presentations figure(6) yerr=0.2*swl categoryBarPlot(labels,swl,yerr=yerr,horiz=True,animate=True)#,labelLoc='atRight')#plt.transpose([sccbs,esc2,ladder,swl])) return() if plt.size(y,0)==len(labels): pass assert array(y).shape[0]==len(labels) # size(y,0) simplevector=len(array(y).shape)==1 # ie not stacked???? #print 'simplevector',simplevector nGroups=len(labels) if not simplevector: nInGroup=array(y).shape[1] assert nGroups==array(y).shape[0] if not simplevector: if not stacked and not grouped: grouped=True assert stacked or grouped assert not stacked or not grouped if skipEmpty and simplevector: # Clean out empty/nan values # Not yet made skipEmpty for stacked, grouped. ii=plt.where(isfinite(y)) labels=array(labels)[ii] y=array(y)[ii] if not barColour and grouped: barColour=[cifar['green'],cifar['cyan'],cifar['pink'],cifar['grey'],'r','b','g','k','c','m',cifar['green'],cifar['cyan'],cifar['pink'],cifar['grey'],'r','b','g','k','c','m',cifar['green'],cifar['cyan'],cifar['pink'],cifar['grey'],'r','b','g','k','c','m',cifar['green'],cifar['cyan'],cifar['pink'],cifar['grey'],'r','b','g','k','c','m'] if not barColour: barColour={} if isinstance(barColour,dict): barColourList=[barColour.get(xx,cifar['cyan']) for xx in labels] elif isinstance(barColour,list): barColourList=barColour else: assert 0 # Can't parse barColour def unzip(recs): return zip(*recs) if sortDecreasing and simplevector: sss=zip(y,labels) sss.sort()# inverse=True WHY INVERSE FAILES? [y,labels]=unzip(sss) def str2latexL(ss): subs=[ ['$',r'\$'], ['_',r'\_'], ['%',r'\%'], ['#','\#'], ['^','\^'], ] for asub in subs: ss=ss.replace(asub[0],asub[1]) return(ss) ind=arange(len(labels)) if horiz: # We want to go from top to bottom of page, ie decreasing ordinate ind=arange(len(labels)-1,-1,-1) if width==None: width=1.0 if labelLoc== 'atRight': width=0.8 # pick TextColour: print ' April 2010: set colours porperly' if 1: # April 2010!!!!!!!!!!!!!! THIS IS NOT FINISHED YET!!! # And choose colour for the text labels: if labelColour==None: labelColour=[['k'],[cifar['pink']]][int(simplevector)] elif isinstance(labelColour,'str'): labelColour=labelColour*len(labels) #lcolour=labelColour else: foiuoi lcolour=labelColour[ii] #return(lcolour) if len(labelColour)==1: labelColour=labelColour*len(labels) htext=[[] for ff in labelColour] if simplevector: yloffset=width/2 # offset of labels from ind if horiz==True: if 0: # April 2010: do not do them all at once?? so tht I can animate... rects=plt.barh(ind,y,color=barColourList,height=width,xerr=yerr,ecolor=cifar['grey'])#,width=width) rects=plt.barh(ind,y,color=barColourList,height=width) xlrect=plt.xlim() erb=[] # list of errorbar handles if yerr in [None]: erb=[[]]*len(y) else: for ii in range(len(yerr)): erb+=[plt.errorbar(y[ii],ind[ii]+yloffset, xerr=yerr[ii],ecolor=cifar['grey'],fmt=None)[1:]] plt.xlim(xlrect) plot([0,0],plt.ylim(),'k-') #,xerr=yerr,ecolor=cifar['grey'])#,width=width) """ plt.bar(pos,val, align='center') erb = plt.errorbar(pos, val, yerr=[np.zeros(len(yerr)), yerr], fmt=None) erb[1][0].set_visible(False) # make lower error cap invisible plt.xticks(pos, ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')) errorbar((0+w/2,1+w/2),V,e,elinewidth=3,ecolor='k',fmt=None,capsize=10, mew=5) plt.show() """ else: rects=plt.bar(ind,y,color=barColourList,width=width,yerr=yerr,ecolor=cifar['grey'])#,width=width) cifar['cyan'] if not simplevector: y=array(y) assert len(y.shape)==2 if grouped: width=(1.0/(nInGroup+.8)) # So.. leave 0.8 bar's width spacing between groups yloffset=width*(nInGroup/2.0) # This isn't quite right!! rects=[] for iy in range(nInGroup): if yerr: rects+=[ plt.barh(ind+(nInGroup-1-iy)*width, y[:,iy], height=width,color=barColourList[iy],xerr=yerr[:,iy],ecolor=cifar['grey'])] else: rects+=[ plt.barh(ind+(nInGroup-1-iy)*width, y[:,iy], height=width,color=barColourList[iy],ecolor=cifar['grey'])] if horiz: plt.gca().set_yticklabels([]) else: plt.gca().set_xticklabels([]) 'fromLeft' tsize1=12*(min(1,16.0/nGroups)) if labelLoc in ['opposite bar or inside'] and not simplevector: # Default: Place each label in whichever place has more space: just right or left of the error bar, or inside the bar!!! if not horiz: from cpblUtilities import cwarning cwarning('oh-oh... this not programmed ye!!!!!!')#assert horiz # vert mode not coded yet.. just change xlim to ylim.. tsize1=0.6*18*(min(1,16.0/nGroups)) for ii in range(len(ind)):#[1:]: if not yerr==None: ye=yerr[ii] else: ye=0.0 ys=array([yyy for yyy in y[ii] if not isnan(yyy)]) if len(ys)==0: continue if all([yy<=0 or isnan(yy) for yy in y[ii]]): # For horizontal mode: posparams=sorted([[ min(ys-ye)-xlim()[0], min(ys-ye),'right'], [ -max(ys+ye), 0,'right'], [ xlim()[1], 0, 'left'], ],reverse=True)##.sort()##.sort(key=lambda x:x[0])[0] else: posparams=sorted([[ xlim()[1]-max(ys+ye), max(ys+ye),'left'], [ min(ys-ye), 0,'left'], [ -xlim()[0], 0, 'right'], ],reverse=True)#.sort()##.sort(key=lambda x:x[0])[0] pp=[ppp for ppp in posparams if not isnan(ppp[0]) or 1] #pp.sort(reverse=True) #print labels[ii],pp if not pp or isnan(pp[0][1]): assert 0 pp[0]=[nan,xlim()[1],'right'] pp=pp[0] # Don't convert labels with str2latex, as they may already be converted. htext[ii]=text(pp[1],ind[ii]+yloffset,' '+labels[ii],verticalalignment='center',horizontalalignment=pp[2],size=tsize1,color=labelColour[ii]) if 'number of' in labels[ii]: assert 1 if labelLoc in ['opposite bar or inside'] and simplevector: # Default: Place each label in whichever place has more space: just right or left of the error bar, or inside the bar!!! """ Jan 2010: this used to be the default. But is there a bug? Maybe my intention was the "biggest space" behaviour, which follows. The only change there was to change an xlim()[0] to xlim()[1]... which looks like a bug or?? """ if not horiz: cwarning('oh-oh... this not programmed ye!!!!!!')#assert horiz # vert mode not coded yet.. just change xlim to ylim.. #assert horiz # vert mode not coded yet.. just change xlim to ylim.. for ii in range(len(ind)):#[1:]: if not yerr==None: ye=yerr[ii] else: ye=0.0 if y[ii]<=0: # For horizontal mode:, bar is to the left posparams=sorted([[ (y[ii]-ye)-xlim()[0], y[ii]-ye,'right'], [ -(y[ii]+ye), 0,'right'], [ xlim()[1], 0, 'left'], ],reverse=True)##.sort()##.sort(key=lambda x:x[0])[0] else: # Bar is to the right posparams=sorted([[ xlim()[0]-y[ii]-ye, y[ii]+ye,'left'], # [ (y[ii]-ye), 0,'left'], [ -xlim()[0], min([0,y[ii]-ye]), 'right'], ],reverse=True)#.sort()##.sort(key=lambda x:x[0])[0] pp=posparams #pp.sort(reverse=True) # Until pylab allows me to right-pad text with spaces: pp=pp[0] dpad=(xlim()[1]-xlim()[0])/100.0 padOffset={'right':-dpad,'left':dpad}[pp[2]] htext[ii]=text(pp[1]+padOffset,ind[ii]+yloffset,' '+labels[ii],verticalalignment='center',horizontalalignment=pp[2],size=tsize1,color=labelColour[ii]) print 'NOT DON HEREE::' if 1 and labelLoc in [None,'biggest space'] and simplevector: # : put right/left/inside the bar, whichever has most horizontal space there! if not horiz: cwarning('oh-oh... this not programmed ye!!!!!!')#assert horiz # vert mode not coded yet.. just change xlim to ylim.. #assert horiz # vert mode not coded yet.. just change xlim to ylim.. if horiz: for ii in range(len(ind)):#[1:]: if not yerr==None: ye=yerr[ii] else: ye=0.0 if y[ii]<=0: # For horizontal mode:, bar is to the left posparams=sorted([[ (y[ii]-ye)-xlim()[0], y[ii]-ye,'right'], [ -(y[ii]+ye), 0,'right'], [ xlim()[1], 0, 'left'], ],reverse=True)##.sort()##.sort(key=lambda x:x[0])[0] else: # Bar is to the right posparams=sorted([[ xlim()[1]-y[ii]-ye, y[ii]+ye,'left'], # [ (y[ii]-ye), 0,'left'], [ -xlim()[0], min([0,y[ii]-ye]), 'right'], ],reverse=True)#.sort()##.sort(key=lambda x:x[0])[0] pp=posparams #pp.sort(reverse=True) # Until pylab allows me to right-pad text with spaces: pp=pp[0] dpad=(xlim()[1]-xlim()[0])/100.0 padOffset={'right':-dpad,'left':dpad}[pp[2]] htext[ii]=text(pp[1]+padOffset,ind[ii]+yloffset,' '+labels[ii],verticalalignment='center',horizontalalignment=pp[2],size=tsize1,color=labelColour[ii]) else: print "This is not finished!! won't work" for ii in range(len(ind)):#[1:]: if not xerr==None: xe=xerr[ii] else: xe=0.0 if x[ii]<=0: # For horizontal mode:, bar is to the left posparams=sorted([[ (x[ii]-xe)-ylim()[0], x[ii]-xe,'right'], [ -(x[ii]+xe), 0,'right'], [ ylim()[1], 0, 'left'], ],reverse=True)##.sort()##.sort(key=lambda x:x[0])[0] else: # Bar is to the right posparams=sorted([[ ylim()[1]-x[ii]-xe, x[ii]+xe,'left'], # [ (x[ii]-xe), 0,'left'], [ -ylim()[0], min([0,x[ii]-xe]), 'right'], ],reverse=True)#.sort()##.sort(key=lambda x:x[0])[0] pp=posparams #pp.sort(reverse=True) # Until pylab allows me to right-pad text with spaces: pp=pp[0] dpad=(xlim()[1]-ylim()[0])/100.0 padOffset={'right':-dpad,'left':dpad}[pp[2]] htext[ii]=text(pp[1]+padOffset,ind[ii]+yloffset,' '+labels[ii],verticalalignment='center',horizontalalignment=pp[2],size=tsize1,color=labelColour[ii]) if labelLoc=='bestEdge': # Place text at chart edge furthest from zero: if xlim()[0]<0 and xlim()[1]>=0: if -xlim()[0]>xlim()[1]: labelLoc='alignLeftAxis' if -xlim()[0]0),size=tsize1,color=cifar['pink']) if labelLoc=='fromLeft': for ii in range(len(ind)): htext[ii]=text(0,ind[ii]+width/2,' '+labels[ii],verticalalignment='center') elif labelLoc== 'atRight': for ii in range(nGroups): htext[ii]=text(max(y[ii]),ind[ii]+yloffset,' '+labels[ii]+' ',verticalalignment='center',horizontalalignment='right',size=tsize1,color=labelColour[ii]) plt.yticks([]) elif labelLoc== 'atRightAxis': for ii in range(nGroups): htext[ii]=text(xlim()[1],ind[ii]+yloffset,' '+labels[ii]+' ',verticalalignment='center',horizontalalignment='right',size=tsize1,color=labelColour[ii]) plt.yticks([]) elif labelLoc== 'atLeft': plt.yticks(ind+width/2., labels) leg=None if grouped and 0: leg=plt.legend( [rr[0] for rr in rects], grouped) if xlabel: plt.xlabel(xlabel) if ylabel: plt.ylabel(ylabel) if title: plt.title(title) # Orgainse all the plot objects to they can be accessed by cateogry: byBar=[[rects[ii],htext[ii],erb[ii]] for ii in range(len(y))] if animate: if animate in [True]: animate=range(len(y)) else: from cpblUtilities import orderListByRule assert isinstance(animate,list) animate=orderListByRule(range(len(y)),animate) plt.setp([rects,htext,erb],'visible',False) for ii,ia in enumerate(animate): plt.setp(byBar[ia],'visible',True) savefigall(filename+'-%d'%ii) return({'legend':leg,'bars':rects,'labels':htext,'errors':erb,'byCategory':byBar}) ########################################################################## ########################################################################## # fNaN=float('nan') def tonumeric(lists,to='auto',nokeys=False,cNaN=fNaN,skipkeys=None,doNothing=False): # ########################################################################## ########################################################################## """ Just a recursive version of float(), almost. It converts empty sets to NaNs, empty dicts to {}, empty strings to NaNs. By default it converts strings to floats. If nokeys is used, it will not affect keys in dicts, though it will still affect their contents. 2009 August: it now considers strings equal to "." to be null, ie nan. 2010 Feb: now able to deal with arrays (rather than lists) of strings? 2010 March: adding new option "skipkeys" which means not to convert any elements of a dict named with something in this list. Also, "doNothing" means return lists unmodified, useful for internal programming, below , in recursion. """ if not skipkeys: skipkeys=[] if doNothing: return(lists) if lists.__class__ not in [list,str,unicode, int,float]: import numpy if isinstance(lists,numpy.ndarray): # Cannot use "not lists" expression for arrays, so rest of function would break return(array([tonumeric(ele,to=to,nokeys=nokeys,cNaN=cNaN) for ele in lists])) if not lists and lists.__class__ in [list,str,unicode]: return(cNaN) if not lists and isinstance(lists,dict): return({}) if isinstance(lists,int) or isinstance(lists,float): return(lists) if lists.__class__ in [str,unicode]: try: if lists.strip() in ['.','','NaN','nan']: return(cNaN) if to=='auto': if '.' in lists: to=float else: to=int if to==float: return(float(lists)) elif to==int: if '.' in lists: assert abs(float(lists))>=1 # or 5e-8 would fail here? return(int(float(lists))) else: return(int(lists)) except: # this catches everything right now. ie returns strings intact if they don't look numeric return(lists) #return(float('nan')) elif isinstance(lists,list): return([tonumeric(ele,to=to,nokeys=nokeys,cNaN=cNaN) for ele in lists]) elif isinstance(lists,dict): if nokeys: return(dict([ (k,tonumeric(lists[k],to=to,nokeys=nokeys,cNaN=cNaN,doNothing=k in skipkeys )) for k in lists.keys()])) else: return(dict([(tonumeric(k,to=to,nokeys=nokeys,cNaN=cNaN),tonumeric(lists[k],to=to,nokeys=nokeys,cNaN=cNaN,doNothing=k in skipkeys )) for k in lists.keys() ])) def labelLine(lines): """ Wrap this around (ie feed it hte output of) any plot or errorbar command. In both cases it will put a label at the right end of the line... """ aline=lines while not aline.__class__== matplotlib.lines.Line2D: aline=aline[0] alabel=aline._label text(aline._x[-1],aline._y[-1],alabel,color=aline.get_color()) return(lines) def plotWithEnvelope(x,y,yLow,yHigh,linestyle='-',linecolor=None,facecolor=None,alpha=0.5,label=None,lineLabel=None,patchLabel=None,laxSkipNaNsSE=False,laxSkipNaNsXY=False,skipZeroSE=False,ax=None,laxFail=True): """ For plotting a line with standard errors at each point... This makes a patch object that gives the envelope around the line. yLow and yHigh are the confidence interval lower and upper points. uhh.. this could have been done with plt.fill() more compactly! [Done] you can then make a legend (if you've specified lineLabel or patchLabel on each) using "legend()". CPBL, Jan 2010 April 2010: rewrote this with pylab.fill() and pylab.poly_between(): Vastly nicer! than old patch() kludge. April 2010: Added facility for dealing with NaNs. We just excise them rather than splitting the plot.. (this could be an option) May 2010: laxSkipNaNs says that rather than requiring all values to be defined for all or none of x,y,yLow,yHigh, the function will allow different subsets of each to be defined. So all that will be plotted are those x values for which all are defined (ie non NaN). Maybe a better behaviour would be to mark points or something where x,y are defined but not yLow,yHigh...? August 2010: Huh? No explanation for what laxSkipNaNsSE was supposed to do. if lax..XY is turned on, then lax..SE is forced on. I think what I want is simply to drop all points with SE==NaN if lax is set, right? Jan 2011: Maybe, but I should also simply skip the envelope whenever all the se's are nan! Jan 2011: There exists now a fill_between() in matplotlib... I now use that below? """ from pylab import isnan,isfinite,find,where,logical_and,logical_or,logical_not#,any,sqrt,array if linecolor==None and not facecolor==None: linecolor=facecolor if not linecolor==None and facecolor==None: facecolor=linecolor if 1: # NO!!! Aug 2010 I need to make this use the next colour in matplotlib's colour sequence... where is that?? if facecolor==None: facecolor='g' if linecolor==None: linecolor='g' if laxSkipNaNsXY: laxSkipNaNsSE=True if label: assert not lineLabel assert not patchLabel lineLabel=label patchLabel=label if ax==None: ax=plt.gca() explainQuit=True quitRS=' plotWithEnvelope: Not making any envelope line "%s" '%str(lineLabel) if isinstance(x,float) and isnan(x): assert laxFail return([],[]) if not list( x ) and not list( y ) and not list( yLow ) and not list( yHigh): print quitRS+' because x and y are empty' assert laxFail return([],[]) if all(isnan( x )) and all(isnan( y )) and all(isnan( yLow )) and all(isnan( yHigh)): print quitRS+' because x and y are all NaN' assert laxFail return([],[]) if any(isfinite( x )) and all( isnan( y )) and all( isnan( yLow )) and all( isnan(yHigh)): print quitRS+' because y are all NaN' assert laxFail return([],[]) if len(find(isfinite(y)))==1: print quitRS+" because there's only one y point to plot!" assert laxFail return([],[]) # Deal is NaNs. """ What should we allow?? there must be matching between y,yLow,yHigh. But y need not match x. ? Aug 2010: I'd like to rewrite this whole thing (?) Should just have an iGood, and add each criterion one by one; can do asserts and notes when something changes. """ xNaN=set(find(isnan(x))) yNaN=set(find(isnan(y))) seNaN=set(find(logical_or(logical_not(isfinite(yLow)),logical_not(isfinite(yHigh))))) bNoSE=all(logical_or(logical_not(isfinite(yLow)),logical_not(isfinite(yHigh)))) seZero=set(find(logical_or(yLow==y,yHigh==y))) iGood=set(range(len(y))) assert len(x)==len(y) assert len(x)==len(yLow) assert len(x)==len(yHigh) if laxSkipNaNsXY and not bNoSE: # bNoSE means we should entirely skip the envelope iGood=iGood-seNaN if skipZeroSE: iGood=iGood-seZero if laxSkipNaNsXY: iGood=iGood-xNaN-yNaN else: assert xNaN==yNaN iGood=sorted(list(iGood)) assert iGood # Ahh! How to subscript with a vector?? vals=deepcopy([x,y,yLow,yHigh]) xorig,yorig,yLoworig,yHighOrig=deepcopy( (x,y,yLow,yHigh)) x,y,yLow,yHigh=[[vvv[ii] for ii in iGood] for vvv in vals] #x[iGood],y[iGood],yLow[iGood],yHigh[iGood] if 0: # Old stuff, sitll needs to be reincorporated with above!!!!! if any(isnan(x)) or any(isnan(y)) or any(isnan(yLow)) or any(isnan(yHigh)): na,nb,nc,nd=where(isfinite(x)),where(isfinite(y)),where(isfinite(yLow)),where(isfinite(yHigh)) if laxSkipNaNsSE: ne=where(logical_and(logical_and(isfinite(x),isfinite(y)),logical_and(isfinite(yLow),isfinite(yHigh)))) if not laxSkipNaNsXY: assert set(na[0])==set(nb[0]) # Still require x and y elements to match NaNs if not set(nb[0])==set(nc[0]) or not set(nc[0])==set(nd[0]): print 'Warning: laxSkipNaNs is letting go a mismatch .......' # Aug 2010: I think above ifs are wrong! What has it got to do with laxSE? iGood=ne[0] else: assert set(na[0])==set(nb[0]) assert set(nb[0])==set(nc[0]) assert set(nc[0])==set(nd[0]) iGood=nb[0] # Is below redundant?!/ ancient? assert list(na[0]) not in [[]] # For now, don't deal yet with nan's in x..?? assert list(iGood) not in [[]] # For now, don't deal yet with nan's in x..?? # --------------- OLD SECTION ABOVE TO BE REINCORPORATED IN TO FURTHER ABOVE -------------- assert any(isfinite(x)) and any(isfinite(y)) hLine=ax.plot(x,y,linestyle=linestyle,color=linecolor,linewidth=2,label=lineLabel) #lseX=list(seX) #lseY=list(seY) #polyXY=plt.array(zip(seX,seY))### # was: if 0: # Obselete now that fill_between exists: xs, ys = plt.poly_between(x, yLow, yHigh) if bNoSE: envelopePatch=None else: envelopePatch=plt.fill(xs,ys,facecolor=facecolor,alpha=alpha,linewidth=0,label=patchLabel)# edgecolor=None, does not work!! So use linewidth instead? if bNoSE: envelopePatch=None else: if patchLabel is not None: envelopePatch=plt.fill_between(x, yLow, yHigh,facecolor=facecolor,alpha=alpha,linewidth=0,label=patchLabel)# edgecolor=None, does not work!! So use linewidth instead? else: envelopePatch=plt.fill_between(x, yLow, yHigh,facecolor=facecolor,alpha=alpha,linewidth=0)# edgecolor=None, does not work!! So use linewidth instead? #seY=yLow+yHigh[::-1] #seX=x+x[::-1] #polyXY=plt.transpose(plt.array([seX,seY])) #envelopePatch=mpl.patches.Polygon(polyXY,True,facecolor=facecolor,alpha=alpha,linewidth=0,label=patchLabel)# edgecolor=None, does not work!! So use linewidth instead? #ax.add_patch(envelopePatch) return(hLine,envelopePatch) def axisNearlyTight(ax=None): from pylab import axis,getp,xlim,ylim,gca,exp,log if ax==None: ax=gca() plt.axes(ax) plt.axis('tight') # Sigh... this seems not to work well. if getp(ax,'xscale')=='log': xr=max(xlim())/min(xlim()) xlim(min(xlim())/exp(log(xr)/20.0),max(xlim())*exp(log(xr)/20.0)) else: xr=max(xlim())-min(xlim()) xlim(min(xlim())-xr/20.0,max(xlim())+xr/20.0) if getp(ax,'yscale')=='log': yr=max(ylim())/min(ylim()) ylim(min(ylim())/exp(log(yr)/20.0),max(ylim())*exp(log(yr)/20.0)) else: yr=max(ylim())-min(ylim()) ylim(min(ylim())-yr/20.0,max(ylim())+yr/20.0) def figureFontSetup(): """ Set font size settings for matplotlib figures so that they are reasonable for exporting to PDF to use in publications / presentations..... """ params = {#'backend': 'ps', 'axes.labelsize': 16, 'text.fontsize': 14, 'legend.fontsize': 10, 'xtick.labelsize': 16, 'ytick.labelsize': 16, 'text.usetex': True,} #'figure.figsize': fig_size} plt.rcParams.update(params) return(params) def addSignatureToPlot(): """ my translegend can now also add annotations [annotate] at the bottom (or top!?) of the legend. But this is nice; you can include newlines etc in the text. Could add option for box, and options for placement. """ Dx=max(xlim())-min(xlim()) Dy=max(ylim())-min(ylim()) th=plt.text(max(xlim())-Dx/100.0,min(ylim())+Dy/50.0,'C. Barrington-Leigh\nMcGill'); plt.setp(th,'fontsize',7,'verticalalignment','bottom','horizontalalignment','right') return(th) if __name__ == '__main__': from cpblUtilities import * categoryBarPlot([],[],demo=True) show() ########################################################################## ########################################################################## # class Position: # ########################################################################## ########################################################################## """ This is about as much GIS as I really need here: calculate great circle distance between two points on a sphere. This is done through the subtraction operation (-) for this clas. """ def __init__(self, longitude, latitude): import math 'init position with longitude/latitude coordinates' llx = math.radians(longitude) lly = math.radians(latitude) self.x = math.sin(llx) * math.cos(lly) self.y = math.cos(llx) * math.cos(lly) self.z = math.sin(lly) def __sub__(self, other): import math 'get distance in km between two positions' d = self.x * other.x + self.y * other.y + self.z * other.z if d < -1: d = -1 if d > 1: d = 1 km = math.acos(d) / math.pi * 20000.0 return km #def __mul__(self, other): # Scalar multiply? # import math # assert isinstance(other,float) # return(self.__init(self.x*other,self.y*other)) # Yikes! This needs to be able to return lat, lon, etc.. much more needed. ########################################################################## ########################################################################## # This function is meant to absorb scatterPlotSEs, eventually. def cpblScatterPlot( # Common arguments: x,y, # Deprecated: fn=None, # arguments for by-region group-coloured plots: ids=None,xlab=None,ylab=None,fname=None,fig=None,xlog=False,byRegion=None,labelFunction=None,labelFunctionArgs=None,fitEachRegion=False,regionOrder=None,legendMode=None,markersize=None,markerMode=None,subsetIndices=None,forceUpdate=True, # arguments for error-bar plots (from scatterPlotSE, not yet incorporated, but holding space....) xse=None,yse=None,labels=None,nearlyTight=True,color=None): #markersize=10 # ########################################################################## ########################################################################## """ Feb 2010: Adding "byRegion" option: pass the region name for each country March 2010: Moved to cpblUtilitiesMathGraph from regressionsGallup (may need generalising!?) # So this is for Gallup... Hmmm. trying to generalise, so: if labelFunction(x,y,ids,color=None, labelFunctionArgsDict=None) is specified, it will be used to label nations within each region. fitEachRegion: turn this on to add a temporary regression line to each subsample, and one to the overall set. March 2010: Getting rid of logx and making an xlog, ie just a flag that the x value is a log value already. If so, this is noted in the filename and reflected in the axis scaling. Prior to now, this function used to make both non-log and log versions whenever the defunct logx was True. N.B.!!!!! I now have two scatter plot functions. One that makes SEs and one that does it by regions. I am in the process of combining these. There will be ACTIONMODE to do different things, in some cases, depending on what is passed. LAlignMode: nnMLB MLT April 2010: Automatically choose this based on density of points in four quadrants? Argh, am I reinventing the auto legend placements?? So far I haven't done the following at the beginning... hmmm. (elementwise logic) xx=x[find(~isnan(x) & ~isnan(y))] yy=x[find(~isnan(x) & ~isnan(y))] September 2010: added z=None. z data will be represented in symbol size, at least for the currently-displayed region... At first, z data will need to be in the actual right units of size, not some arbitrary scale. well, maybe it should be renamed o be symbolsize (yes) for now. markerMode: This will specify various behaviour about when/whether dots or scaled-size scatterplot symbols are shown. Default behaviour for starters (Sept 2010): when symbolSize data are provided, they are only displayed for each region when it's being highlighted. ie simple dots are shown the rest of the time. Also, the symbolsizes are just relative, and are all rescaled. Also, they are rescaled for EACH GROUP, not for the whole set. Note tht markerMode can be a list of strings to specify various settings. Here are some options ['hiddenInGlobalView', # Agh.. no, right now this doesn't work, cause i want hidden in dots view but not hidden in final, with lines and annotations view. 'rescaledByGroup' or 'rescaledGlobal' or 'absoluteScale'] ] I also need to make a "labelsMode" or get the country labels working somehow. Hm, did I know about scatter() when I wrote this? Sept 2010: forceUpdate=True: By default, this function always recreates plots. But if forceUdpate=False, it will skip actually saving the plots if it already exists! Sept 2010: TO DO!!! incorporate the cpblUtilitiesLabelPlacement code! In general, it seems the label placement is so customised/kludged, it may be better to just use the labelfunction as much as possible... Sept 2010: Now returns a list of filenames (or stems) ...? Sep 2010: This used to capitalise axis labels in the byregion mode. I'm taking that off. Can put it back in as an option, but then need to not capitalise stuff inside $...$. ! """ from cpblUtilities import uniqueInOrder import pylab as plt from pylab import xlim,ylim,arrow,errorbar,figure,savefig,setp,getp,ion,show,array,gca,log10 if fig==None: fig=figure() else: figure(fig) if ylab==None: ylab='' if xlab==None: xlab='' def dummyLabelFunction(x,y,ids,args=None,color=None): return([]) if labelFunction==None: labelFunction=dummyLabelFunction if xlog==True: xlog='-logx' else: assert xlog in [False, None] xlog='' if fn is not None: assert fname is None fname=fn print " WARNING! USE OF fn IS DEPRECATED IN CPBLSCATTERPLOT. USE FNAME" fDir=None if not fname is None: fDir,fname= os.path.split(fname) if not fDir: from cpblDefaults import paths fDir=paths['graphics']#'/home/cpbl/gallup/graphicsOut/' else: fDir+='/' if markersize in [None]: markersize=10 # This is needed for s.e. version.. if 0: #Agh, no! The following fails in one of the kinds of scatter plots! (se) markersize=[None for _ in x] assert markerMode in [None] #if markersize.__class__ in [int,float]: # markersize=array([markersize for xx in x]) if markerMode in [None]: markerMode=['hiddenInGlobalView','rescaledByGroup'] assert not ('rescaledByGroup' in markerMode and 'rescaledGlobally' in markerMode) assert not ('absoluteScale' in markerMode and ('rescaledByGroup' in markerMode or 'rescaledGlobally' in markerMode)) ACTIONMODE=[] if xse is not None or yse is not None: ACTIONMODE+=['errorbars'] if byRegion: ACTIONMODE+=['by region'] if not ACTIONMODE: ACTIONMODE+=['errorbars'] outfns=[] #Collect list of graphics files written. def safelen(stuff): # return len if it's not in [None, [], array([])]: if stuff in [None,[]]: return(0) if stuff.__class__ in [int,float]: return(1) return(array(stuff).size)# isinstance(a, numpy.ndarray) assert len(x)==len(y) origLen=len(x) x=array(x) y=array(y) if 'by region' in ACTIONMODE: assert len(x)==len(ids) ids=array(ids) if subsetIndices not in [None]:#.any(): x=x[subsetIndices] y=y[subsetIndices] if 'by region' in ACTIONMODE: byRegion=array(byRegion)[subsetIndices] if safelen(ids): ids=ids[subsetIndices] if safelen(markersize)==origLen: markersize=array(markersize)[subsetIndices] if xse is not None: xse=xse[subsetIndices] if yse is not None: yse=yse[subsetIndices] if isinstance(labels,dict): # So labels is a lookup by ids. Convert to a list. assert safelen(ids) labels=[labels[anid] for anid in ids] if safelen(labels): assert not labelFunction or labelFunction == dummyLabelFunction if not labelFunction == dummyLabelFunction: assert safelen(ids) if 'errorbars' in ACTIONMODE: """ This is my Sept 2010 kludge to start merging these functions.""" """ def _scatterPlotSE(x,y,xse,yse,fname=None,labels=None,nearlyTight=True,color=None,markersize=10): NearlyTight sets the xlim to just slightly bigger than tight. N.B.!!!!! I now have two scatter plot functions. One that makes SEs and one that does it by regions. Shouldn't I combine these???? yes. They're now both in cpblScatterPlot() $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ $ BEGIN: Scatter plot with STANDARD ERRORS $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ """ if safelen(labels):##labels and labels.size: labels=array(labels) else: labels=array([]) if not fname: fnameTitle='test' try: # June 2011: Not sure why fnameTitle is not available now... fnameTitle except: print ' caution: using '+fname+' in place of fnameTitle (undefined) (loc2)' fnameTitle=fname figureFontSetup() fign= plt.mod(sum([ord(f)^2 for f in fnameTitle]),1000) fig = plt.figure(fign) #plt.setp(fig,'figheight',11.0125,'figwidth', 8.1125) fig.clear() ax = fig.add_subplot(111) #LL=0.1*(max(xlim())-min(xlim()))*min([sss for sss in ses if not isnan(sss)]) if color==None: color='k' plabels,bplabels,perrorbars=[],[],[] secolor='0.85' # ie a shade of gray. secolor='gray' # If xse, yse =None,Non, following will not barf, but nothing plotted if isinstance(color,list): for ixx,xx in enumerate(x): perrorbars+=ax.errorbar(xx,y[ixx],yerr=yse[ixx],xerr=xse[ixx],fmt='o',ecolor=secolor,markersize=10,color=color[ixx],label=labels[ixx]) else: perrorbars+=ax.errorbar(x,y,yerr=yse,xerr=xse,fmt='o',ecolor=secolor,markersize=markersize,color=color) adot=ax.plot(x,y,'.',markersize=10,color=color[0]) rotateLabels=False print '+++++++++++++++++++++++++',fnameTitle if 0 and "LadderVLadder" in fnameTitle: oiuoiu if 0 and any(perrorbars): setp(adot,'visible',False) ## else: ## adot=[] ## for ixx,xx in enumerate(x): ## adot+=ax.plot(xx,y[ixx],'.',markersize=10,label=labels[ixx],color=color[ixx]) if labelFunction is not dummyLabelFunction: plabels=labelFunction(x,y,ids,args=labelFunctionArgs)#,color=rcolors[ir])#,label=aregion) elif safelen(labels):#.size: # ie if not empty """ I can make the labels go along the lines, but I'd need to convert dy and dx to more physical units, since the axes are not in image mode. """ for iix in range(len(x)):#if 1:#len(coefsByCRuid.keys())<20:#postPlotArgs['geoOrder'])<20: #geoLabels=dict(provinceNames) xi,yi=x[iix],y[iix] # Bug in matplotlib: text fails if NaN: if isnan(xi) or isnan(yi): continue if not isnan(xi) and not isnan(yi): if rotateLabels: plabels+=[plt.text(xi,yi,namesByCRuid[CRuid],rotation=plt.arctan2(dy,dx)*57.0)] else: plabels+=[plt.text(xi+.01,yi,labels[iix]+' ',rotation='horizontal',horizontalalignment='right',fontsize=14,verticalalignment='center',color=plt.getp(adot[0],'color'))] if 'betterLabels': Dx=max(xlim())-min(xlim()) Dy=max(ylim())-min(ylim()) if xi>= min(xlim())+.8*Dx: offset=-Dx/100.0 ha='right' else: offset=Dx/100 ha='left' bplabels+=[plt.text(xi+offset,yi+Dy/100.0,labels[iix]+' ',rotation='horizontal',horizontalalignment=ha,fontsize=14,verticalalignment='center',color=plt.getp(adot[0],'color'))] plt.setp(plabels,'visible',False) if rotateLabels: plt.axis('image') #horizontalalignment='right',fontsize=8,verticalalignment='center', #plt.ylabel(yName.upper(),size=20) #plt.xlabel(xName.upper(),size=20) if 0: # Sorry..!! July 2011 I'm cutting out this kludge from some specific thing?!. oh.. no.. it's a bug in pylab that I'm hoping will disappear at some point?? # Uhhh... fix an annoying bug in text placed with whitespace at end: xr=max(xlim())-min(xlim()) [label.set_position(label.get_position()+plt.array([-xr/12.0,0])) for label in plabels] figure(fign) if nearlyTight: plt.axis('tight') # Sigh... this seems not to work well. xr=max(xlim())-min(xlim()) yr=max(ylim())-min(ylim()) xlim(min(xlim())-xr/20.0,max(xlim())+xr/20.0) ylim(min(ylim())-yr/20.0,max(ylim())+yr/20.0) #return(plabels,[]) """ for ic=1:length(names) name=names{ic}; Dx=max(xlim)-min(xlim); Dy=max(ylim)-min(ylim); if x(ic)>= min(xlim)+.8*Dx, offset=-Dx/100; ha='right'; %offset=Dx/100; ha='left'; else offset=Dx/100; ha='left';%.006 end % if if length(name)>1, th=text(x(ic)+offset,y(ic)+.01,name);%[upper(name(1)) lower(name(2:end))]); set(th,'fontsize',10,'verticalalignment','middle','horizontalalignment',ha) if th, ths=[ths th]; end %if end %if fprintf(' Labelling "%s"\n',name); if settings.signature, th=text(max(xlim)-Dx/100,min(ylim)+Dy/50,'C. Barrington-Leigh, UBC'); set(th,'fontsize',7,'verticalalignment','bottom','horizontalalignment','right') end % if end %for """ #_scatterPlotSE(x,y,xse,yse,fname,labels,nearlyTight,color,markersize) xlabel(xlab) ylabel(ylab) if 'by region' in ACTIONMODE: """ $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ $ BEGIN: Scatter plot with REGION GROUPS $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ """ # These are hardcoded or Gallup, so need fixing/parameterising for general case! limsDefault={ 'SWLylim':(2,8.5), 'INCFRACxlim': [0.15,12], 'INCFRACxlimLOG':(0.007,2), 'POWERxlimLOG':(.2,10.5), } lims=limsDefault ## if 'INCOME' in xlab.upper() and 'FRACTION' in xlab.upper(): ## xlim(INCFRACxlimLOG) ## if 'POWER PER CAPITA' in xlab.upper(): ## xlim(POWERxlimLOG) ## if 'SATISFACTION' in ylab.upper(): ## ylim(SWLylim) clf() adot=plot(x,y,'.',hold=True) if xlog: setp(gca(),'xscale','log') axisNearlyTight() if 'SATISFACTION' in ylab.upper(): ylim(lims['SWLylim']) if xlog: if 'INCOME' in xlab.upper() and 'FRACTION' in xlab.upper(): xlim(lims['INCFRACxlimLOG']) if 'POWER PER CAPITA' in xlab.upper(): xlim(lims['POWERxlimLOG']) if 'SATISFACTION' in ylab.upper(): ylim(lims['SWLylim']) #INCFRACxlim= [0.15,12] hDbyRegion,hLbyRegion,fitsByRegion,bigMarkersByRegion={},{},{},{} """'cifar['cyan']'# 'CIFARdarkgreen' 'cifar['green']'# 'cifar['grey']' 'CIFARlightblue' 'CIFARpalekhaki' 'cifar['pink']'# 'CIFARyellow' """ rcolors=['r','b','g','k','m','c','y']+[cifar[kk] for kk in cifar]+['r','b','g','k','m','c','y']+[cifar[kk] for kk in cifar]+['r','b','g','k','m','c','y']+[cifar[kk] for kk in cifar]+['r','b','g','k','m','c','y']+[cifar[kk] for kk in cifar] if regionOrder: regs=regionOrder else: regs=uniqueInOrder([kk for kk in byRegion if not (isinstance(kk,float) and isnan(kk))], drop= ['98','99','']+['nan']+[nan]) regs=uniqueInOrder( byRegion, drop= ['98','99','']+['nan']+[nan,NaN])#dropNaN=True) #assert regs or not uniqueInOrder(byRegion) assert not len(regs)==0 # Now evaluate where to put legend based on density of points: xx=x[find(~isnan(x) & ~isnan(y))] yy=y[find(~isnan(x) & ~isnan(y))] if xlog: LL=xx<=sqrt(xlim()[0]*xlim()[1]) else: LL=xx<=mean(xlim()) TT=yy>= mean(ylim()) quadrants=[len(find(LL * TT)), len(find((~ LL) * TT)), len(find(LL * (~ TT))), len(find((~ LL) * (~ TT))), ] wquadrant=[0,0, 0,0] # Automatically choose legendMode: assert not legendMode # unused now. if not legendMode: LAlignMode='auto' iLA=sorted( enumerate(quadrants),key=lambda z:z[1])[0][0] #['LLT','RRT', 'LLB','RRB'] if xlog: xspace=pow(xlim()[1]/xlim()[0],(1.0/50)) xbase=[min(xlim())*xspace, max(xlim())/xspace, min(xlim())*xspace, max(xlim())/xspace ] else: xspace=(max(xlim())-min(xlim()))/50.0 xbase=[min(xlim())+xspace, max(xlim())-xspace, min(xlim())+xspace, max(xlim())-xspace, ] yspace=(max(ylim())-min(ylim()))/50.0 ybase=[max(ylim())-yspace, max(ylim())-yspace, min(ylim())+yspace, min(ylim())+yspace ] ' 0.32 was for the Gallup incomes/SWL plots... ie 7 regions??' yincrement=array([-0.32, -0.32, +0.32, +0.32])/.32*(max(ylim())-min(ylim()))/2.0/len(uniqueInOrder(regs)) #+0.32, +0.32])/.32*(max(ylim())-min(ylim()))/15.0 horizontalalignment=['left', 'right', 'left', 'right'] verticalalignment=['top','top', 'bottom','bottom'] for ir,aregion in enumerate(regs): inRegion=find(array(byRegion) == aregion) #if lastMarkers: # plt.setp(lastMarkers,'visible',False)#fitsByRegion[aregion] hDbyRegion[aregion]= plt.plot(x[inRegion],y[inRegion],'.',hold=True,color=rcolors[ir],label=aregion) if safelen(markersize)==safelen(x) and any(isfinite(array(markersize)[inRegion])): # This if line excludes Nones and NaNs , respectively. z=array(markersize)[inRegion]#array(markersize)[inRegion] #if z.__class__ in [int,float]: #z=array([markersize for xx in x]) if 'rescaledByGroup' in markerMode: aa,bb=min(z),max(z) z=((z/bb)-(aa/bb)+.1)*100 if 'rescaledGlobally' in markerMode: aa,bb=min(markersize),max(markersize) z=((z/bb)-(aa/bb)+.1)*100 if 'absoluteScale' in markerMode: z=array(markersize[inRegion]) bigMarkersByRegion[aregion]= plt.scatter(x[inRegion],y[inRegion],s=z,marker='o',hold=True,c=rcolors[ir],label=aregion) else: print ' Not using large markers here becuase markersize was not specified for each point' hDbyRegion[aregion]= plot(x[inRegion],y[inRegion],'.',hold=True,color=rcolors[ir],label=aregion) if labelFunction: hLbyRegion[aregion]=labelFunction(x[inRegion],y[inRegion],ids[inRegion],args={'xoff':0},color=rcolors[ir])#,label=aregion) setp(hLbyRegion[aregion],'label',aregion) else: if LAlignMode=='auto': hLbyRegion[aregion]+=[text(xbase[iLA],ybase[iLA]+ir*yincrement[iLA],aregion,color=rcolors[ir], verticalalignment=verticalalignment[iLA],horizontalalignment=horizontalalignment[iLA])] elif xlog and LAlignMode=='MLB': #middle, left-aligned, bottom hLbyRegion[aregion]+=[text(sqrt(xlim()[0]*xlim()[1]),min(ylim())+yspace+ir*.32,aregion,color=rcolors[ir])] elif not xlog and LAlignMode=='MLB': #middle, left-aligned, bottom hLbyRegion[aregion]+=[text(mean(xlim()), min(ylim())+yspace+ir*.32,aregion,color=rcolors[ir])] elif xlog and LAlignMode=='MLT': #middle, left-aligned, top hLbyRegion[aregion]+=[text(sqrt(xlim()[0]*xlim()[1]),max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top')] elif not xlog and LAlignMode=='MLT': #middle, left-aligned, top hLbyRegion[aregion]+=[text(mean(xlim()), max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top')] elif xlog and LAlignMode=='RRB': #middle, left-aligned, bottom hLbyRegion[aregion]+=[text(max(xlim())/xspace,min(ylim())+yspace+ir*.32,aregion,color=rcolors[ir],horizontalalignment='right')] elif not xlog and LAlignMode=='RRB': #middle, left-aligned, bottom hLbyRegion[aregion]+=[text(max(xlim())-xspace, min(ylim())+yspace+ir*.32,aregion,color=rcolors[ir],horizontalalignment='right')] elif xlog and LAlignMode=='RRT': #middle, left-aligned, top hLbyRegion[aregion]+=[text(max(xlim())/xspace,max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top',horizontalalignment='right')] elif not xlog and LAlignMode=='RRT': #middle, left-aligned, top hLbyRegion[aregion]+=[text(max(xlim())-xspace, max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top',horizontalalignment='right')] elif xlog and LAlignMode=='LLT': #left, left-aligned, top hLbyRegion[aregion]+=[text(xspace*min(xlim()),max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top')] elif not xlog and LAlignMode=='LLT': #left, left-aligned, top hLbyRegion[aregion]+=[text(xspace+min(xlim()), max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top')] else: assert 0 # Don't know LAlignMode ###print 'aaaaaaaaaaaaaaaaaaaaahhhhhhh this fails on fedora!!!!! ' if any(isfinite(x[inRegion])) and any(isfinite(y[inRegion])) :#'fails on fedora only': fitsByRegion[aregion],gradient,s=overplotLinFit(x[inRegion],y[inRegion],format='-',color=rcolors[ir],xscalelog=xlog) assert fitsByRegion[aregion] else: fitsByRegion[aregion],gradient,s=[],[],[] #legend([hLbyRegion[kk][0] for kk in regs]) #labs=labelFunction(x,y,ids,xoff=0) #plot(x,y,'.',hold=True) setp(adot,'visible',False) del adot xlabel(xlab.replace('KW/','kW/')) # .upper() ylabel(ylab) #.upper() if 'SATISFACTION' in ylab.upper(): ylim(lims['SWLylim']) from cpblUtilities import flattenList #allFits=flattenList([fitsByRegion[kk] for kk in fitsByRegion]) #print allFits setp(fitsByRegion.values(),'visible',False) setp(hLbyRegion.values(),'visible',False) setp(bigMarkersByRegion.values(),'visible',False) #savefigall('/home/cpbl/gallup/graphicsOut/'+fname+'-8')#,transparent=transparent) for ir,region in enumerate(regs): if fname: outfns.append(savefigall(fDir+fname+xlog+'-%d'%(0+ir),skipIfExists=not forceUpdate))#(len(regs)-ir))#,transparent=transparent) setp(fitsByRegion.values(),'visible',False) #region=regs[len(regs)-ir-1] #setp(fitsByRegion.values(),'visible',False) setp(bigMarkersByRegion.values(),'visible',False) setp(hLbyRegion.values(),'visible',False) setp(fitsByRegion[region],'visible',True) setp(hLbyRegion[region],'visible',True) setp(bigMarkersByRegion.get(region,[]),'visible',True) ###setp(fitsByRegion[region],'visible',False) if fname: outfns.append(savefigall(fDir+fname+xlog+'-%d'%(0+len(regs)),skipIfExists=not forceUpdate))#(len(regs)-ir))#,transparent=transparent) setp(fitsByRegion.values(),'visible',False) setp(hLbyRegion.values(),'visible',True) setp(bigMarkersByRegion.values(),'visible',True) if 'fails on fedora only': #It won't anymore, though the python stats linreg causes a warning... obselete?! fitsByRegion['all'],gradient,s=overplotLinFit(x,y,format='-',color=[0.5,0.5,0.5],xscalelog=xlog) if fname: outfns.append(savefigall(fDir+fname+xlog+'-%d'%(1+len(regs)),skipIfExists=not forceUpdate)) setp(fitsByRegion.values(),'visible',False) if fname: outfns.append(savefigall(fDir+fname+xlog+'-%d'%(2+len(regs)),skipIfExists=not forceUpdate)) try: # June 2011: Not sure why fnameTitle is not available now... fnameTitle except: print ' caution: using '+fname+' in place of fnameTitle (undefined) ' fnameTitle=fname title(fnameTitle+xlog) if fname and not 'by region' in ACTIONMODE: outfns.append(savefigall(fDir+fname,skipIfExists=not forceUpdate)) title(fname) return(outfns) ## #savefig(fDir+fname+'.png',transparent=transparent) ## #savefig(fDir+fname+'.pdf',transparent=transparent) ## # Make log version too? ## if 'INCOME' in xlab.upper() or logx: ## clf() ## dummy=plot(x,y,'.',hold=True) ## axisNearlyTight() ## if 'SATISFACTION' in ylab.upper(): ## ylim(SWLylim) ## if 0: ## setp(fitsByRegion.values(),'visible',False) ## setp(hLbyRegion.values(),'visible',False) ## # "del" below does NOT get rid of them! So do two lines above. ## del hLbyRegion ## del fitsByRegion ## setp(gca(),'xscale','log') ## axisNearlyTight() ## if 'INCOME' in xlab.upper() and 'FRACTION' in xlab.upper(): ## xlim(INCFRACxlimLOG) ## if 'POWER PER CAPITA' in xlab.upper(): ## xlim(POWERxlimLOG) ## if 'SATISFACTION' in ylab.upper(): ## ylim(SWLylim) ## #INCFRACxlim= [0.15,12] ## hDbyRegion,hLbyRegion,fitsByRegion={},{},{} ## rcolors=['r','b','g','k','m','c','y']+[cifar[kk] for kk in cifar] ## ncolor=0 ## x=array(x) ## y=array(y) ## ids=array(ids) ## #regs=[kk for kk in uniqueInOrder(byRegion) if kk not in ['98','99','']] ## for aregion in regs: ## inRegion=find(array(byRegion) == aregion) ## hDbyRegion[aregion]= plot(x[inRegion],y[inRegion],'.',hold=True,color=rcolors[ncolor],label=aregion) ## hLbyRegion[aregion]=labelFunction(x[inRegion],y[inRegion],ids[inRegion],xoff=0,color=rcolors[ncolor])#,label=aregion) ## setp(hLbyRegion[aregion],'label',aregion) ## if 0:#hLbyRegion[aregion]: ## hLbyRegion[aregion].set_label(aregion) ## text(sqrt(xlim()[0]*xlim()[1]),min(ylim())+.1+ncolor*.32,aregion,color=rcolors[ncolor]) ## ncolor+=1 ## fitsByRegion[aregion],gradient,s=overplotLinFit(x[inRegion],y[inRegion],format='-',color='grey') ## #labelFunction(x,y,ids,xoff=0) ## for ir,region in enumerate(regs): ## savefigall(fDir+fname+'-logx-%d'%(1+ir))#(len(regs)-ir))#,transparent=transparent) ## setp(fitsByRegion.values(),'visible',False) ## #region=regs[len(regs)-ir-1] ## #setp(fitsByRegion.values(),'visible',False) ## setp(fitsByRegion[region],'visible',True) ## setp(hLbyRegion[region],'visible',True) ## ###setp(fitsByRegion[region],'visible',False) ## savefigall(fDir+fname+'-logx-%d'%(1+len(regs)))#(len(regs)-ir))#,transparent=transparent) ## setp(fitsByRegion.values(),'visible',False) ## fitsByRegion['all'],gradient,s=overplotLinFit(x,y,format='-',color=[0.5,0.5,0.5]) ## savefigall(fDir+fname+'-logx-%d'%(2+len(regs))) ## setp(fitsByRegion.values(),'visible',False) ## savefigall(fDir+fname+'-logx-%d'%(3+len(regs))) ## #savefig(fDir+fname+'_logx.png',transparent=transparent) ## #savefig(fDir+fname+'_logx.pdf',transparent=transparent) ## #savefig(fDir+fname+'_t.png',transparent=transparent) scatterPlotByRegion=cpblScatterPlot from math import modf, floor def quantile(x, q, qtype = 7, issorted = False): """ I want something like this but which admits weights for the data! Args: x - input data q - quantile qtype - algorithm issorted- True if x already sorted. Compute quantiles from input array x given q.For median, specify q=0.5. References: http://reference.wolfram.com/mathematica/ref/Quantile.html http://wiki.r-project.org/rwiki/doku.php?id=rdoc:stats:quantile Author: Ernesto P.Adorio Ph.D. UP Extension Program in Pampanga, Clark Field. """ if not issorted: y = sorted(x) else: y = x if not (1 <= qtype <= 9): return None # error! # Parameters for the Hyndman and Fan algorithm abcd = [(0, 0, 1, 0), # inverse empirical distrib.function., R type 1 (0.5, 0, 1, 0), # similar to type 1, averaged, R type 2 (0.5, 0, 0, 0), # nearest order statistic,(SAS) R type 3 (0, 0, 0, 1), # California linear interpolation, R type 4 (0.5, 0, 0, 1), # hydrologists method, R type 5 (0, 1, 0, 1), # mean-based estimate(Weibull method), (SPSS,Minitab), type 6 (1, -1, 0, 1), # mode-based method,(S, S-Plus), R type 7 (1.0/3, 1.0/3, 0, 1), # median-unbiased , R type 8 (3/8.0, 0.25, 0, 1) # normal-unbiased, R type 9. ] a, b, c, d = abcd[qtype-1] n = len(x) g, j = modf( a + (n+b) * q -1) if j < 0: return y[0] elif j > n: return y[n] j = int(floor(j)) if g == 0: return y[j] else: return y[j] + (y[j+1]- y[j])* (c + d * g) def quantileTest(): x = [11.4, 17.3, 21.3, 25.9, 40.1, 50.5, 60.0, 70.0, 75] for qtype in range(1,10): print qtype, quantile(x, 0.35, qtype) def transAnnotation(comments,pos=None): """ I think this is waiting to find the algorithm for optimal-positioning. [oct 2011] I suppose in the mean time you could just use it as a shorthand for putting things in corners? But it's hardly worth functionalising it. Just copy the code! """ leftB,rightB=plt.xlim() botB,topB=plt.ylim() dX=rightB-leftB dY=topB-botB plt.gca().text(rightB-.025*dX,topB-.025*dY,waves[wave]+'\n'+r'$\pm$2 s.e. shown'+0*('Chris Barrington-Leigh\n McGill'), ha='right', va="top", size=14, bbox=dict(boxstyle="round", fc="w", ec="0.5", alpha=0.9)) def transLegend(figlegend=False,comments=None,removeOldLegends=True,showOnly=None,title=None,loc='best',titlesize=None): # Make a legend and, if it's not empty, set some transparency properties. """ Started out simply making a translucent legend (somewhat safely). If comments (extra lines in legend) supplied, then return value gives them too in a tuple. Comments come AFTER evverything else. The title gives stuff to go before the main content of the legend.. removeOldLegends gets rid of one if there's already May 2010: how the hell do i restrict which objects get listed in the legend??????????? it says this is not supported?!?! showOnly takes a possibly nested list of objects. only these are shown.. Hm. But what about the order?! damn! Can I use this purely to make annotations? Or should I have a separate one to do that? how about transLegend(showOnly=[],comments='hello') Hm, no. I should just make a box when there's nothing but annotations, so that the annotations can start flush left. Not done. See boxstyle below... Sept 2010: Figured out how to add comments nicely!!! So edit above notes [oh? i've not got comments only done yet.] So, title goes on top; comments go below. Need to do some error checking here to deal with pylab bugs and fragility: e.g. if tex mode not set, warnings ..? April 2011: Agh! I've broken my own work. showOnly=None: should work like legend(), using the best guess (all) of labelled objects. showOnly=[]: should avoid showing any legend entries. This just should produce a comment box...? Annotations: you can set loc to be a 3-vector of x,y,h-alignment. n.b. if you want to include handles from more than one axis, eg. axes made with twinx(), just specify the handles with "showonly" """ from pylab import xlim,ylim,flatten,gca import os if 1: #os.uname()[2][0]=='3': from pylab import legend lh=legend() lh.get_frame().set_alpha(0.5) return if showOnly==[] and not comments==None: # Just do an annotation!!! # I guess I'm going to need to specify loc for this option. grrr. if titlesize is None: # Should really use textsize, but not imple yet titlesize=20 xl,yl=xlim(),ylim() dx,dy=xl[1]-xl[0],yl[1]-yl[0] if loc in ['northeast','NE','ne','topright']: tlx,tly,ha=xl[1]-.05*dx, yl[1]-.1*dy,'right' # top right tlx,tly,ha=xl[0]+.1*dx, yl[1]-.1*dy,'left' # top left if loc in ['southwest','SW','sw']: tlx,tly,ha=xl[0]+.1*dx, yl[0]+.1*dy,'left' # bottmleft if isinstance(loc,list): assert len(loc)==3 tlx,tly,ha=loc gca().text(tlx, tly, comments, ha=ha, va="center", size=titlesize, bbox=dict(boxstyle="round", fc="w", ec="0.5", alpha=0.9)) print """this is not finished. and no auto placement!. Actually, you shoul djust use a one-line boxed text, e.g.: gca().text(.252,-.13,'Chris Barrington-Leigh\n McGill', ha='left', va="center", size=7, bbox=dict(boxstyle="round", fc="w", ec="0.5", alpha=0.9)) gca().text(.328,-.13,r'$\pm$1 s.e. shown', ha='right', va="center", size=7, bbox=dict(boxstyle="round", fc="w", ec="0.5", alpha=0.9)) """ return() commentsH=[] if comments and isinstance(comments,str): comments=[comments] #if comments: # if isinstance(comments,str): # comments=[comments] # for astr in comments: # commentsH+=plt.plot([xlim()[0],xlim()[0]],[ylim()[0],ylim()[0]],'-',color=[1,1,1],visible=False,label=astr) if 0 and showOnly in [[],None] and commments is None and title is None: print 'No transLegend unless you specify showonly?! or title or comments' return if 0 and showOnly==None: # No, don't do this! legend() makes the best choice, ie of how to deal with groups of lines?? The following gives redundant entries. #import matplotlib.text as text from matplotlib.lines import Line2D from matplotlib.patches import Patch, Rectangle, Shadow, FancyBboxPatch #from matplotlib.collections import LineCollection, RegularPolyCollection, CircleCollection showOnly=plt.gcf().findobj(Line2D)#[h for h in self.legendHandles if isinstance(h, Line2D)]#[xxx for xxx in plt.gcf().findobj(lambda x:hasattr(x, 'label')) if xxx not in plt.gcf().findobj(text.Text)] if showOnly not in [[],None]: from cpblUtilities import flattenList showOnly=[xx for xx in flatten(showOnly) if xx.get_label()]+[xx for xx in flatten(commentsH) if xx.get_label()] showLabels=[oob.get_label() for oob in showOnly] if 0 and removeOldLegends: # Possibly I could actually delete the object using o.remove() plt.gca().legend_ = None # Remove existing legend plt.draw( ) # Remove existing legend if figlegend: 1/0 # No, don't use that. maybe I can move it outside the axes if desired, though. lh=plt.figlegend(fancybox=True,shadow=False,loc=loc) else: # June 2010: Bug in pylab: loc=0 fails with log y axis? well, not in trivial cases. hm. if 0 and loc==None: loc=0 if showOnly is None: lh=plt.legend(fancybox=True,shadow=False,title=title,loc=loc) elif showOnly not in [[]]: # # Feb 2011: need? horrific bloody kludge here to eliminate envelopes. matplotlib bug?? #for ixx,xx in enumerate(showOnly): # lh=plt.legend([xx],[showLabels[ixx]])#,fancybox=True,shadow=False,title=title,loc=loc)#loc=0, lh=plt.legend(showOnly,showLabels,fancybox=True,shadow=False,title=title,loc=loc)#loc=0, else: print """Do not know yet how to make a legend without lines (ie benefit from auto placement), and so I don't have a comment-only function as far as I can tell.""" assert not showOnly is None # Not sure this is right. apr2011 # The following does nothing. This is broken. I posted April 2011 on google groups return() lh=plt.legend(plt.gcf(),[],[],fancybox=True,shadow=False,title=title,loc=loc)#loc=0, if lh: lh.get_frame().set_alpha(0.5) if comments: from matplotlib.offsetbox import TextArea, VPacker fontsize=None if lh: fontsize=lh.get_texts()[0].get_fontsize() # else!! Warning: if lh==None, there were no lines plotted.. hmm. legendcomment=TextArea('\n'.join(comments), textprops=dict(size=fontsize)) lh._legend_box = VPacker(pad=5, sep=0, children=[lh._legend_box,legendcomment], align="left") # Or should it be centre? lh._legend_box.set_figure(plt.gcf()) plt.draw() # Do i need this? if lh and title and titlesize: fontsize=lh.get_texts()[0].get_fontsize() plt.setp(plt.getp(lh,'title'),'fontsize',titlesize) if comments: return(lh,commentsH) else: return(lh) def getIndexedColormap(cmap_name,N): """ This will return a sequence of RGBA values, actually an Nx4 ndarray. I don't think the inclusion of the 4th column hurts anything, but obviously you can use indexing to remove it if you want to. The last line in the function would change to ...(as below) """ import numpy as np import matplotlib.cm as cm cmap = cm.get_cmap(cmap_name, N) return cmap(np.arange(N))[:,:-1] def labelAbscissaSurveyYears(surveys,yGood=None,years=None,addQuebecHistory=False,clearOld=True): """ This is rather too specialised to be in this module, but where else? to make sure I find it easily. Mark cycle of GSS or other survey on a plot for which x scale is year (time). years could be figured out (if None) from survey name yGood is either a vector of y values the same length as surveys, or a boolean vector of same length. It is used to show only surveys that have good data. if addHistory, it ONLY adds history notes to the x axis """ from pylab import ylim # Label cycles if years==[]: # Since defaults not programmed here yet. return() assert years # Since defaults not programmed here yet. # Ensure yGood is boolean vector; may have come as floating, ie the y data for each *possible* year/survey. if yGood==None: yGood=[True for yy in years] if not all([yy==True or yy==False for yy in yGood]): yGood=[isfinite(yy) for yy in yGood] if clearOld: oos=plt.findobj(plt.gca(),lambda o:o.get_label()=='labelAbscissaSurveyYears') while oos: # This is a bit awkward. It's just to deal with duplicates! Weird. oos[-1].remove() # FINALLY!! I have found out how to remove an object from a plot!! oos=plt.findobj(plt.gca(),lambda o:o.get_label()=='labelAbscissaSurveyYears') if not addQuebecHistory: for ixx,xx in enumerate(years): # this just uses years of last province! if yGood[ixx]: plt.text(xx,min(ylim()),surveys[ixx].replace('GSSp','GSS'),horizontalalignment='center',verticalalignment='bottom',rotation=90,label='labelAbscissaSurveyYears') if addQuebecHistory: """ For presentation: Should also label the folllowing (as another overlay version!! Make rest of figure fade, and overlay arrows... """ history=[[1965,'1960s Quiet Revolution'], [1970,'October Crisis'], [1977,'Bill 101'], [1980,'Referendum on sovereignty-association'], [1982,'Canada Act Constitution'], [1987,'Meech Lake Accord ...'], [1992,'... to Charlottetown Accord'], [1995,'Second referendum on sovereignty'], ] for ayear,aevent in history: plt.text(ayear,min(ylim()),aevent,horizontalalignment='center',verticalalignment='bottom',rotation=90) # I THINK FOLLOWING NOT USED ANYWHERE. PYGREP AND TRASH IT. def empiricalcdf(data, method='Hazen'): """Return the empirical cdf. [copied from scipi...] hih? trash Methods available: Hazen: (i-0.5)/N Weibull: i/(N+1) Chegodayev: (i-.3)/(N+.4) Cunnane: (i-.4)/(N+.2) Gringorten: (i-.44)/(N+.12) California: (i-1)/N Where i goes from 1 to N. """ i = np.argsort(np.argsort(data)) + 1. N = len(data) method = method.lower() if method == 'hazen': cdf = (i-0.5)/N elif method == 'weibull': cdf = i/(N+1.) elif method == 'california': cdf = (i-1.)/N elif method == 'chegodayev': cdf = (i-.3)/(N+.4) elif method == 'cunnane': cdf = (i-.4)/(N+.2) elif method == 'gringorten': cdf = (i-.44)/(N+.12) else: raise 'Unknown method. Choose among Weibull, Hazen, Chegodayev, Cunnane, Gringorten and California.' return cdf def sortDictsIntoQuantiles(dicts,sortkey,weightkey,nQuantiles=None,approxN=None): """ Generate quantile groups for data with sample weights. The data should be in the (inefficient!) format of a list of dicts. CPBL, 4 August 2010 Aug 2010: Also, add a ranking for each respondent into her dict. This seems to be a curious beast... I'm going to put the lowest at 0 and the highest at 1. Yet I'm going to distribute the rest according to weights....... hmm dicts: list of dicts. Each dict contains one respondent (sample), with named variables and their values. Should all have identical format. Horribly inefficient data structure! Could maybe offer an alternative, where these variables have been vectorized! Indeed, all I need is y and w, and then this could return a dict of indices rather than sorting... So the behaviour should be quite different depending on whether first argument is a list of dicts or a dict of named vectors (not yet done). sortkey: the variable whose order is being ranked into quantiles weightkey: sample weights nQuantiles=None: Either nQuantiles or approxN or neither can be specified. This tells how many quantiles to calculate. They'll be evenly spaced from 0 to 1. approxN=None: nQuantiles can be calculated based on how many samples there are, and requiring that roughly approxN or more samples are going to be in each quantile (ie would be if weights were uniform). In August 2010 I made another CDF-related function, to calculate non-parametric confidence intervals. """ assert approxN==None or nQuantiles==None finiteSubset=[dd for dd in dicts if isfinite(dd[sortkey])] finiteSubset.sort(key=lambda x: x[sortkey]) y=array([respondent[sortkey] for respondent in finiteSubset]) w=array([respondent[weightkey] for respondent in finiteSubset]) if approxN==None: approxN=20 if nQuantiles==None: # Restrict number of quantiles to be between 2 and 25 nQuantiles=max(2,min(25,floor(len(y)/approxN))) #nQuantiles=min(25,floor(len(y)/approxN)) pQtl=(1.0+1.0*arange(nQuantiles))/nQuantiles assert len(pQtl)==nQuantiles assert all(isfinite(w)) CDF=np.cumsum(w)*1.0/sum(w) pQtl=(1.0*arange(nQuantiles))/nQuantiles # Now assign respondents into the pQtl quantiles categories (ie dict: byQtl={} used=len(finiteSubset) for qq in pQtl[::-1]: iQ=find(CDF[:used]>=qq) for irespondent in iQ: byQtl[qq]=[finiteSubset[iir] for iir in iQ] used=min(iQ) # Also, assign to each respondent (sample) a ranking, ranging from 0 to 1, but taking into account weights (a bit weird?) """ I guess the way to do that is actually the same as above, except use n_unique(y) as the number of bins? Algorithm: assign rank to each, from 0 to 1, ignoring that some y values are the same. Then average over those with common y values. (1) Stretch the CDF to go from 0 to 1: """ CDFscaled=CDF/(max(CDF)-min(CDF)) CDFscaled=CDFscaled-min(CDFscaled) """ (2) Now average rank over duplicates in y """ from scipy import stats keyname='rank'+sortkey[0].upper()+sortkey[1:] for isample,asample in enumerate(finiteSubset): asample[keyname]=CDFscaled[isample] from dictTrees import dictTree byY=dictTree(finiteSubset,[sortkey]) for kk in byY: sameY=byY[kk] if len(sameY)>1: for dd in sameY: # WHat!!!!!!!!! there is no weighted standard error of sample mean with sample weights in scipy???? Following needs weights!!!!11 #debugprint( ' WHat!!!!!!!!! there is no weighted standard error of sample mean with sample weights in scipy???? Following needs weights!!!!11') dd[keyname]=np.mean([aresp[keyname] for aresp in sameY]) # NO! THIS WOULD NOT BE VERY INFORMATIVE OF ANYTHING... dd['se'+keyname]=stats.sem([aresp[keyname] for aresp in sameY]) return(byQtl) def scaledHist(rawData,bins=None,histtype=None,weights=None,color=None,label=None): """ I find it hard scale a histogram so its max value is 1. So make a function to do this using a kludge. So normed arg not allowed in this call, unlike for hist() """ if weights==None: weights=array([1.0 for xx in rawData]) [a,b]=plt.histogram(rawData,bins=bins,weights=weights) weights=weights/max(a*1.0) return(plt.hist(rawData,bins=bins,histtype=histtype,weights=weights,color=color,label=label)) def analyseErrorDistributionNonParametric(errorList,weights=None,twoSidedP=[.68,.95,.99],visualise=False): """ Pretty specific at the momen; can be generalised. Returns a set of confidence intervals for two-sided ranges in an empirical disteibution. #####NOPE! of errors. ie it's specific to "errors" only in that I'm interested in deviations around zero. .eg. values for p=.9 igves the the 5th and 95th quantile estimates. """ assert weights==None # No one has written a weighted quantile function?! Ah, use rpy hmisc's wtd.quantile. Or port it. errorList.sort() outset={} for pp in twoSidedP: outset[pp]=[quantile(errorList, (1.0-pp)/2.0,issorted=True),quantile(errorList, 0.5+pp/2.0,issorted=True)] if visualise: plt.close('all') # debug!!!!! figure(876) plotPDFandCDF(errorList,label='error/E',CDFlabel='hm',color='g',normColor='r',absCDF=True,weights=weights) subplot(211) for kk in outset: plot(outset[kk],[kk,kk],'rx') return(outset) def plotPDFandCDF(yy,label=None,CDFlabel=None,color=None,normColor=None,absCDF=True,weights=None): """ For now, this assumes zero-centred distribution of data, and plots PDF on top and CDF on bottom axes of current figure. It adds Gaussian curve fits by default, and generates labels but leaves the legend for someone else to call, since it can be used multiple times on the same axis. It ignores/hides long tails! How to do this? Hm, at moment by looking at only the middle 75% of the range? or by looking at only 3 sigma, whicever is smaller. It does not allow weights yet!!!! At the moment, the CDF, unlike the PDF, displays the distribution of absolute values! So this is not the integral of the PDF. This is appropriate for look at a distribution of errors, my first application. CPBL Aug 2010 """ from pylab import sqrt import scipy from scipy.stats import distributions assert absCDF==True and weights==None # Missing features! yy=array(sorted(finiteValues(yy))) # Find width of core of distribution: def noTailsStdDev(yyy): """ just use the inner 2 sigma to fit a Gaussian Assume mean zero! Actually, also look at inner 75%ile, and if that's smaller, take it, since sigma must be driven by hug outliers. """ tailsLim=plt.std(yyy) sigmaFit=plt.std(yyy[find(abs(yyy)<2.0*tailsLim)]) # Check alternative method: n=len(yyy) if n>10: yyy.sort() sigmaFit=min(sigmaFit,plt.std(yyy[int(n*0.125):int(n*.875)])) return(sigmaFit) # Assume zero-centred! fitSig=noTailsStdDev(yy) xx=arange(-5*fitSig,5.0*fitSig,fitSig/500.0) # For plotting fits tailsLim=fitSig*5.0 bins=arange(-tailsLim,tailsLim,tailsLim/sqrt(len(yy))) # for histogram absbins=arange(0,2.0*tailsLim,2.0*tailsLim/sqrt(len(yy))) # for CDF of abs(yy) # PDF subplot(211) scaledHist(yy[find(abs(yy)>> data = range(1,5) >>> data [1, 2, 3, 4] >>> np.average(data) 2.5 >>> np.average(range(1,11), weights=range(10,0,-1)) 4.0 """ if not isinstance(a, np.matrix) : a = np.asarray(a) if a.size==0: # Added by CPBL, Sep2010 return(nan) elif weights is None : avg = a.mean(axis) scl = avg.dtype.type(a.size/avg.size) else : a = a + 0.0 wgt = np.array(weights, dtype=a.dtype, copy=0) # Sanity checks if a.shape != wgt.shape : if axis is None : raise TypeError, "Axis must be specified when shapes of a and weights differ." if wgt.ndim != 1 : raise TypeError, "1D weights expected when shapes of a and weights differ." if wgt.shape[0] != a.shape[axis] : raise ValueError, "Length of weights not compatible with specified axis." # setup wgt to broadcast along axis wgt = np.array(wgt, copy=0, ndmin=a.ndim).swapaxes(-1,axis) scl = wgt.sum(axis=axis) if (scl == 0.0).any(): raise ZeroDivisionError, "Weights sum to zero, can't be normalized" avg = np.multiply(a,wgt).sum(axis)/scl if returned: scl = np.multiply(avg,0) + scl return avg, scl else: return avg def LambertW(z): import numpy from math import log,sqrt,exp """debugprint( "Should in new version get: scipy.special.lambertw")""" " Lambert W function, principal branch " if z.__class__ in [numpy.ndarray]: return(array([LambertW(zz) for zz in z])) if z.__class__ in [list]: return([LambertW(zz) for zz in z]) eps=4.0e-16 em1=0.3678794411714423215955237701614608 if z<-em1: print >>stderr,'LambertW.py: bad argument %g, exiting.'%z exit(1) if 0.0==z: return 0.0 if z<-em1+1e-4: q=z+em1 r=sqrt(q) q2=q*q q3=q2*q return\ -1.0\ +2.331643981597124203363536062168*r\ -1.812187885639363490240191647568*q\ +1.936631114492359755363277457668*r*q\ -2.353551201881614516821543561516*q2\ +3.066858901050631912893148922704*r*q2\ -4.175335600258177138854984177460*q3\ +5.858023729874774148815053846119*r*q3\ -8.401032217523977370984161688514*q3*q if z<1.0: p=sqrt(2.0*(2.7182818284590452353602874713526625*z+1.0)) w=-1.0+p*(1.0+p*(-0.333333333333333333333+p*0.152777777777777777777777)) else: w=log(z) if z>3.0: w-=log(w) for i in xrange(10): e=exp(w) t=w*e-z p=w+1.0 t/=e*p-0.5*(p+1.0)*t/p w-=t if abs(t)>> print(lm_D9.names) [1] "coefficients" "residuals" "effects" "rank" [5] "fitted.values" "assign" "qr" "df.residual" [9] "contrasts" "xlevels" "call" "terms" [13] "model" And so is extracting a particular element: >>> print(lm_D9.rx2('coefficients')) (Intercept) groupTrt 5.032 -0.371 or >>> print(lm_D9.rx('coefficients')) """ robjects.r(''' f <- function(x,y, verbose=FALSE) { if (verbose) { cat("I am calling f().\n") } 2 * pi * r } f(3) ''') """# aha!! A quanitle loess will give me what I want for sigmas!! just show an envelope of interquartile range!! # fitting the Quantile LOESS source("http://www.r-statistics.com/wp-content/uploads/2010/04/Quantile.loess_.r.txt") QL <- Quantile.loess(Y = Ozone.2, X = Temp.2, the.quant = .95, window.size = 10, window.alignment = c("center")) points(QL$y.loess ~ QL$x, type = "l", col = "green") legend("topleft",legend = c("95% Quantile regression", "95% Quantile LOESS"), fill = c("red","green")) """ def linearColormapLookup(cmap,zs): import numpy as np import matplotlib.cm as cm def make_N_colors(cmap_name, N): cmap = cm.get_cmap(cmap_name, N) return cmap(np.arange(N))[:,:-1] cmapLU=make_N_colors('jet',100) from scipy import interpolate return(interpolate.interp1d(np.linspace(min(zs),max(zs),num=100),array(cmapLU).T)) # oh shit... I already had that: def getIndexedColormap(cmap_name,N): """ This will return a sequence of RGBA values, actually an Nx4 ndarray. I don't think the inclusion of the 4th column hurts anything, but obviously you can use indexing to remove it if you want to. The last line in the function would change to ...(as below) """ import numpy as np import matplotlib.cm as cm cmap = cm.get_cmap(cmap_name, N) return cmap(np.arange(N))[:,:-1] def assignColormapEvenly(cmap,zs,asDict=False,missing=[1,1,1]):#None):# # Stupid bloody basemap/pylab can't colormap grayscale fill colours?! """ Nice: Use a colormap, supplied rather explicitly (tough to get in pylab!?), to evenly cover the given range of data. In default mode, returns a lookup table of values to RGB colour list. (Is "floor" correct?) See also (much nicer?) def plot_biLinearColorscale(rgbBotMidTop,datavals) in cpblUtilitiesMath yuck. this should instead return an function, using interp. Now it does! Also, matplotlib.cm has lots of utility functions that I'm not using!! What? What does evenly mean?? Can I just look at the end points and spread the values out linearly? Or was this supposed to spread them out evenly across the colours? Ahh... I think the latter. So I should rever htis function to a much older version and make a new linear one. """ import pylab szs=deepcopy([zz for zz in zs if not isnan(zz)]) szs.sort() assert asDict assert cmap # Ie (above): revert this to old version, or retire it... if cmap is None: fooo cmap=tonumeric([LL.strip().split('\t') for LL in open('mcool.tsv','rt').readlines()])[::16] from pylab import jet cmap=jet() #colormap(jet) ccs=[cmap[int(floor(iz*len(cmap)*1.0/len(szs)))] for iz in range(len(szs))] if asDict:#deprecatd: return(dict(zip(szs+[pylab.nan],ccs+[missing]))) else: # Return a lookup function!! from scipy import interpolate return(interpolate.interp1d(array(szs).T,array(ccs).T)) # A lookup function for z values! def plot_biLinearColorscale(rgbBotMidTop,datavals): """ Specify the bottom, middle, and top RGB values of a colormap. Specify the linear extremes of the data. So this should produce a colorbar with the given map and tickmarks for the given data extremes, mapped linearly to the colormap. I also have an old def assignColormapEvenly(cmap,zs,asDict=True,missing=[1,1,1]) in mapMaster Also, matplotlib.cm has lots of utility functions that I'm not using!! """ if not rgbBotMidTop: # Offer a default example: green red rgbBotMidTop=[[0.5,0.0,0.0],[1.0,1.0,.9],[0.0,0.5,0.0]] rgbB,rgbM,rgbT=rgbBotMidTop #=[[rgb1],[],[]] """ cdict = {'red':((0.0, .5, .5), (0.5, 1.0, 1.0), (1.0, 0.0, 0.0)), 'green': ((0.0, 0.0, 0.0), (0.5, 1.0, 1.0), (1.0, 0.5, 0.5)), 'blue': ((0.0, 0.0, 0.0), (0.5, .9, .9), (1.0, 0.0, 0.0))} """ cdict = {'red':((0.0, rgbB[0],rgbB[0]), (0.5, rgbM[0],rgbM[0]), (1.0, rgbT[0],rgbT[0])), 'green': ((0.0, rgbB[1],rgbB[1]), (0.5, rgbM[1],rgbM[1]), (1.0, rgbT[1],rgbT[1])), 'blue': ((0.0, rgbB[2],rgbB[2]), (0.5, rgbM[2],rgbM[2]), (1.0, rgbT[2],rgbT[2])) } import numpy as np #import pylab as plt from pylab import * import matplotlib as mpl import matplotlib.pyplot as plt from matplotlib.colors import LinearSegmentedColormap greenred= LinearSegmentedColormap('TwoLinear', cdict) plt.register_cmap(name='TwoLinear', data=cdict) # optional lut kwarg fig=plt.figure(1) plt.clf() #cnorm = mpl.colors.Normalize(vmin=min(datavals),vmax=max(datavals)) minD,maxD=min(datavals),max(datavals) hi=plt.imshow([[minD,maxD],[maxD,minD]], interpolation='nearest',cmap='TwoLinear') hax=gca() #plt.colormap('GreenRed') setp(hax,'visible',False) #cb1 = mpl.colorbar.ColorbarBase(mpl.colorbar.make_axes(hax,pad=0)[0], cmap=greenred,norm=cnorm,orientation='vertical',format='%d')#,ticks=range(len(allCohorts)))#uniqu hcb=plt.colorbar(aspect=6) #norm=cnorm, # Also return a lookup function that can be used with interpolation to give colour for any country value: import scipy.interpolate as sp print [minD,1.0/2.0*sum(minD+maxD),maxD] print [array(rgbB),array(rgbM),array(rgbT)] return(fig,sp.interpolate.interp1d( [minD,1.0/2.0*sum(minD+maxD),maxD], [array(rgbB),array(rgbM),array(rgbT)], axis=0) )