#!/usr/bin/python
from IPython.Debugger import Tracer; debug_here = Tracer()
# Shit. temp don't have it installed!!!
#import rpy2.robjects as robjects 
import os
import re
from copy import deepcopy

#import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl # Not yet used, may 2010
# WHAT!? matplotlib.pyplot is not the same as pylab!!
#import matplotlib.pyplot as plt
import pylab as plt
#from pylab import *
# Why not define these lobally? June 2010
from pylab import figure,plot,ylim,xlim,setp,clf,array,isnan,nan,find,text,isfinite,xlabel,ylabel,title,arange,subplot,gca
NaN=nan


# Defaults only for colours so far:
if 1:
    #import cpblDefaults
    #defaults=cpblDefaults.defaults()
    import cifarColours
    cifar=cifarColours.defcolours()
"""
Solutions for bounding box / whitespace in output figures:
            plt.subplots_adjust(left  = 0.05,right=1-tiny,bottom=0.1,top=1-tiny) # BRILLIANT!!! USE subplot_tool() to find values!


"""

print 'fyi: __See figureFontSetup() for plot settings  (cpblUtilMathGraph)'


#####################################################################################
def overplotLinFit(x,y,format=None,color=None,xscalelog=False):
    """
    March 2010: upgrading it so that the fit line doesn't extrapolate beyond the x,y range of data.
    """
    if format==None:
        format='k--'
    if color==None:
        color=cifar['grey']
    from pylab import plot,array,xlim,any

    if all(isnan(x)):
        return()
    if 0: # Use quick method, no standard errors
        from rpy2 import r

        ls_fit = r.lsfit(x,y)
        gradient = ls_fit['coefficients']['X']
        yintercept= ls_fit['coefficients']['Intercept']
        xr=array([max(min(x),min(xlim())),min(max(x),max(xlim()))])
        yr=[yintercept+gradient*xr[0],yintercept+gradient*xr[1]]
        plot(xr,yr,format,color=color)
        return(gradient)
    


    if xscalelog:
        x=np.log(x)


    if 1: # FIT A LINE TO THE NON-NAN ELEMENTS, OVERPLOT IT
        from scipy import stats as stats
        from pylab import where,isfinite,logical_and
        ii=where(logical_and(isfinite(x),isfinite(y)))
        ( gradient, yintercept, rrr, twoTailedProb, stderr)=stats.linregress(x[ii],y[ii])
        s=stderr
    else:
        import rpy2.robjects as robjects
        rpy = robjects.r
        #import rpy2 as rpy
        rpy.set_default_mode(rpy.NO_CONVERSION)
        if xscalelog:
            x=np.log(x)
        linear_model = rpy.r.lm(rpy.r("y ~ x"), data = rpy.r.data_frame(x=x, y=y))
        rpy.set_default_mode(rpy.BASIC_CONVERSION)
        gradient=linear_model.as_py()['coefficients']['x']
        yintercept=linear_model.as_py()['coefficients']['(Intercept)']
        s=rpy.r.summary(linear_model)['sigma']
        print '  b: ',gradient
        #{'x': 5.3935773611970212, '(Intercept)': -16.281127993087839}
        print '  sigma: ',s
        #{'terms': <Robj object at 0x0089E240>, 'fstatistic': {'dendf': 2.0, 'value': 2.2088097871524752, 'numdf': 1.0}, 'aliased': {'x': False, '(Intercept)': False}, 'df': [2, 2, 2], 'call': <Robj object at 0x0089E340>, 'residuals': {'1': -9.3064376809571137, '3': -6.9622553363545983, '2': 6.3744808050079511, '4': 9.8942122123037599}, 'adj.r.squared': 0.28720941270437206, 'cov.unscaled': array([[ 2.1286381 , -0.42527178], [-0.42527178, 0.09626979]]), 'r.squared': 0.524806275136248, 'sigma': 11.696414461570097, 'coefficients': array([[-16.28112799, 17.06489672, -0.95407129, 0.44073772], [ 5.39357736, 3.62909012, 1.48620651, 0.27556486]])} 

        """
    import rpy2.robjects as robjects
    r = robjects.r
    # Create the data by passing a Python list to RPy2, which interprets as an R vector
    ctl = robjects.FloatVector(x)
    trt = robjects.FloatVector(y)
    group = r.gl(2, 10, 20, labels = ["Ctl","Trt"])
    weight = ctl + trt
    # RPy2 uses Python dictionary types to index data
    robjects.globalEnv["weight"] = weight
    robjects.globalEnv["group"] = group
    # Run the models
    lm_D9 = r.lm("weight ~ group")
    print(r.anova(lm_D9))
    lm_D90 = r.lm("weight ~ group - 1")
    print(r.summary(lm_D90))

    """


    # Limit extent of the plotted line to the x-extent of non-nan points
    xxx=[xx for ix,xx in enumerate(x) if isfinite(xx) and isfinite(y[ix])]
    lxlim=xlim()
    if xscalelog:
        lxlim=log(lxlim)
    xr=array([max(min(xxx),min(lxlim)),min(max(xxx),max(lxlim))])
    yr=[yintercept+gradient*xr[0],yintercept+gradient*xr[1]]
    if xscalelog:
        xr=np.exp(xr)
    aline=plot(xr,yr,format,color=color)
    return(aline,gradient,s)


# The following taken from skipper of pystatsmodels... because it allows for errorbars. so incorporate it into mine, above.
def linfit2D(y, x=None, y_unc=None): 
    import numpy as np 
    import scipy.special as ss 
    """
    Fits a line to 2D data, optionally with errors in y. 

    The method is robust to roundoff error. 

    Parameters 
    ---------- 
    y: ndarray 
        Ordinates, any shape. 
    x: ndarray 
        Abcissas, same shape.  Defaults to np.indices(y.length)) 
    y_unc: ndarray 
        Uncertainties in y.  If scalar or 1-element array, applied 
        uniformly to all y values.  [NOT IMPLEMENTED YET!]  Must be 
        positive. 

    Returns 
    ------- 
    a: scalar 
        0 Fitted intercept 
    b: scalar 
        1 Fitted slope 
    a_unc: scalar 
        2 Uncertainty of fitted intercept 
    b_unc: scalar 
        3 Uncertainty of fitted slope 
    chisq: scalar 
        4 Chi-squared 
    prob: scalar 
        5 Probability of finding worse Chi-squared for this model with 
          these uncertainties. 
    covar: ndarray 
        6 Covariance matrix: [[a_unc**2,  covar_ab], 
                              [covar_ab,  b_unc**2]] 
    yfit: ndarray 
        7 Model array calculated for our abcissas 
    
    Notes 
    ----- 
    
    If prob > 0.1, you can believe the fit.  If prob > 0.001 and the 
    errors are not Gaussian, you could believe the fit.  Otherwise 
    do not believe it. 

    See Also 
    -------- 
    Press, et al., Numerical Recipes in C, 2nd ed, section 15.2, 
    or any standard data analysis text. 

    Examples 
    -------- 
    >>> import linfit 

    >>> a = 1. 
    >>> b = 2. 
    >>> nx = 10 
    >>> x = np.arange(10, dtype='float') 
    >>> y = a + b * x 
    >>> y_unc = numpy.ones(nx) 
    >>> y[::2]  += 1 
    >>> y[1::2] -= 1 
    >>> a, b, sa, sb, chisq, prob, covar, yfit = linfit.linfit(y, x, y_unc) 
    >>> print(a, b, sa, sb, chisq, prob, covar, yfit) 
(1.272727272727272, 1.9393939393939394, 0.58775381364525869, 0.11009637651263605, 9.6969696969696937, 0.28694204178663996, array([[ 0.34545455, -0.05454545], 
       [-0.05454545,  0.01212121]]), array([  1.27272727,   3.21212121,   5.15151515,   7.09090909, 
         9.03030303,  10.96969697,  12.90909091,  14.84848485, 
        16.78787879,  18.72727273])) 

    Revisons 
    -------- 
    2007-09-23 0.1  jh@...	Initial version 
    2007-09-25 0.2  jh@...	Fixed bug reported by Kevin Stevenson. 
    2008-10-09 0.3  jh@...	Fixed doc bug. 
    2009-10-01 0.4  jh@...  Updated docstring, imports. 
    """
    # standardize and test inputs 
    if x == None: 
      x = np.indices(y.length, dtype=y.dtype) 
      x.shape = y.shape 

    if y_unc == None: 
        y_unc = np.ones(y.shape, dtype=y.dtype) 

    # NR Eq. 15.2.4 
    ryu2  = 1. / y_unc**2 
    S     = np.sum(1.    * ryu2) 
    Sx    = np.sum(x     * ryu2) 
    Sy    = np.sum(y     * ryu2) 
    # Sxx = np.sum(x**2  * ryu2) # not used in the robust method 
    # Sxy = np.sum(x * y * ryu2) # not used in the robust method 

    # NR Eq. 15.2.15 - 15.2.18 (i.e., the robust method) 
    t = 1. / y_unc * (x - Sx / S) 
    Stt = np.sum(t**2) 

    b = 1. / Stt * np.sum(t * y / y_unc) 
    a = (Sy - Sx * b) / S 

    covab = -Sx / (S * Stt)                  # NR Eq. 15.2.21 

    sa = np.sqrt(1. / S * (1. - Sx * covab)) # NR Eq. 15.2.19 
    sb = np.sqrt(1. / Stt)                   # NR Eq. 15.2.20 

    rab = covab / (sa * sb)                  # NR Eq. 15.2.22 

    covar = np.array([[sa**2, covab], 
                      [covab, sb**2]]) 

    yfit = a + b * x 
    chisq = np.sum( ((y - yfit) / y_unc)**2 ) 

    prob = 1. - ss.gammainc( (y.size - 2.) / 2., chisq / 2.) 

    return a, b, sa, sb, chisq, prob, covar, yfit 



##############################################################################
##############################################################################
#
def cpblOLS(y,xs,betacoefs=False,rhsOnly=None):
    ##########################################################################
    ##########################################################################
    """
    I want to have my OLS routine:
  - take a list of vectors
  - get heteroskedasticity-robust errors back
  - have option to get standardized beta coeffcients rathr than raw b
  - ignore NaNs
 - take sample weights. (WLS)


  So xs can/should be a dict of named vectors.
Actually, here's my preferred calling format: yname,xnames, dataDict
but dataDict should be able to be a list of dicts or a dict of named vectors.


sep 2010: this failed, ie is incompelte, since I can't see how to do weights with it.
    """
    import numpy as np
    import scikits.statsmodels as sm


    # Not yet flexi calling format:
    assert isinstance(y,str)
    #assert isinstance(x,list)
    #assert isinstance(x[0],str)
    assert isinstance(xs,dict)
    dataDict=xs
    Y=dataDict.pop(y) # Remove y from the data to be used for x.
    if not rhsOnly:
        rhsOnly=dataDict.keys()#list(set(x.keys())-set([y]))

    # get data

    if betacoefs:
        X = sm.tools.add_constant(np.column_stack([(xs[kk]-np.mean(finiteValues(xs[kk])))/np.std(finiteValues(xs[kk])) for kk in rhsOnly]))
    else:
        X = sm.tools.add_constant(np.column_stack([xs[kk] for kk in rhsOnly]))
    #beta = np.array([1, 0.1, 10])
    #y = np.dot(X, beta) + np.random.normal(size=nsample)

    # run the regression
    results = sm.OLS(Y, X,weights=Y).fit()
    foiu
    # look at the results
    #print results.summary()

    #and look at `dir(results)` to see some of the results
    #that are available
    return(bs,rses)


##############################################################################
##############################################################################
#
def _obselete_use_savefigall_exportPythonFigToFormats(fileName):
    ##########################################################################
    ##########################################################################
    from pylab import savefig
    for ff in ['pdf','png','eps']:
        # Should still develop this to get rid of borders!
        
        savefig(fileName+'.'+ff,format=ff,transparent=True)

##############################################################################
##############################################################################
#
def savefigall(fn,transparent=False,ifany=None,fig=None,skipIfExists=False,pauseForMissing=True,onlyPNG=False,jpeg=False,jpeghi=False,svg=False,pdf=True):
    ##########################################################################
    ##########################################################################
    """
    Like savefig, but saves in multiple formats; make this standard

    April 2010: adding transaprent option for png only. (right now it's automatic for pdf)

    ifany must be an artist subclass taken by matplotlib.findobj(match=)

Do not forget to use cpblStata's
 saveAndIncludeFig(self,figname=None,caption=None,texwidth=None,title=None,onlyPNG=False)
 if you're using a cpbl latex object!

Sept 2010: skipIfExists is a way to skip saving if the file already exists (ie to save time).
    
Sept 2010: Returns written (or existing) file stems.

April 2011: what?! this has evolved to have dissimilar options from my latex class's saveAndIncludeFig() !
      # saveAndIncludeFig(self,figname=None,caption=None,texwidth=None,title=None,onlyPNG=False,rcparams=None,transparent=False):
      # Okay. April 2011: I've hopefully implemented things so can use latex.saveAnd... with the above options.


    """
    if fig==None:
        fig=plt.gcf()

    (root,tail)=os.path.split(fn)
    if not root:
        try:
            from cpblDefaults import defaults, paths
            root=paths['graphics']#'/home/cpbl/rdc/workingData/graphics/'#defaults['workingPath']+'graphics/'#'graphicsPath'
        except:
            root='./'#/home/cpbl/rdc/graphicsOuttest/graphics/'#defaults['workingPath']+'graphics/'#'graphicsPath'
    if root and not root.endswith('/'):
        root+='/'
    from cpblUtilities import str2pathname
    tail=str2pathname(tail) # Get rid of any punctuation in the name.



    if 0:
        print 'haha kludge skipping !!!!!!!!!!!!!!!!!!!!!!!! ',root,tail
        return(root+tail)



    if skipIfExists and  os.path.exists(root+tail+'.png') and  os.path.exists(root+tail+'.pdf'):
        print '   Skipping production of '+root+tail+'(png/pdf) because it already exists [skipIfExists]'
        return(root+tail)

    if ifany:
        if not plt.findobj(match=ifany):
            print '   savefigall: Empty plot (no %s), so not saving %s.'%(str(ifany),root+tail)
            plt.savefig(root+tail+'.png.FAILED',format='png')
            if pauseForMissing:
                plt.show()
                from cpblUtilities import cwarning
                cwarning('   savefigall: Empty plot (no %s), so not saving %s.'%(str(ifany),root+tail))
            return(None)
    
    print 'Saving graphics: '+root+tail+' (+ext)'
    if transparent:
        fig.savefig(root+tail+'.png',transparent=transparent)
    else:
        fig.savefig(root+tail+'.png')
    #savefig(root+tail+'_t.png',transparent=True)
    #savefig(root+tail+'.pdf')
    if not onlyPNG and pdf:
        plt.savefig(root+tail+'.pdf',transparent=transparent)
    if svg:
        plt.savefig(root+tail+'.svg',transparent=transparent)
    if jpeg:
        # jpeg Not supported by pylab!!
        #plt.savefig(root+tail+'.jpeg')#transparent=True)
        os.system('convert '+ root+tail+'.png '+root+tail+'.jpeg')
    if jpeghi:
        os.system('convert -quality 100 '+ root+tail+'.png '+root+tail+'.jpeg')
    return(root+tail)


def xTicksLogIncome_deprecated_USE_xTicksIncome(nticks=7,natlog=False,tenlog=False,kticks=None):
    """
    For any plot in which the abscissa is log10(income) [set log10=True in this case] or log(income) , you can use this to make more readable tick points and labels.

You can specify the income ticks in k$ in kticks.

one of natlog or tenlog should be chose, until 2012 when I can set natlog=True as default, above.

    """
    assert natlog or tenlog
    assert not natlog or not tenlog
    base,flog=10.0,np.log10
    if natlog:
        base,flog=np.e,np.log
        
    xl=pow(base,plt.array(plt.xlim()))/1000
    ones=[ik for ik in range(5,200,1) if ik<=xl[1] and ik>=xl[0]]
    fives=[ik for ik in range(5,200,5) if ik<=xl[1] and ik>=xl[0]]
    tens=[ik for ik in range(10,200,10) if ik<=xl[1] and ik>=xl[0]]
    oo=[ones,fives,tens]
    nn=[abs(len(ooo)-nticks) for ooo in oo]
    choice=oo[[inn for inn in range(len(nn)) if nn[inn]==min(nn)][0]]
    if kticks:
        choice=kticks
    plt.setp(plt.gca(),'xticks',[flog(cc*1000.0) for cc in choice])
    plt.setp(plt.gca(),'xticklabels',[str(cc)+'k' for cc in choice])
    return

def xTicksIncome(nticks=7,natlog=False,tenlog=False,kticks=None,dollarsign=True):
    """
    Oct 2011: generalise above to deal with non-log?
    
    For any plot in which the abscissa is log10(income) [set log10=True in this case] or log(income) , you can use this to make more readable tick points and labels.

You can specify the income ticks in k$ in kticks.

one of natlog or tenlog should be chose, until 2012 when I can set natlog=True as default, above. [not anymore, unless I make a new log=False argument]

    """
    #assert natlog or tenlog
    assert not natlog or not tenlog
    base,flog=10.0,np.log10
    if natlog:
        base,flog=np.e,np.log
    if natlog or tenlog:
        xl=pow(base,plt.array(plt.xlim()))/1000
        ones=[ik for ik in range(5,200,1) if ik<=xl[1] and ik>=xl[0]]
        fives=[ik for ik in range(5,200,5) if ik<=xl[1] and ik>=xl[0]]
        tens=[ik for ik in range(10,200,10) if ik<=xl[1] and ik>=xl[0]]
        oo=[ones,fives,tens]
        nn=[abs(len(ooo)-nticks) for ooo in oo]
        choice=oo[[inn for inn in range(len(nn)) if nn[inn]==min(nn)][0]]
        if kticks:
            choice=kticks
        plt.setp(plt.gca(),'xticks',[flog(cc*1000.0) for cc in choice])
        plt.setp(plt.gca(),'xticklabels',[str(cc)+'k' for cc in choice])
    else: #Linear values: just put a dollar sign and take off the thousands.?
        xl=plt.array(plt.xlim())/1000.0
        ones=[ik for ik in range(5,200,1) if ik<=xl[1] and ik>=xl[0]]
        fives=[ik for ik in range(5,200,5) if ik<=xl[1] and ik>=xl[0]]
        tens=[ik for ik in range(10,200,10) if ik<=xl[1] and ik>=xl[0]]
        oo=[ones,fives,tens]
        nn=[abs(len(ooo)-nticks) for ooo in oo]
        choice=oo[[inn for inn in range(len(nn)) if nn[inn]==min(nn)][0]]
        if kticks:
            choice=kticks
        plt.setp(plt.gca(),'xticks',[cc*1000.0 for cc in choice])
        plt.setp(plt.gca(),'xticklabels',[r'\$'*dollarsign+str(cc)+'k' for cc in choice])
    return


##############################################################################
##############################################################################
#
def stdev(vals):
    ##########################################################################
    ##########################################################################
    """ the available std does not give 0 if there's only one value
    """
    from pylab import std
    if len(vals)>1:
        return(std(vals))
    elif len(vals)>0:
        return(0.)
    else:
        return (float('nan'))

##############################################################################
##############################################################################
#
from numpy import mean # This one deals with no values!, so overwrite python's default mean..
#def mean(vals):
#    ##########################################################################
#    ##########################################################################
#    """ the available mean cannot deal with no values
#    """
#    import pylab
#    if len(vals)>0:
#        return(np.mean(vals))
#    else:
#        return(float('nan'))
        
##############################################################################
##############################################################################
#
def seMean(vals, ses): # A Weighted mean: weighted by standard errors
    # Takes a simple list of estimates and a simple list of their standard errors.
    # Returns an estimate of the scalar weighted mean and its standard error.
    #
    # - COVARIANCE IS IGNORED (ASSUMED ZERO)
    #
    # - NaNs are DROPPED! before taking mean
    ##########################################################################
    ##########################################################################
    #from pylab import mean #import pylab
    from pylab import sqrt,isnan
    if vals and any(vals):
        vals,ses=[v for v in vals if not isnan(v)],[v for v in ses if not isnan(v)]
        meanSE=sqrt(1.0/sum([1.0/ff/ff for ff in ses]))
        meanVals=sum([vals[ic]/ses[ic]/ses[ic] for ic in range(len(vals))])*meanSE*meanSE
    else:
        meanSE=''#[float('nan')]
        meanVals=''#[float('nan')]
    return(meanVals,meanSE)
##############################################################################
##############################################################################
#
def seSum(x=None, sx=None): # This takes a simple sum across x, i.e. x has length >1.
    ##########################################################################
    ##########################################################################
    #from pylab import mean #import pylab
    from pylab import sqrt,array,isnan,any
    f=sum(x)
    sf=sqrt(sum(array(sx)*array(sx)))
    assert not any(isnan(sf))

    return(f,sf)



##############################################################################
##############################################################################
#
def seProduct(x=None,y=None,sx=None,sy=None,covs=None): 
    ##########################################################################
    ##########################################################################
    """
For f=x/y, with s.e.'s of sx and sy, returns f and approx sf...
# COVARIANCE SO FAR IGNORED!

They need to be floats at the moment.
The arguments are mandatory; I'm making them parameters for easier reading of the call.
"""
    #from matplotlib import *
    x=array(x)
    y=array(y)
    sx=array(sx)
    sy=array(sy)

    f=x*y
    sfoverf=sqrt((sx/x)*(sx/x)+(sy/y)*(sy/y))
    sf=sfoverf*abs(f)
    
    if x==0.0 and y==0.0: # How do I calculate se for this?
        sf=sqrt(sx**2+sy**2) # True for any limit of x==y?
    if (sx==0 and x==0) or (sy==0 and y==0):
        f,sf=0,0
    assert isnan(f) or not isnan(sf)
    return(f,sf)
##############################################################################
##############################################################################
#
def seQuotient(x=None,y=None,sx=None,sy=None,covs=None): 
    ##########################################################################
    ##########################################################################
    """
For f=x/y, with s.e.'s of sx and sy, returns f and approx sf...
# COVARIANCE SO FAR IGNORED!

They need to be floats at the moment
"""
    #from matplotlib import *
    x=array(x)
    y=array(y)
    sx=array(sx)
    sy=array(sy)

    f=x/y
    sfoverf=sqrt((sx/x)*(sx/x)+(sy/y)*(sy/y))
    sf=sfoverf*f
    return(f,sf)






def categoryBarPlot(labels,y,skipEmpty=True,sortDecreasing=False,demo=False,labelLoc=None,yerr=None,horiz=True,barColour=None,labelColour=None,grouped=False,stacked=False,animate=False,filename='unnamedCategoryBarPlot',xlabel=None,ylabel=None,title=None,width=None):
    if sortDecreasing:
        print "CAUTION!!!!!!!!!!!!!!!may 2010: sortdecreasing does not sort the labels properly!!!!!!!!!!!!!!!!!!!!!!!!!1"
    """

April 2010: Stupid (bug??). barh does not return handles to the error bars -- it only gives you the rectangles. So I'm writing a new captureLines subroutine.

    Earlier:
    
    Obviously not finished. Jeezus. There's no grouped option. Write a group/stacked bar chart with labels ....

    etc. easy exercise. not sure why not done by others.


Sept 2010: Demo now even fails: on grouped bar plot

horiz: default bar orientation is horizontal. Set to False for vertical bars. Vertical is not yet implemented.


labelLoc modes:
'eitherSideOfZero': put label start or ending on the zero line, depending on whether bar goes pos or neg
'opposite bar or inside': this is the default.
'biggest space': put right/left/inside the bar, whichever has most horizontal space there!

'bestEdge':
'alignLeftAxis':
'eitherSideOfZero':
'fromLeft':
'atRightAxis':
'atRight':
'atLeft':

width: same option as taken by bar() function

    """

    if demo: # Should do a bunch of different formats to demonstrate stacked, errorbars, etc, etc,..
        tableFromXLS=[
        ['', 'USA(SCCBS)', 'Canada (ESC2)', 'Gallup ladder', 'Gallup SWL'],
        ['log(household income)', '0.13', '0.13', '0.34', '0.24'],
        ['visits to family', '0.05', '0.05', '', ''],
        ['number of / contact with close friends', '0.1', '0.06', '', ''],
        ['talk/contact with neighbours', '0.04', '0.05', '', ''],
        ['volunteer groups /memberships', '0.03', '0.01', '', ''],
        ['people can be trusted', '0.08', '0.07', '', ''],
        ['trust neighbours (to return a wallet)', '0.1', '0.07', '', ''],
        ['trust in police (to return a wallet)', '', '0.05', '', ''],
        ['frequency of religious attendance', '0.04', '0.02', '0', '0.03'],
        ['freedom to choose', '', '', '0.08', '0.1'],
        ['perception of corruption (neg)', '', '', '0.09', '0.05'],
        ['donated time', '', '', '0.01', '0.03'],
        ['donated money', '', '', '0.06', '0.05'],
        ['helped a stranger', '', '', '0.03', '0.05'],
        ['safe at night', '', '', '0.03', '0.01'],
        ['importance of religion', '0.03', '0.03', '0', '0.05'],
        ['can count on friends', '', '', '0.09', '0.12'],
        ]

        sccbs=array([tonumeric(tt[1]) for tt in tableFromXLS[1:]])
        esc2=array([tonumeric(tt[2]) for tt in tableFromXLS[1:]])
        ladder=array([tonumeric(tt[3]) for tt in tableFromXLS[1:]])
        swl=array([tonumeric(tt[4]) for tt in tableFromXLS[1:]])
        labels=[tt[0] for tt in tableFromXLS[1:]]



        # Basic demo single bars:
        figure(2)
        categoryBarPlot(labels,swl)#,labelLoc='atRight')#plt.transpose([sccbs,esc2,ladder,swl]))

        figure(5)
        if 0:
            categoryBarPlot(labels,swl,horiz=False)#,labelLoc='atRight')#plt.transpose([sccbs,esc2,ladder,swl]))

        # Grouped
        figure(1)
        categoryBarPlot(labels,plt.transpose([sccbs,esc2,ladder,swl]),grouped=['sccbs','esc2','ladder','swl'])#,labelLoc='atRight')#

        # Animated for presentations
        figure(6)
        yerr=0.2*swl
        categoryBarPlot(labels,swl,yerr=yerr,horiz=True,animate=True)#,labelLoc='atRight')#plt.transpose([sccbs,esc2,ladder,swl]))
        

        return()

    if plt.size(y,0)==len(labels):
        pass
    assert array(y).shape[0]==len(labels) # size(y,0)

    simplevector=len(array(y).shape)==1 # ie not stacked????
    #print 'simplevector',simplevector
    nGroups=len(labels)
    if not simplevector:
        nInGroup=array(y).shape[1]
    assert nGroups==array(y).shape[0]
    if not simplevector:
        if not stacked and not grouped:
            grouped=True
        assert stacked or grouped
        assert not stacked or not grouped

    if skipEmpty and simplevector: # Clean out empty/nan values
        # Not yet made skipEmpty for stacked, grouped.
        ii=plt.where(isfinite(y))
        labels=array(labels)[ii]
        y=array(y)[ii]



    if not barColour and grouped:
        barColour=[cifar['green'],cifar['cyan'],cifar['pink'],cifar['grey'],'r','b','g','k','c','m',cifar['green'],cifar['cyan'],cifar['pink'],cifar['grey'],'r','b','g','k','c','m',cifar['green'],cifar['cyan'],cifar['pink'],cifar['grey'],'r','b','g','k','c','m',cifar['green'],cifar['cyan'],cifar['pink'],cifar['grey'],'r','b','g','k','c','m']

    if not barColour:
        barColour={}
    if isinstance(barColour,dict):
        barColourList=[barColour.get(xx,cifar['cyan']) for xx in labels]
    elif isinstance(barColour,list):
        barColourList=barColour
    else:
        assert 0 # Can't parse barColour


    def unzip(recs):
        return zip(*recs)
    if sortDecreasing and simplevector:
        sss=zip(y,labels)
        sss.sort()# inverse=True  WHY INVERSE FAILES?
        [y,labels]=unzip(sss)


    def str2latexL(ss):
        subs=[
    ['$',r'\$'],
    ['_',r'\_'],
    ['%',r'\%'],
    ['#','\#'],
    ['^','\^'],
    ]
        for asub in subs:
            ss=ss.replace(asub[0],asub[1])
        return(ss)

    ind=arange(len(labels))
    if horiz: # We want to go from top to bottom of page, ie decreasing ordinate
        ind=arange(len(labels)-1,-1,-1)

    if width==None:
        width=1.0
        if labelLoc== 'atRight':
            width=0.8

    # pick TextColour:
    print ' April 2010: set colours porperly'
    if 1: # April 2010!!!!!!!!!!!!!! THIS IS NOT FINISHED YET!!!
        # And choose colour for the text labels:
        if labelColour==None:
            labelColour=[['k'],[cifar['pink']]][int(simplevector)]
        elif isinstance(labelColour,'str'):
            labelColour=labelColour*len(labels)
            #lcolour=labelColour
        else:
            foiuoi
            lcolour=labelColour[ii]
        #return(lcolour)
        if len(labelColour)==1:
            labelColour=labelColour*len(labels)
    htext=[[] for ff in labelColour]

    if simplevector:
        yloffset=width/2 # offset of labels from ind

        if horiz==True:
            if 0: # April 2010: do not do them all at once?? so tht I can animate...
                rects=plt.barh(ind,y,color=barColourList,height=width,xerr=yerr,ecolor=cifar['grey'])#,width=width)

            rects=plt.barh(ind,y,color=barColourList,height=width)
            xlrect=plt.xlim()
            erb=[] # list of errorbar handles
            if yerr in [None]:
                erb=[[]]*len(y)
            else:
                for ii in range(len(yerr)):
                    erb+=[plt.errorbar(y[ii],ind[ii]+yloffset, xerr=yerr[ii],ecolor=cifar['grey'],fmt=None)[1:]]
            plt.xlim(xlrect)
            plot([0,0],plt.ylim(),'k-')

            #,xerr=yerr,ecolor=cifar['grey'])#,width=width)
            """
plt.bar(pos,val, align='center')
erb = plt.errorbar(pos, val, yerr=[np.zeros(len(yerr)), yerr], fmt=None)
erb[1][0].set_visible(False) # make lower error cap invisible
plt.xticks(pos, ('Tom', 'Dick', 'Harry', 'Slim', 'Jim'))
errorbar((0+w/2,1+w/2),V,e,elinewidth=3,ecolor='k',fmt=None,capsize=10, mew=5)

plt.show()

"""


        else:
            rects=plt.bar(ind,y,color=barColourList,width=width,yerr=yerr,ecolor=cifar['grey'])#,width=width) cifar['cyan']

    if not simplevector:

        y=array(y)
        assert len(y.shape)==2
        if grouped:
            width=(1.0/(nInGroup+.8)) # So.. leave 0.8 bar's width spacing between groups
            yloffset=width*(nInGroup/2.0) # This isn't quite right!!
        rects=[]
        for iy in range(nInGroup):
            if yerr:
                rects+=[ plt.barh(ind+(nInGroup-1-iy)*width, y[:,iy], height=width,color=barColourList[iy],xerr=yerr[:,iy],ecolor=cifar['grey'])]
            else:
                rects+=[ plt.barh(ind+(nInGroup-1-iy)*width, y[:,iy], height=width,color=barColourList[iy],ecolor=cifar['grey'])]

    if horiz:
        plt.gca().set_yticklabels([])
    else:
        plt.gca().set_xticklabels([])


    'fromLeft'
    tsize1=12*(min(1,16.0/nGroups))

    if labelLoc in ['opposite bar or inside'] and not simplevector: # Default: Place each label in whichever place has more space: just right or left of the error bar, or inside the bar!!!
        if not horiz:
            from cpblUtilities import cwarning
            cwarning('oh-oh... this not programmed ye!!!!!!')#assert horiz # vert mode not coded yet.. just change xlim to ylim..
        tsize1=0.6*18*(min(1,16.0/nGroups))
        for ii in range(len(ind)):#[1:]:
            if not yerr==None:
                ye=yerr[ii]
            else:
                ye=0.0
            ys=array([yyy for yyy in y[ii] if not isnan(yyy)])
            if len(ys)==0:
                continue

            if all([yy<=0 or isnan(yy) for yy in y[ii]]): # For horizontal mode:
                posparams=sorted([[  min(ys-ye)-xlim()[0], min(ys-ye),'right'],
                    [  -max(ys+ye), 0,'right'],
                    [  xlim()[1], 0, 'left'],
                      ],reverse=True)##.sort()##.sort(key=lambda x:x[0])[0]
            else:
                posparams=sorted([[  xlim()[1]-max(ys+ye), max(ys+ye),'left'],
                    [  min(ys-ye), 0,'left'],
                    [  -xlim()[0], 0, 'right'],
                      ],reverse=True)#.sort()##.sort(key=lambda x:x[0])[0]
            pp=[ppp for ppp in posparams if not isnan(ppp[0]) or 1]

            #pp.sort(reverse=True)
            #print labels[ii],pp
            if not pp or isnan(pp[0][1]):
                assert 0
                pp[0]=[nan,xlim()[1],'right']
            pp=pp[0]
            # Don't convert labels with str2latex, as they may already be converted.
            htext[ii]=text(pp[1],ind[ii]+yloffset,' '+labels[ii],verticalalignment='center',horizontalalignment=pp[2],size=tsize1,color=labelColour[ii])
            if 'number of' in labels[ii]:
                assert 1



    if labelLoc in ['opposite bar or inside'] and  simplevector: # Default: Place each label in whichever place has more space: just right or left of the error bar, or inside the bar!!!
        """
        Jan 2010: this used to be the default. But is there a bug? Maybe my intention was the "biggest space" behaviour, which follows. The only change there was to change an xlim()[0] to xlim()[1]... which looks like a bug or??
        """
        if not horiz:
            cwarning('oh-oh... this not programmed ye!!!!!!')#assert horiz # vert mode not coded yet.. just change xlim to ylim..
        #assert horiz # vert mode not coded yet.. just change xlim to ylim..
        for ii in range(len(ind)):#[1:]:
            if not yerr==None:
                ye=yerr[ii]
            else:
                ye=0.0
            if y[ii]<=0: # For horizontal mode:, bar is to the left
                posparams=sorted([[  (y[ii]-ye)-xlim()[0], y[ii]-ye,'right'],
                    [  -(y[ii]+ye), 0,'right'],
                    [  xlim()[1], 0, 'left'],
                      ],reverse=True)##.sort()##.sort(key=lambda x:x[0])[0]
            else: # Bar is to the right
                posparams=sorted([[  xlim()[0]-y[ii]-ye, y[ii]+ye,'left'], # 
                    [  (y[ii]-ye), 0,'left'],
                    [  -xlim()[0], min([0,y[ii]-ye]), 'right'],
                      ],reverse=True)#.sort()##.sort(key=lambda x:x[0])[0]
            pp=posparams
            #pp.sort(reverse=True)
            # Until pylab allows me to right-pad text with spaces:
            pp=pp[0]
            dpad=(xlim()[1]-xlim()[0])/100.0
            padOffset={'right':-dpad,'left':dpad}[pp[2]]
            htext[ii]=text(pp[1]+padOffset,ind[ii]+yloffset,' '+labels[ii],verticalalignment='center',horizontalalignment=pp[2],size=tsize1,color=labelColour[ii])

    print 'NOT DON HEREE::'
    if 1 and labelLoc in [None,'biggest space'] and  simplevector: # : put right/left/inside the bar, whichever has most horizontal space there!
        if not horiz:
            cwarning('oh-oh... this not programmed ye!!!!!!')#assert horiz # vert mode not coded yet.. just change xlim to ylim..
        #assert horiz # vert mode not coded yet.. just change xlim to ylim..
        if horiz:
            for ii in range(len(ind)):#[1:]:
                if not yerr==None:
                    ye=yerr[ii]
                else:
                    ye=0.0
                if y[ii]<=0: # For horizontal mode:, bar is to the left
                    posparams=sorted([[  (y[ii]-ye)-xlim()[0], y[ii]-ye,'right'],
                        [  -(y[ii]+ye), 0,'right'],
                        [  xlim()[1], 0, 'left'],
                          ],reverse=True)##.sort()##.sort(key=lambda x:x[0])[0]
                else: # Bar is to the right
                    posparams=sorted([[  xlim()[1]-y[ii]-ye, y[ii]+ye,'left'], # 
                        [  (y[ii]-ye), 0,'left'],
                        [  -xlim()[0], min([0,y[ii]-ye]), 'right'],
                          ],reverse=True)#.sort()##.sort(key=lambda x:x[0])[0]
                pp=posparams
                #pp.sort(reverse=True)
                # Until pylab allows me to right-pad text with spaces:
                pp=pp[0]
                dpad=(xlim()[1]-xlim()[0])/100.0
                padOffset={'right':-dpad,'left':dpad}[pp[2]]
                htext[ii]=text(pp[1]+padOffset,ind[ii]+yloffset,' '+labels[ii],verticalalignment='center',horizontalalignment=pp[2],size=tsize1,color=labelColour[ii])
        else:
            print "This is not finished!! won't work"
            for ii in range(len(ind)):#[1:]:
                if not xerr==None:
                    xe=xerr[ii]
                else:
                    xe=0.0
                if x[ii]<=0: # For horizontal mode:, bar is to the left
                    posparams=sorted([[  (x[ii]-xe)-ylim()[0], x[ii]-xe,'right'],
                        [  -(x[ii]+xe), 0,'right'],
                        [  ylim()[1], 0, 'left'],
                          ],reverse=True)##.sort()##.sort(key=lambda x:x[0])[0]
                else: # Bar is to the right
                    posparams=sorted([[  ylim()[1]-x[ii]-xe, x[ii]+xe,'left'], # 
                        [  (x[ii]-xe), 0,'left'],
                        [  -ylim()[0], min([0,x[ii]-xe]), 'right'],
                          ],reverse=True)#.sort()##.sort(key=lambda x:x[0])[0]
                pp=posparams
                #pp.sort(reverse=True)
                # Until pylab allows me to right-pad text with spaces:
                pp=pp[0]
                dpad=(xlim()[1]-ylim()[0])/100.0
                padOffset={'right':-dpad,'left':dpad}[pp[2]]
                htext[ii]=text(pp[1]+padOffset,ind[ii]+yloffset,' '+labels[ii],verticalalignment='center',horizontalalignment=pp[2],size=tsize1,color=labelColour[ii])

    if labelLoc=='bestEdge':
        # Place text at chart edge furthest from zero:
        if xlim()[0]<0 and xlim()[1]>=0:
            if -xlim()[0]>xlim()[1]:
                labelLoc='alignLeftAxis'
            if -xlim()[0]<xlim()[1]:
                labelLoc='alignRightAxis'
    if labelLoc=='alignLeftAxis':
        for ii in range(len(ind)):
            htext[ii]=text(xlim()[0],ind[ii]+width/2,' '+labels[ii],verticalalignment='center',horizontalalignment='left',size=tsize1,color=labelColour[ii])
    if labelLoc=='eitherSideOfZero':
        for ii in range(len(ind)):
            htext[ii]=text(0.0,ind[ii]+width/2,' '+labels[ii],verticalalignment='center',horizontalalignment='left'*(y[ii]<=0)+'right'*(y[ii]>0),size=tsize1,color=cifar['pink'])

    if labelLoc=='fromLeft':
        for ii in range(len(ind)):
            htext[ii]=text(0,ind[ii]+width/2,' '+labels[ii],verticalalignment='center')
    elif labelLoc== 'atRight':
        for ii in range(nGroups):
            htext[ii]=text(max(y[ii]),ind[ii]+yloffset,' '+labels[ii]+' ',verticalalignment='center',horizontalalignment='right',size=tsize1,color=labelColour[ii])
        plt.yticks([])
    elif labelLoc== 'atRightAxis':
        for ii in range(nGroups):
            htext[ii]=text(xlim()[1],ind[ii]+yloffset,' '+labels[ii]+' ',verticalalignment='center',horizontalalignment='right',size=tsize1,color=labelColour[ii])
        plt.yticks([])
    elif labelLoc== 'atLeft':
        plt.yticks(ind+width/2., labels)

    leg=None
    if grouped and 0:
        leg=plt.legend( [rr[0] for rr in rects], grouped)


    if xlabel:
        plt.xlabel(xlabel)
    if ylabel:
        plt.ylabel(ylabel)
    if title:
        plt.title(title)
    # Orgainse all the plot objects to they can be accessed by cateogry:

    byBar=[[rects[ii],htext[ii],erb[ii]] for ii in range(len(y))]
    if animate:
        if animate in [True]:
            animate=range(len(y))
        else:
            from cpblUtilities import orderListByRule
            assert isinstance(animate,list)
            animate=orderListByRule(range(len(y)),animate)
        plt.setp([rects,htext,erb],'visible',False)
        for ii,ia in enumerate(animate):
            plt.setp(byBar[ia],'visible',True)
            savefigall(filename+'-%d'%ii)
            
    return({'legend':leg,'bars':rects,'labels':htext,'errors':erb,'byCategory':byBar})



##########################################################################
##########################################################################
#
fNaN=float('nan')
def tonumeric(lists,to='auto',nokeys=False,cNaN=fNaN,skipkeys=None,doNothing=False):
    #
    ##########################################################################
    ##########################################################################
    """ Just a recursive version of float(), almost.
It converts empty sets to NaNs, empty dicts to {}, empty strings to NaNs.
By default it converts strings to floats.
If nokeys is used, it will not affect keys in dicts, though it will still affect their contents.

2009 August: it now considers strings equal to "." to be null, ie nan.

2010 Feb: now able to deal with arrays (rather than lists) of strings?

2010 March: adding new option "skipkeys" which means not to convert any elements of a dict named with something in this list. Also, "doNothing" means return lists unmodified, useful for internal programming, below , in recursion.
    """
    if not skipkeys:
        skipkeys=[]
    if doNothing:
        return(lists)
    
    if lists.__class__ not in [list,str,unicode, int,float]:
        import numpy
        if isinstance(lists,numpy.ndarray): # Cannot use "not lists" expression for arrays, so rest of function  would break
            return(array([tonumeric(ele,to=to,nokeys=nokeys,cNaN=cNaN) for ele in lists]))

    if not lists and lists.__class__ in [list,str,unicode]: return(cNaN)
    if not lists and isinstance(lists,dict): return({})
    if isinstance(lists,int) or isinstance(lists,float): return(lists)
    if lists.__class__ in [str,unicode]:
        try:
            if lists.strip() in ['.','','NaN','nan']:
                return(cNaN)
            if to=='auto':
                if '.' in lists:
                    to=float
                else:
                    to=int
            if to==float:
                return(float(lists))
            elif to==int:
                if '.' in lists:
                    assert abs(float(lists))>=1 # or 5e-8 would fail here?
                    return(int(float(lists)))
                else:
                    return(int(lists))
        except: #  this catches everything right now. ie returns strings intact if they don't look numeric
            return(lists)
            #return(float('nan'))
    elif isinstance(lists,list):
        return([tonumeric(ele,to=to,nokeys=nokeys,cNaN=cNaN) for ele in lists])
    elif isinstance(lists,dict):
        if nokeys:
            return(dict([ (k,tonumeric(lists[k],to=to,nokeys=nokeys,cNaN=cNaN,doNothing=k in skipkeys )) for k in lists.keys()]))
        else:
            return(dict([(tonumeric(k,to=to,nokeys=nokeys,cNaN=cNaN),tonumeric(lists[k],to=to,nokeys=nokeys,cNaN=cNaN,doNothing=k in skipkeys )) for k in lists.keys() ]))




def labelLine(lines):
    """
    Wrap this around (ie feed it hte output of) any plot or errorbar command. In both cases it will put a label at the right end of the line...
    """
    aline=lines
    while not aline.__class__== matplotlib.lines.Line2D:
        aline=aline[0]
    alabel=aline._label
    text(aline._x[-1],aline._y[-1],alabel,color=aline.get_color())
    return(lines)


def plotWithEnvelope(x,y,yLow,yHigh,linestyle='-',linecolor=None,facecolor=None,alpha=0.5,label=None,lineLabel=None,patchLabel=None,laxSkipNaNsSE=False,laxSkipNaNsXY=False,skipZeroSE=False,ax=None,laxFail=True):
    """ For plotting a line with standard errors at each point...
    This makes a patch object that gives the envelope around the line.

yLow and yHigh are the confidence interval lower and upper points.


uhh.. this could have been done with plt.fill() more compactly! [Done]


you can then make a legend (if you've specified lineLabel or patchLabel on each) using "legend()".

    CPBL, Jan 2010

April 2010: rewrote this with pylab.fill() and pylab.poly_between():  Vastly nicer! than old patch() kludge.

April 2010: Added facility for dealing with NaNs. We just excise them rather than splitting the plot.. (this could be an option)

May 2010: laxSkipNaNs says that rather than requiring all values to be defined for all or none of x,y,yLow,yHigh, the function will allow different subsets of each to be defined. So all that will be plotted are those x values for which all are defined (ie non NaN).  Maybe a better behaviour would be to mark points or something where x,y are defined but not yLow,yHigh...?

August 2010: Huh? No explanation for what laxSkipNaNsSE was supposed to do. if lax..XY is turned on, then lax..SE is forced on.  I think what I want is simply to drop all points with SE==NaN if lax is set, right?
Jan 2011: Maybe, but I should also simply skip the envelope whenever all the se's are nan!

Jan 2011: There exists now a fill_between() in matplotlib... I now use that below?
    """
    from pylab import isnan,isfinite,find,where,logical_and,logical_or,logical_not#,any,sqrt,array

    if linecolor==None and not facecolor==None:
        linecolor=facecolor
    if not linecolor==None and facecolor==None:
        facecolor=linecolor
    if 1: # NO!!! Aug 2010 I need to make this use the next colour in matplotlib's colour sequence... where is that??
        if facecolor==None:
            facecolor='g' 
        if linecolor==None:
            linecolor='g'
    
    if laxSkipNaNsXY: 
        laxSkipNaNsSE=True
        
    if label:
        assert not lineLabel
        assert not patchLabel
        lineLabel=label
        patchLabel=label

    if ax==None:
        ax=plt.gca()

    explainQuit=True
    quitRS='   plotWithEnvelope: Not making any envelope line "%s" '%str(lineLabel)
    if isinstance(x,float) and  isnan(x):
        assert laxFail
        return([],[])
    if not list( x ) and not list( y ) and not list( yLow ) and not list( yHigh):
        print quitRS+' because x and y are empty'
        assert laxFail
        return([],[])
    if all(isnan( x )) and all(isnan( y )) and all(isnan( yLow )) and all(isnan( yHigh)):
        print quitRS+' because x and y are all NaN'
        assert laxFail
        return([],[])
    if any(isfinite( x )) and all( isnan( y )) and all( isnan( yLow )) and all( isnan(yHigh)):
        print quitRS+' because y are all NaN'
        assert laxFail
        return([],[])
    if len(find(isfinite(y)))==1:
        print quitRS+" because there's only one y point to plot!"
        assert laxFail
        return([],[])
    # Deal is NaNs.
    """
What should we allow?? there must be matching between y,yLow,yHigh. But y need not match x. ?


Aug 2010: I'd like to rewrite this whole thing (?)
Should just have an iGood, and add each criterion one by one; can do asserts and notes when something changes.

"""
    xNaN=set(find(isnan(x)))
    yNaN=set(find(isnan(y)))
    seNaN=set(find(logical_or(logical_not(isfinite(yLow)),logical_not(isfinite(yHigh)))))
    bNoSE=all(logical_or(logical_not(isfinite(yLow)),logical_not(isfinite(yHigh))))
    seZero=set(find(logical_or(yLow==y,yHigh==y)))
    iGood=set(range(len(y)))

    assert len(x)==len(y)
    assert len(x)==len(yLow)
    assert len(x)==len(yHigh)
        
    if laxSkipNaNsXY and not bNoSE: # bNoSE means we should entirely skip the envelope
        iGood=iGood-seNaN
    if skipZeroSE:
        iGood=iGood-seZero
    if laxSkipNaNsXY:
        iGood=iGood-xNaN-yNaN
    else:
        assert xNaN==yNaN
    iGood=sorted(list(iGood))
    assert iGood



    # Ahh! How to subscript with a vector??
    vals=deepcopy([x,y,yLow,yHigh])
    xorig,yorig,yLoworig,yHighOrig=deepcopy( (x,y,yLow,yHigh))
    x,y,yLow,yHigh=[[vvv[ii] for ii in iGood] for vvv in vals]
    #x[iGood],y[iGood],yLow[iGood],yHigh[iGood]



    if 0: # Old stuff, sitll needs to be reincorporated with above!!!!!

        if any(isnan(x)) or any(isnan(y)) or any(isnan(yLow)) or any(isnan(yHigh)):
            na,nb,nc,nd=where(isfinite(x)),where(isfinite(y)),where(isfinite(yLow)),where(isfinite(yHigh))
            if laxSkipNaNsSE:
                ne=where(logical_and(logical_and(isfinite(x),isfinite(y)),logical_and(isfinite(yLow),isfinite(yHigh))))
                if not laxSkipNaNsXY:
                    assert set(na[0])==set(nb[0]) # Still require x and y elements to match NaNs
                if not set(nb[0])==set(nc[0]) or not set(nc[0])==set(nd[0]):
                    print 'Warning: laxSkipNaNs is letting go a mismatch .......'
                # Aug 2010: I think above ifs are wrong! What has it got to do with laxSE?
                iGood=ne[0]
            else:
                assert set(na[0])==set(nb[0])
                assert set(nb[0])==set(nc[0])
                assert set(nc[0])==set(nd[0])
                iGood=nb[0]
            # Is below redundant?!/ ancient?
            assert list(na[0]) not in [[]] # For now, don't deal yet with nan's in x..??

            assert list(iGood) not in [[]] # For now, don't deal yet with nan's in x..??

        # --------------- OLD SECTION ABOVE TO BE REINCORPORATED IN TO FURTHER ABOVE --------------


    assert any(isfinite(x)) and  any(isfinite(y))

    hLine=ax.plot(x,y,linestyle=linestyle,color=linecolor,linewidth=2,label=lineLabel)



    #lseX=list(seX)
    #lseY=list(seY)
    #polyXY=plt.array(zip(seX,seY))###
    # was: 

    if 0: # Obselete now that fill_between exists:
        xs, ys = plt.poly_between(x, yLow, yHigh)
        if bNoSE:
            envelopePatch=None
        else:
            envelopePatch=plt.fill(xs,ys,facecolor=facecolor,alpha=alpha,linewidth=0,label=patchLabel)# edgecolor=None, does not work!! So use linewidth instead?
    if bNoSE:
        envelopePatch=None
    else:
        if patchLabel is not None:
            envelopePatch=plt.fill_between(x, yLow, yHigh,facecolor=facecolor,alpha=alpha,linewidth=0,label=patchLabel)# edgecolor=None, does not work!! So use linewidth instead?
        else:
            envelopePatch=plt.fill_between(x, yLow, yHigh,facecolor=facecolor,alpha=alpha,linewidth=0)# edgecolor=None, does not work!! So use linewidth instead?



    #seY=yLow+yHigh[::-1]
    #seX=x+x[::-1]

    #polyXY=plt.transpose(plt.array([seX,seY]))

    #envelopePatch=mpl.patches.Polygon(polyXY,True,facecolor=facecolor,alpha=alpha,linewidth=0,label=patchLabel)# edgecolor=None, does not work!! So use linewidth instead?
    #ax.add_patch(envelopePatch)

    return(hLine,envelopePatch)


def axisNearlyTight(ax=None):
    from pylab import axis,getp,xlim,ylim,gca,exp,log
    if ax==None:
        ax=gca()
    plt.axes(ax)
    plt.axis('tight') # Sigh... this seems not to work well.
    if getp(ax,'xscale')=='log':
        xr=max(xlim())/min(xlim())
        xlim(min(xlim())/exp(log(xr)/20.0),max(xlim())*exp(log(xr)/20.0))
    else:
        xr=max(xlim())-min(xlim())
        xlim(min(xlim())-xr/20.0,max(xlim())+xr/20.0)
    if getp(ax,'yscale')=='log':
        yr=max(ylim())/min(ylim())
        ylim(min(ylim())/exp(log(yr)/20.0),max(ylim())*exp(log(yr)/20.0))
    else:
        yr=max(ylim())-min(ylim())
        ylim(min(ylim())-yr/20.0,max(ylim())+yr/20.0)


def figureFontSetup():
    """
Set font size settings for matplotlib figures so that they are reasonable for exporting to PDF to use in publications / presentations.....
    """
    params = {#'backend': 'ps',
           'axes.labelsize': 16,
           'text.fontsize': 14,
           'legend.fontsize': 10,
           'xtick.labelsize': 16,
           'ytick.labelsize': 16,
           'text.usetex': True,}
           #'figure.figsize': fig_size}
    plt.rcParams.update(params)
    return(params)


def addSignatureToPlot():
    """
    my translegend can now also add annotations [annotate] at the bottom (or top!?) of the legend.
    But this is nice; you can include newlines etc in the text.
    Could add option for box, and options for placement.
    """
    Dx=max(xlim())-min(xlim())
    Dy=max(ylim())-min(ylim())
    th=plt.text(max(xlim())-Dx/100.0,min(ylim())+Dy/50.0,'C. Barrington-Leigh\nMcGill');
    plt.setp(th,'fontsize',7,'verticalalignment','bottom','horizontalalignment','right')
    return(th)

if __name__ == '__main__':
    from cpblUtilities import *    
    categoryBarPlot([],[],demo=True)
    show()


##########################################################################
##########################################################################
#
class Position:
    #
    ##########################################################################
    ##########################################################################
    """ This is about as much GIS as I really need here: calculate great circle distance between two points on a sphere. This is done through the subtraction operation (-) for this clas. """

    def __init__(self, longitude, latitude):
        import math
        'init position with longitude/latitude coordinates'
        llx = math.radians(longitude)
        lly = math.radians(latitude)
        self.x = math.sin(llx) * math.cos(lly)
        self.y = math.cos(llx) * math.cos(lly)
        self.z = math.sin(lly)

    def __sub__(self, other):
        import math
        'get distance in km between two positions'
        d = self.x * other.x + self.y * other.y + self.z * other.z
        if d < -1:
            d = -1
        if d > 1:
            d = 1
        km = math.acos(d) / math.pi * 20000.0
        return km

    #def __mul__(self, other): # Scalar multiply?
    #    import math
    #    assert isinstance(other,float)
    #    return(self.__init(self.x*other,self.y*other))

    # Yikes! This needs to be able to return lat, lon, etc.. much more needed.





##########################################################################
##########################################################################
#    This function is meant to absorb scatterPlotSEs, eventually.
def cpblScatterPlot(
# Common arguments:
x,y,
# Deprecated:
fn=None,
# arguments for by-region group-coloured plots:
ids=None,xlab=None,ylab=None,fname=None,fig=None,xlog=False,byRegion=None,labelFunction=None,labelFunctionArgs=None,fitEachRegion=False,regionOrder=None,legendMode=None,markersize=None,markerMode=None,subsetIndices=None,forceUpdate=True,         
# arguments for error-bar plots (from scatterPlotSE, not yet incorporated, but holding space....)
xse=None,yse=None,labels=None,nearlyTight=True,color=None): #markersize=10
    #
    ##########################################################################
    ##########################################################################
    """ 
Feb 2010: Adding "byRegion" option: pass the region name for each country

March 2010: Moved to cpblUtilitiesMathGraph from regressionsGallup (may need generalising!?)
    # So this is for Gallup...

Hmmm. trying to generalise, so:

    if labelFunction(x,y,ids,color=None, labelFunctionArgsDict=None) is specified, it will be used to label nations within each region.


    fitEachRegion: turn this on to add a temporary regression line to each subsample, and one to the overall set.

    March 2010: Getting rid of logx and making an xlog, ie just a flag that the x value is a log value already.  If so, this is noted in the filename and  reflected in the axis scaling. Prior to now, this function used to make both non-log and log versions whenever the defunct logx was True.


N.B.!!!!! I now have two scatter plot functions. One that makes SEs and one that does it by regions. I am in the process of combining these. There will be ACTIONMODE to do different things, in some cases, depending on what is passed.

LAlignMode:
nnMLB
MLT

April 2010: Automatically choose this based on density of points in four quadrants? Argh, am I reinventing the auto legend placements??
So far I haven't done the following at the beginning... hmmm. (elementwise logic)
    xx=x[find(~isnan(x) & ~isnan(y))]
    yy=x[find(~isnan(x) & ~isnan(y))]


September 2010: added z=None. z data will be represented in symbol size, at least for the currently-displayed region...  At first, z data will need to be in the actual right units of size, not some arbitrary scale. well, maybe it should be renamed o be symbolsize (yes) for now.

markerMode: This will specify various behaviour about when/whether dots or scaled-size scatterplot symbols are shown. Default behaviour for starters (Sept 2010):  when symbolSize data are provided, they are only displayed for each region when it's being highlighted. ie simple dots are shown the rest of the time. Also, the symbolsizes are just relative, and are all rescaled. Also, they are rescaled for EACH GROUP, not for the whole set. Note tht markerMode can be a list of strings to specify various settings.
Here are some options
['hiddenInGlobalView',  # Agh.. no, right now this doesn't work, cause i want hidden in dots view but not hidden in final, with lines and annotations view.
'rescaledByGroup' or 'rescaledGlobal' or 'absoluteScale']
]
I also need to make a "labelsMode" or get the country labels working somehow.

Hm, did I know about scatter() when I wrote this?

Sept 2010: forceUpdate=True: By default, this function always recreates plots. But if forceUdpate=False, it will skip actually saving the plots if it already exists!

Sept 2010: TO DO!!! incorporate the cpblUtilitiesLabelPlacement code!
In general, it seems the label placement is so customised/kludged, it may be better to just use the labelfunction as much as possible...


Sept 2010: Now returns a list of filenames (or stems) ...?

Sep 2010:  This used to capitalise axis labels in the byregion mode.  I'm taking that off. Can put it back in as an option, but then need to not capitalise stuff inside $...$. !

    """
    from cpblUtilities import uniqueInOrder
    import pylab as plt
    from pylab import xlim,ylim,arrow,errorbar,figure,savefig,setp,getp,ion,show,array,gca,log10

    if fig==None:
        fig=figure()
    else:
        figure(fig)
    if ylab==None:
        ylab=''
    if xlab==None:
        xlab=''
    def dummyLabelFunction(x,y,ids,args=None,color=None):
        return([])
    if labelFunction==None:
        labelFunction=dummyLabelFunction


    if xlog==True:
        xlog='-logx'
    else:
        assert xlog in [False, None]
        xlog=''
    if fn is not None:
        assert fname is None
        fname=fn
        print " WARNING! USE OF fn IS DEPRECATED IN CPBLSCATTERPLOT. USE FNAME"
    fDir=None
    if not fname is None:
        fDir,fname= os.path.split(fname)
    if not fDir:
        from cpblDefaults import paths
        fDir=paths['graphics']#'/home/cpbl/gallup/graphicsOut/'
    else:
        fDir+='/'
    if markersize in [None]:
        markersize=10 # This is needed for s.e. version..
        if 0:  #Agh, no! The following fails in one of the kinds of scatter plots! (se)
            markersize=[None for _ in x]
        assert markerMode in [None]
    #if markersize.__class__ in [int,float]:
    #    markersize=array([markersize for xx in x])

    if markerMode in [None]:
        markerMode=['hiddenInGlobalView','rescaledByGroup']
    assert not ('rescaledByGroup' in markerMode and  'rescaledGlobally' in markerMode)
    assert not ('absoluteScale' in markerMode and ('rescaledByGroup' in markerMode or  'rescaledGlobally' in markerMode))



    ACTIONMODE=[]
    if xse is not None or yse is not None:
        ACTIONMODE+=['errorbars']
    if byRegion:
        ACTIONMODE+=['by region']
    if not ACTIONMODE:
        ACTIONMODE+=['errorbars']

    outfns=[] #Collect list of graphics files written.

    def safelen(stuff): # return len if it's not in [None, [], array([])]:
        if stuff in [None,[]]:
            return(0)
        if stuff.__class__ in [int,float]:
            return(1)
        return(array(stuff).size)# isinstance(a, numpy.ndarray)

        
    assert len(x)==len(y)
    origLen=len(x)
    x=array(x)
    y=array(y)
    if 'by region' in ACTIONMODE:
        assert len(x)==len(ids)
        ids=array(ids)
    if subsetIndices not in [None]:#.any():
        x=x[subsetIndices]
        y=y[subsetIndices]
        if 'by region' in ACTIONMODE:
            byRegion=array(byRegion)[subsetIndices]
        if safelen(ids):
            ids=ids[subsetIndices]
        if safelen(markersize)==origLen:
            markersize=array(markersize)[subsetIndices]
        if xse is not None:
            xse=xse[subsetIndices]
        if yse is not None:
            yse=yse[subsetIndices]
    if isinstance(labels,dict): # So labels is a lookup by ids. Convert to a list.
        assert safelen(ids)
        labels=[labels[anid] for anid in ids]
    if safelen(labels):
        assert not labelFunction or labelFunction  == dummyLabelFunction
    if not labelFunction  == dummyLabelFunction:
        assert safelen(ids)

    if 'errorbars' in ACTIONMODE:
        """ This is my Sept 2010 kludge to start merging these functions."""

        """
        def _scatterPlotSE(x,y,xse,yse,fname=None,labels=None,nearlyTight=True,color=None,markersize=10):


        NearlyTight sets the xlim to just slightly bigger than tight.


    N.B.!!!!! I now have two scatter plot functions. One that makes SEs and one that does it by regions.
    Shouldn't I combine these????
    yes. They're now both in cpblScatterPlot()

        $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
        $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
        $ BEGIN:  Scatter plot with STANDARD ERRORS
        $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
        $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
        """

        if safelen(labels):##labels and labels.size:
            labels=array(labels)
        else:
            labels=array([])

        if not fname:
            fnameTitle='test'
        try: # June 2011: Not sure why fnameTitle is not available now...
            fnameTitle
        except:
            print '       caution: using '+fname+' in place of fnameTitle (undefined) (loc2)'
            fnameTitle=fname

        figureFontSetup()


        fign=    plt.mod(sum([ord(f)^2 for f in fnameTitle]),1000) 
        fig = plt.figure(fign)
        #plt.setp(fig,'figheight',11.0125,'figwidth', 8.1125)
        fig.clear()
        ax = fig.add_subplot(111)

        #LL=0.1*(max(xlim())-min(xlim()))*min([sss for sss in ses if not isnan(sss)])

        if color==None:
            color='k'

        plabels,bplabels,perrorbars=[],[],[]
        secolor='0.85' # ie a shade of gray.  secolor='gray'
        # If xse, yse =None,Non, following will not barf, but nothing plotted
        if isinstance(color,list):
            for ixx,xx in enumerate(x):
                perrorbars+=ax.errorbar(xx,y[ixx],yerr=yse[ixx],xerr=xse[ixx],fmt='o',ecolor=secolor,markersize=10,color=color[ixx],label=labels[ixx])
        else:
            perrorbars+=ax.errorbar(x,y,yerr=yse,xerr=xse,fmt='o',ecolor=secolor,markersize=markersize,color=color)
        adot=ax.plot(x,y,'.',markersize=10,color=color[0])

        rotateLabels=False
        print '+++++++++++++++++++++++++',fnameTitle
        if 0 and "LadderVLadder" in fnameTitle:
            oiuoiu
        if 0 and any(perrorbars):
            setp(adot,'visible',False)
        ## else:
        ##     adot=[]
        ##     for ixx,xx in enumerate(x):
        ##         adot+=ax.plot(xx,y[ixx],'.',markersize=10,label=labels[ixx],color=color[ixx])

        if labelFunction is not dummyLabelFunction:
            plabels=labelFunction(x,y,ids,args=labelFunctionArgs)#,color=rcolors[ir])#,label=aregion)
        elif  safelen(labels):#.size: # ie if not empty
            """
            I can make the labels go along the lines, but I'd need to convert dy and dx to more physical units, since the axes are not in image mode.
            """
            for iix in range(len(x)):#if 1:#len(coefsByCRuid.keys())<20:#postPlotArgs['geoOrder'])<20:
                #geoLabels=dict(provinceNames)
                xi,yi=x[iix],y[iix]
                # Bug in matplotlib: text fails if NaN:
                if isnan(xi) or isnan(yi):
                    continue
                if not isnan(xi) and not isnan(yi):
                    if rotateLabels:
                        plabels+=[plt.text(xi,yi,namesByCRuid[CRuid],rotation=plt.arctan2(dy,dx)*57.0)]
                    else:
                        plabels+=[plt.text(xi+.01,yi,labels[iix]+'   ',rotation='horizontal',horizontalalignment='right',fontsize=14,verticalalignment='center',color=plt.getp(adot[0],'color'))]

                if 'betterLabels':
                    Dx=max(xlim())-min(xlim())
                    Dy=max(ylim())-min(ylim())
                    if xi>= min(xlim())+.8*Dx:
                        offset=-Dx/100.0
                        ha='right'
                    else:
                        offset=Dx/100
                        ha='left'

                    bplabels+=[plt.text(xi+offset,yi+Dy/100.0,labels[iix]+'   ',rotation='horizontal',horizontalalignment=ha,fontsize=14,verticalalignment='center',color=plt.getp(adot[0],'color'))]
                    plt.setp(plabels,'visible',False)


        if rotateLabels:
            plt.axis('image')
                #horizontalalignment='right',fontsize=8,verticalalignment='center',


        #plt.ylabel(yName.upper(),size=20)
        #plt.xlabel(xName.upper(),size=20)

        if 0: # Sorry..!! July 2011 I'm cutting out this kludge from some specific thing?!. oh.. no.. it's a bug in pylab that I'm hoping will disappear at some point??
                # Uhhh... fix an annoying bug in text placed with whitespace at end:
                xr=max(xlim())-min(xlim())
                [label.set_position(label.get_position()+plt.array([-xr/12.0,0])) for label in plabels]

        figure(fign)
        if nearlyTight:
            plt.axis('tight') # Sigh... this seems not to work well.
            xr=max(xlim())-min(xlim())
            yr=max(ylim())-min(ylim())
            xlim(min(xlim())-xr/20.0,max(xlim())+xr/20.0)
            ylim(min(ylim())-yr/20.0,max(ylim())+yr/20.0)


        #return(plabels,[])
        """
    for ic=1:length(names)
        name=names{ic};
        Dx=max(xlim)-min(xlim);
        Dy=max(ylim)-min(ylim);
        if x(ic)>= min(xlim)+.8*Dx,
            offset=-Dx/100; ha='right';
            %offset=Dx/100; ha='left';
        else
            offset=Dx/100; ha='left';%.006
        end % if

        if length(name)>1,
            th=text(x(ic)+offset,y(ic)+.01,name);%[upper(name(1)) lower(name(2:end))]);
            set(th,'fontsize',10,'verticalalignment','middle','horizontalalignment',ha)
            if th,        ths=[ths th]; end %if
        end %if
        fprintf(' Labelling "%s"\n',name);

        if settings.signature,
            th=text(max(xlim)-Dx/100,min(ylim)+Dy/50,'C. Barrington-Leigh, UBC');
            set(th,'fontsize',7,'verticalalignment','bottom','horizontalalignment','right')
        end % if
    end %for

    """

        #_scatterPlotSE(x,y,xse,yse,fname,labels,nearlyTight,color,markersize)
        xlabel(xlab)
        ylabel(ylab)
    if 'by region' in ACTIONMODE:
        """
        $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
        $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
        $ BEGIN:  Scatter plot with REGION GROUPS
        $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
        $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
        """
        # These are hardcoded or Gallup, so need fixing/parameterising for general case!
        limsDefault={
        'SWLylim':(2,8.5),
        'INCFRACxlim':  [0.15,12],
        'INCFRACxlimLOG':(0.007,2),
        'POWERxlimLOG':(.2,10.5),
            }
        lims=limsDefault

    ##         if 'INCOME' in xlab.upper() and 'FRACTION' in xlab.upper():
    ##             xlim(INCFRACxlimLOG)
    ##         if 'POWER PER CAPITA' in xlab.upper():
    ##             xlim(POWERxlimLOG)
    ##         if 'SATISFACTION' in ylab.upper():
    ##             ylim(SWLylim)


        clf()
        adot=plot(x,y,'.',hold=True)
        if xlog:
            setp(gca(),'xscale','log')
        axisNearlyTight()
        if 'SATISFACTION' in ylab.upper():
            ylim(lims['SWLylim'])
        if xlog:
            if 'INCOME' in xlab.upper() and 'FRACTION' in xlab.upper():
                xlim(lims['INCFRACxlimLOG'])
            if 'POWER PER CAPITA' in xlab.upper():
                xlim(lims['POWERxlimLOG'])
            if 'SATISFACTION' in ylab.upper():
                ylim(lims['SWLylim'])
                #INCFRACxlim=  [0.15,12]


        hDbyRegion,hLbyRegion,fitsByRegion,bigMarkersByRegion={},{},{},{}
        """'cifar['cyan']'# 'CIFARdarkgreen' 'cifar['green']'# 'cifar['grey']' 'CIFARlightblue' 'CIFARpalekhaki' 'cifar['pink']'# 'CIFARyellow'
        """
        rcolors=['r','b','g','k','m','c','y']+[cifar[kk] for kk in cifar]+['r','b','g','k','m','c','y']+[cifar[kk] for kk in cifar]+['r','b','g','k','m','c','y']+[cifar[kk] for kk in cifar]+['r','b','g','k','m','c','y']+[cifar[kk] for kk in cifar]

        if regionOrder:
            regs=regionOrder
        else:
            regs=uniqueInOrder([kk for kk in byRegion if not (isinstance(kk,float) and isnan(kk))], drop= ['98','99','']+['nan']+[nan])
            regs=uniqueInOrder( byRegion, drop= ['98','99','']+['nan']+[nan,NaN])#dropNaN=True)
        #assert regs or not uniqueInOrder(byRegion)
        assert not len(regs)==0


        # Now evaluate where to put legend based on density of points:
        xx=x[find(~isnan(x) & ~isnan(y))]
        yy=y[find(~isnan(x) & ~isnan(y))]
        if xlog:
            LL=xx<=sqrt(xlim()[0]*xlim()[1])
        else:
            LL=xx<=mean(xlim())
        TT=yy>= mean(ylim())
        quadrants=[len(find(LL * TT)), len(find((~ LL) * TT)),
                   len(find(LL * (~ TT))), len(find((~ LL) * (~ TT))), ]
        wquadrant=[0,0,
                   0,0]
        # Automatically choose legendMode:
        assert not legendMode # unused now.
        if not legendMode:
            LAlignMode='auto'
            iLA=sorted( enumerate(quadrants),key=lambda z:z[1])[0][0] #['LLT','RRT', 'LLB','RRB']
        if xlog:
            xspace=pow(xlim()[1]/xlim()[0],(1.0/50))
            xbase=[min(xlim())*xspace,     max(xlim())/xspace,
                   min(xlim())*xspace,   max(xlim())/xspace  ]
        else:
            xspace=(max(xlim())-min(xlim()))/50.0
            xbase=[min(xlim())+xspace,     max(xlim())-xspace,
                   min(xlim())+xspace,   max(xlim())-xspace,  ]
        yspace=(max(ylim())-min(ylim()))/50.0
        ybase=[max(ylim())-yspace, max(ylim())-yspace,
               min(ylim())+yspace, min(ylim())+yspace ]
        ' 0.32 was for the Gallup incomes/SWL plots...  ie 7 regions??'

        yincrement=array([-0.32, -0.32,
                    +0.32, +0.32])/.32*(max(ylim())-min(ylim()))/2.0/len(uniqueInOrder(regs))
                     #+0.32, +0.32])/.32*(max(ylim())-min(ylim()))/15.0
        horizontalalignment=['left', 'right',
                             'left', 'right']
        verticalalignment=['top','top',
                             'bottom','bottom']
        for ir,aregion in enumerate(regs):
            inRegion=find(array(byRegion) == aregion)
            #if lastMarkers:
            #    plt.setp(lastMarkers,'visible',False)#fitsByRegion[aregion]        
            hDbyRegion[aregion]= plt.plot(x[inRegion],y[inRegion],'.',hold=True,color=rcolors[ir],label=aregion)

            if safelen(markersize)==safelen(x) and any(isfinite(array(markersize)[inRegion])): # This if line excludes Nones and NaNs , respectively.
                z=array(markersize)[inRegion]#array(markersize)[inRegion]
                #if z.__class__ in [int,float]:
                    #z=array([markersize for xx in x])
                if 'rescaledByGroup' in markerMode:
                    aa,bb=min(z),max(z)
                    z=((z/bb)-(aa/bb)+.1)*100
                if 'rescaledGlobally' in markerMode:
                    aa,bb=min(markersize),max(markersize)
                    z=((z/bb)-(aa/bb)+.1)*100
                if 'absoluteScale' in markerMode:
                    z=array(markersize[inRegion])
                bigMarkersByRegion[aregion]= plt.scatter(x[inRegion],y[inRegion],s=z,marker='o',hold=True,c=rcolors[ir],label=aregion)
            else:
                print '  Not using large markers here becuase markersize was not specified for each point'
                hDbyRegion[aregion]= plot(x[inRegion],y[inRegion],'.',hold=True,color=rcolors[ir],label=aregion)


            if labelFunction:
                hLbyRegion[aregion]=labelFunction(x[inRegion],y[inRegion],ids[inRegion],args={'xoff':0},color=rcolors[ir])#,label=aregion)
                setp(hLbyRegion[aregion],'label',aregion)
            else:
                if LAlignMode=='auto':
                    hLbyRegion[aregion]+=[text(xbase[iLA],ybase[iLA]+ir*yincrement[iLA],aregion,color=rcolors[ir], verticalalignment=verticalalignment[iLA],horizontalalignment=horizontalalignment[iLA])]
                elif xlog and LAlignMode=='MLB': #middle, left-aligned, bottom
                    hLbyRegion[aregion]+=[text(sqrt(xlim()[0]*xlim()[1]),min(ylim())+yspace+ir*.32,aregion,color=rcolors[ir])]
                elif not xlog and LAlignMode=='MLB': #middle, left-aligned, bottom
                    hLbyRegion[aregion]+=[text(mean(xlim()),             min(ylim())+yspace+ir*.32,aregion,color=rcolors[ir])]
                elif xlog and LAlignMode=='MLT': #middle, left-aligned, top
                    hLbyRegion[aregion]+=[text(sqrt(xlim()[0]*xlim()[1]),max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top')]
                elif not xlog and LAlignMode=='MLT': #middle, left-aligned, top
                    hLbyRegion[aregion]+=[text(mean(xlim()),             max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top')]
                elif xlog and LAlignMode=='RRB': #middle, left-aligned, bottom
                    hLbyRegion[aregion]+=[text(max(xlim())/xspace,min(ylim())+yspace+ir*.32,aregion,color=rcolors[ir],horizontalalignment='right')]
                elif not xlog and LAlignMode=='RRB': #middle, left-aligned, bottom
                    hLbyRegion[aregion]+=[text(max(xlim())-xspace,             min(ylim())+yspace+ir*.32,aregion,color=rcolors[ir],horizontalalignment='right')]
                elif xlog and LAlignMode=='RRT': #middle, left-aligned, top
                    hLbyRegion[aregion]+=[text(max(xlim())/xspace,max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top',horizontalalignment='right')]
                elif not xlog and LAlignMode=='RRT': #middle, left-aligned, top
                    hLbyRegion[aregion]+=[text(max(xlim())-xspace,             max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top',horizontalalignment='right')]
                elif xlog and LAlignMode=='LLT': #left, left-aligned, top
                    hLbyRegion[aregion]+=[text(xspace*min(xlim()),max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top')]
                elif not xlog and LAlignMode=='LLT': #left, left-aligned, top
                    hLbyRegion[aregion]+=[text(xspace+min(xlim()),             max(ylim())-yspace-ir*.32,aregion,color=rcolors[ir],verticalalignment='top')]
                else:
                    assert 0 # Don't know LAlignMode
                    ###print 'aaaaaaaaaaaaaaaaaaaaahhhhhhh this fails on fedora!!!!! '
            if any(isfinite(x[inRegion])) and any(isfinite(y[inRegion])) :#'fails on fedora only':
                fitsByRegion[aregion],gradient,s=overplotLinFit(x[inRegion],y[inRegion],format='-',color=rcolors[ir],xscalelog=xlog)
                assert fitsByRegion[aregion]
            else:
                fitsByRegion[aregion],gradient,s=[],[],[]
        #legend([hLbyRegion[kk][0] for kk in regs])

        #labs=labelFunction(x,y,ids,xoff=0)
        #plot(x,y,'.',hold=True)
        setp(adot,'visible',False)
        del adot
        xlabel(xlab.replace('KW/','kW/')) # .upper()
        ylabel(ylab)  #.upper()
        if 'SATISFACTION' in ylab.upper():
            ylim(lims['SWLylim'])
        from cpblUtilities import flattenList
        #allFits=flattenList([fitsByRegion[kk] for kk in fitsByRegion])
        #print allFits
        setp(fitsByRegion.values(),'visible',False)
        setp(hLbyRegion.values(),'visible',False)
        setp(bigMarkersByRegion.values(),'visible',False)

        #savefigall('/home/cpbl/gallup/graphicsOut/'+fname+'-8')#,transparent=transparent)
        for ir,region in enumerate(regs):
            if fname:
                outfns.append(savefigall(fDir+fname+xlog+'-%d'%(0+ir),skipIfExists=not forceUpdate))#(len(regs)-ir))#,transparent=transparent)
            setp(fitsByRegion.values(),'visible',False)
            #region=regs[len(regs)-ir-1]
            #setp(fitsByRegion.values(),'visible',False)
            setp(bigMarkersByRegion.values(),'visible',False)
            setp(hLbyRegion.values(),'visible',False)
            setp(fitsByRegion[region],'visible',True)
            setp(hLbyRegion[region],'visible',True)
            setp(bigMarkersByRegion.get(region,[]),'visible',True)
            ###setp(fitsByRegion[region],'visible',False)

        if fname:
            outfns.append(savefigall(fDir+fname+xlog+'-%d'%(0+len(regs)),skipIfExists=not forceUpdate))#(len(regs)-ir))#,transparent=transparent)
        setp(fitsByRegion.values(),'visible',False)
        setp(hLbyRegion.values(),'visible',True)
        setp(bigMarkersByRegion.values(),'visible',True)
        if  'fails on fedora only': #It won't anymore, though the python stats linreg causes a warning... obselete?!
            fitsByRegion['all'],gradient,s=overplotLinFit(x,y,format='-',color=[0.5,0.5,0.5],xscalelog=xlog)
        if fname:
            outfns.append(savefigall(fDir+fname+xlog+'-%d'%(1+len(regs)),skipIfExists=not forceUpdate))
        setp(fitsByRegion.values(),'visible',False)
        if fname:
            outfns.append(savefigall(fDir+fname+xlog+'-%d'%(2+len(regs)),skipIfExists=not forceUpdate))
        try: # June 2011: Not sure why fnameTitle is not available now...
            fnameTitle
        except:
            print '       caution: using '+fname+' in place of fnameTitle (undefined) '
            fnameTitle=fname
        title(fnameTitle+xlog)


    if fname and not 'by region' in ACTIONMODE:
        outfns.append(savefigall(fDir+fname,skipIfExists=not forceUpdate))
        title(fname)

    return(outfns)

##     #savefig(fDir+fname+'.png',transparent=transparent)
##     #savefig(fDir+fname+'.pdf',transparent=transparent)

##     # Make log version too?
##     if 'INCOME' in xlab.upper() or logx:
##         clf()
##         dummy=plot(x,y,'.',hold=True)
##         axisNearlyTight()
##         if 'SATISFACTION' in ylab.upper():
##             ylim(SWLylim)
##         if 0:
##             setp(fitsByRegion.values(),'visible',False)
##             setp(hLbyRegion.values(),'visible',False)
##             # "del" below does NOT get rid of them! So do two lines above.
##             del hLbyRegion
##             del fitsByRegion
##         setp(gca(),'xscale','log')
##         axisNearlyTight()
##         if 'INCOME' in xlab.upper() and 'FRACTION' in xlab.upper():
##             xlim(INCFRACxlimLOG)
##         if 'POWER PER CAPITA' in xlab.upper():
##             xlim(POWERxlimLOG)
##         if 'SATISFACTION' in ylab.upper():
##             ylim(SWLylim)
##             #INCFRACxlim=  [0.15,12]


##         hDbyRegion,hLbyRegion,fitsByRegion={},{},{}
##         rcolors=['r','b','g','k','m','c','y']+[cifar[kk] for kk in cifar]
##         ncolor=0
##         x=array(x)
##         y=array(y)
##         ids=array(ids)
##         #regs=[kk for kk in uniqueInOrder(byRegion) if kk not in ['98','99','']]
##         for aregion in regs:
##             inRegion=find(array(byRegion) == aregion)
##             hDbyRegion[aregion]= plot(x[inRegion],y[inRegion],'.',hold=True,color=rcolors[ncolor],label=aregion)
##             hLbyRegion[aregion]=labelFunction(x[inRegion],y[inRegion],ids[inRegion],xoff=0,color=rcolors[ncolor])#,label=aregion)
##             setp(hLbyRegion[aregion],'label',aregion)
##             if 0:#hLbyRegion[aregion]:
##                 hLbyRegion[aregion].set_label(aregion)
##             text(sqrt(xlim()[0]*xlim()[1]),min(ylim())+.1+ncolor*.32,aregion,color=rcolors[ncolor])
##             ncolor+=1
##             fitsByRegion[aregion],gradient,s=overplotLinFit(x[inRegion],y[inRegion],format='-',color='grey')

##         #labelFunction(x,y,ids,xoff=0)


##         for ir,region in enumerate(regs):
##             savefigall(fDir+fname+'-logx-%d'%(1+ir))#(len(regs)-ir))#,transparent=transparent)
##             setp(fitsByRegion.values(),'visible',False)
##             #region=regs[len(regs)-ir-1]
##             #setp(fitsByRegion.values(),'visible',False)
##             setp(fitsByRegion[region],'visible',True)
##             setp(hLbyRegion[region],'visible',True)
##             ###setp(fitsByRegion[region],'visible',False)

##         savefigall(fDir+fname+'-logx-%d'%(1+len(regs)))#(len(regs)-ir))#,transparent=transparent)
##         setp(fitsByRegion.values(),'visible',False)
##         fitsByRegion['all'],gradient,s=overplotLinFit(x,y,format='-',color=[0.5,0.5,0.5])
##         savefigall(fDir+fname+'-logx-%d'%(2+len(regs)))
##         setp(fitsByRegion.values(),'visible',False)
##         savefigall(fDir+fname+'-logx-%d'%(3+len(regs)))


##         #savefig(fDir+fname+'_logx.png',transparent=transparent)
##         #savefig(fDir+fname+'_logx.pdf',transparent=transparent)

##     #savefig(fDir+fname+'_t.png',transparent=transparent)

scatterPlotByRegion=cpblScatterPlot

from math import modf, floor
def quantile(x, q,  qtype = 7, issorted = False):
    """

I want something like this but which admits weights for the data!

    
    Args:
       x - input data
       q - quantile
       qtype - algorithm
       issorted- True if x already sorted.
 
    Compute quantiles from input array x given q.For median,
    specify q=0.5.
 
    References:
       http://reference.wolfram.com/mathematica/ref/Quantile.html
       http://wiki.r-project.org/rwiki/doku.php?id=rdoc:stats:quantile
 
    Author:
	Ernesto P.Adorio Ph.D.
	UP Extension Program in Pampanga, Clark Field.
    """
    if not issorted:
        y = sorted(x)
    else:
        y = x
    if not (1 <= qtype <= 9): 
       return None  # error!
 
    # Parameters for the Hyndman and Fan algorithm
    abcd = [(0,   0, 1, 0), # inverse empirical distrib.function., R type 1
            (0.5, 0, 1, 0), # similar to type 1, averaged, R type 2
            (0.5, 0, 0, 0), # nearest order statistic,(SAS) R type 3
 
            (0,   0, 0, 1), # California linear interpolation, R type 4
            (0.5, 0, 0, 1), # hydrologists method, R type 5
            (0,   1, 0, 1), # mean-based estimate(Weibull method), (SPSS,Minitab), type 6 
            (1,  -1, 0, 1), # mode-based method,(S, S-Plus), R type 7
            (1.0/3, 1.0/3, 0, 1), # median-unbiased ,  R type 8
            (3/8.0, 0.25, 0, 1)   # normal-unbiased, R type 9.
           ]
 
    a, b, c, d = abcd[qtype-1]
    n = len(x)
    g, j = modf( a + (n+b) * q -1)
    if j < 0:
        return y[0]
    elif j > n:
        return y[n]
 
    j = int(floor(j))
    if g ==  0:
       return y[j]
    else:
       return y[j] + (y[j+1]- y[j])* (c + d * g)    
 
def quantileTest():
    x = [11.4, 17.3, 21.3, 25.9, 40.1, 50.5, 60.0, 70.0, 75]
 
    for qtype in range(1,10):
        print qtype, quantile(x, 0.35, qtype)

def transAnnotation(comments,pos=None):
    """ I think this is waiting to find the algorithm for optimal-positioning. [oct 2011]
    I suppose in the mean time you could just use it as a shorthand for putting things in corners?
    But it's hardly worth functionalising it. Just copy the code!
    """
    leftB,rightB=plt.xlim()
    botB,topB=plt.ylim()
    dX=rightB-leftB
    dY=topB-botB
    plt.gca().text(rightB-.025*dX,topB-.025*dY,waves[wave]+'\n'+r'$\pm$2 s.e. shown'+0*('Chris Barrington-Leigh\n                         McGill'), ha='right', va="top", size=14,  bbox=dict(boxstyle="round", fc="w", ec="0.5", alpha=0.9))
    
    

def transLegend(figlegend=False,comments=None,removeOldLegends=True,showOnly=None,title=None,loc='best',titlesize=None): # Make a legend and, if it's not empty, set some transparency properties.
    """
    Started out simply making a translucent legend (somewhat safely).

    If comments (extra lines in legend) supplied, then return value gives them too in a tuple. Comments come AFTER evverything else. The title gives stuff to go before the main content of the legend..

    removeOldLegends gets rid of one if there's already


    May 2010: how the hell do i restrict which objects get listed in the legend??????????? it says this is not supported?!?!
    showOnly takes a possibly nested list of objects. only these are shown.. Hm. But what about the order?! damn!

    Can I use this purely to make annotations? Or should I have a separate one to do that? how about transLegend(showOnly=[],comments='hello')
    Hm, no. I should just make a box when there's nothing but annotations, so that the annotations can start flush left. Not done. See boxstyle below...


Sept 2010: Figured out how to add comments nicely!!! So edit above notes [oh? i've not got comments only done yet.]
So, title goes on top; comments go below.

Need to do some error checking here to deal with pylab bugs and fragility: e.g. if tex mode not set, warnings ..?

    April 2011: Agh! I've broken my own work.
    showOnly=None: should work like legend(), using the best guess (all) of labelled objects.
    showOnly=[]: should avoid showing any legend entries. This just should produce a comment box...?

Annotations:
 you can set loc to be a 3-vector of x,y,h-alignment.

n.b. if you want to include handles from more than one axis, eg. axes made with twinx(), just specify the handles with "showonly"
    """

    from pylab import xlim,ylim,flatten,gca

    import os
    if 1: #os.uname()[2][0]=='3':
        from pylab import legend
        lh=legend()
        lh.get_frame().set_alpha(0.5)
        return


    if showOnly==[] and not comments==None: # Just do an annotation!!!
        # I guess I'm going to need to specify loc for this option. grrr.
        
        if titlesize is None: # Should really use textsize, but not imple yet
            titlesize=20

        xl,yl=xlim(),ylim()
        dx,dy=xl[1]-xl[0],yl[1]-yl[0]
        if loc in ['northeast','NE','ne','topright']:
            tlx,tly,ha=xl[1]-.05*dx,  yl[1]-.1*dy,'right' # top right
        tlx,tly,ha=xl[0]+.1*dx,  yl[1]-.1*dy,'left' # top left 
        if loc in ['southwest','SW','sw']:
            tlx,tly,ha=xl[0]+.1*dx,  yl[0]+.1*dy,'left' # bottmleft 
        if isinstance(loc,list):
            assert len(loc)==3
            tlx,tly,ha=loc
        gca().text(tlx, tly, comments, ha=ha, va="center", size=titlesize,  bbox=dict(boxstyle="round", fc="w", ec="0.5", alpha=0.9))
        print """this is not finished. and no auto placement!. Actually, you shoul djust use a one-line boxed text, e.g.:

                gca().text(.252,-.13,'Chris Barrington-Leigh\n                         McGill', ha='left', va="center", size=7,  bbox=dict(boxstyle="round", fc="w", ec="0.5", alpha=0.9))
        gca().text(.328,-.13,r'$\pm$1 s.e. shown', ha='right', va="center", size=7,  bbox=dict(boxstyle="round", fc="w", ec="0.5", alpha=0.9))
"""


        return()


    commentsH=[]
    if comments and isinstance(comments,str):
        comments=[comments]
    #if comments:
    #    if isinstance(comments,str):
    #        comments=[comments]
    #    for astr in comments:
    #        commentsH+=plt.plot([xlim()[0],xlim()[0]],[ylim()[0],ylim()[0]],'-',color=[1,1,1],visible=False,label=astr)

    if 0 and showOnly in [[],None] and commments is None and title is None:
        print 'No transLegend unless you specify showonly?! or title or comments'
        return
    if 0 and showOnly==None: # No, don't do this! legend() makes the best choice, ie of how to deal with groups of lines?? The following gives redundant entries.
        #import matplotlib.text as text
        from matplotlib.lines import Line2D
        from matplotlib.patches import Patch, Rectangle, Shadow, FancyBboxPatch
        #from matplotlib.collections import LineCollection, RegularPolyCollection,  CircleCollection

        showOnly=plt.gcf().findobj(Line2D)#[h for h in self.legendHandles if isinstance(h, Line2D)]#[xxx for xxx in plt.gcf().findobj(lambda x:hasattr(x, 'label')) if xxx not in plt.gcf().findobj(text.Text)]










    if showOnly not in [[],None]:
        from cpblUtilities import flattenList
        showOnly=[xx for xx in flatten(showOnly) if xx.get_label()]+[xx for xx in flatten(commentsH) if xx.get_label()]
        showLabels=[oob.get_label() for oob in showOnly]

        if 0 and removeOldLegends: # Possibly I could actually delete the object using o.remove()
            plt.gca().legend_ = None         # Remove existing legend
            plt.draw(  )                 # Remove existing legend


    if figlegend:
        1/0 # No, don't use that. maybe I can move it outside the axes if desired, though.
        lh=plt.figlegend(fancybox=True,shadow=False,loc=loc)
    else: # June 2010: Bug in pylab: loc=0 fails with log y axis? well, not in trivial cases. hm.
        if 0 and loc==None:
            loc=0
        if showOnly is None:
            lh=plt.legend(fancybox=True,shadow=False,title=title,loc=loc)
        elif showOnly not in [[]]:
            # # Feb 2011: need? horrific bloody kludge here to eliminate envelopes. matplotlib bug??
            #for ixx,xx in enumerate(showOnly):
            #    lh=plt.legend([xx],[showLabels[ixx]])#,fancybox=True,shadow=False,title=title,loc=loc)#loc=0,
            lh=plt.legend(showOnly,showLabels,fancybox=True,shadow=False,title=title,loc=loc)#loc=0,
                
        else:
            print """Do not know yet how to make a legend without lines (ie benefit from auto placement), and so I don't have a comment-only function as far as I can tell."""

            assert not showOnly is None # Not sure this is right. apr2011
            # The following does nothing. This is broken. I posted April 2011 on google groups
            return()
            lh=plt.legend(plt.gcf(),[],[],fancybox=True,shadow=False,title=title,loc=loc)#loc=0,
    if lh:
        lh.get_frame().set_alpha(0.5)

    if comments:
        from matplotlib.offsetbox import TextArea, VPacker 
        fontsize=None    
        if lh:
            fontsize=lh.get_texts()[0].get_fontsize()
            # else!! Warning: if lh==None, there were no lines plotted.. hmm.
            legendcomment=TextArea('\n'.join(comments), textprops=dict(size=fontsize)) 
            lh._legend_box = VPacker(pad=5, 
                                       sep=0, 
                                       children=[lh._legend_box,legendcomment], 
                                       align="left")  # Or should it be centre?
            lh._legend_box.set_figure(plt.gcf())
            plt.draw() # Do i need this?

    if lh and  title and titlesize:
        fontsize=lh.get_texts()[0].get_fontsize()
        plt.setp(plt.getp(lh,'title'),'fontsize',titlesize)

    if comments:
        return(lh,commentsH)
    else:
        return(lh)


def getIndexedColormap(cmap_name,N):
    """
        This will return a sequence of RGBA values, actually an Nx4 ndarray.  I 
    don't think the inclusion of the 4th column hurts anything, but 
    obviously you can use indexing to remove it if you want to.  The last 
    line in the function would change to ...(as below)
    """
    import numpy as np 
    import matplotlib.cm as cm 
    cmap = cm.get_cmap(cmap_name, N) 
    return cmap(np.arange(N))[:,:-1] 




def labelAbscissaSurveyYears(surveys,yGood=None,years=None,addQuebecHistory=False,clearOld=True):
    """
    This is rather too specialised to be in this module, but where else? to make sure I find it easily.

    Mark cycle of GSS or other survey on a plot for which x scale is year (time).

    years could be figured out (if None) from survey name

    yGood is either a vector of y values the same length as surveys, or a boolean vector of same length. It is used to show only surveys that have good data.

    if addHistory, it ONLY adds history notes to the x axis
    """
    from pylab import ylim
    # Label cycles
    if years==[]:  # Since defaults not programmed here yet.
        return()
    assert years # Since defaults not programmed here yet.
    # Ensure yGood is boolean vector; may have come as floating, ie the y data for each *possible* year/survey.
    if yGood==None:
        yGood=[True for yy in years]
    if not all([yy==True or yy==False for yy in yGood]):
        yGood=[isfinite(yy) for yy in yGood]

    if clearOld:
        oos=plt.findobj(plt.gca(),lambda o:o.get_label()=='labelAbscissaSurveyYears')
        while oos: # This is a bit awkward. It's just to deal with duplicates! Weird.
            oos[-1].remove()  # FINALLY!! I have found out how to remove an object from a plot!!
            oos=plt.findobj(plt.gca(),lambda o:o.get_label()=='labelAbscissaSurveyYears')

    if not addQuebecHistory:
        for ixx,xx in enumerate(years): # this just uses years of last province!
            if yGood[ixx]:
                plt.text(xx,min(ylim()),surveys[ixx].replace('GSSp','GSS'),horizontalalignment='center',verticalalignment='bottom',rotation=90,label='labelAbscissaSurveyYears')

    if addQuebecHistory:
        """ For presentation:  Should also label the folllowing (as another overlay version!! Make rest of figure fade, and overlay arrows...
        """
        history=[[1965,'1960s Quiet Revolution'],
                 [1970,'October Crisis'],
                 [1977,'Bill 101'],
                 [1980,'Referendum on sovereignty-association'],
                 [1982,'Canada Act Constitution'],
                 [1987,'Meech Lake Accord ...'],
                 [1992,'... to Charlottetown Accord'],
                 [1995,'Second referendum on sovereignty'],
                 ]
        for ayear,aevent in history:
            plt.text(ayear,min(ylim()),aevent,horizontalalignment='center',verticalalignment='bottom',rotation=90)


# I THINK FOLLOWING NOT USED ANYWHERE. PYGREP AND TRASH IT.
def empiricalcdf(data, method='Hazen'):
	    """Return the empirical cdf.
            [copied from scipi...]
hih? trash

	    Methods available:
	        Hazen:       (i-0.5)/N
	            Weibull:     i/(N+1)
	        Chegodayev:  (i-.3)/(N+.4)
	        Cunnane:     (i-.4)/(N+.2)
	        Gringorten:  (i-.44)/(N+.12)
	        California:  (i-1)/N
	   
    Where i goes from 1 to N.
	    """
	   
	    i = np.argsort(np.argsort(data)) + 1.
	    N = len(data)
	    method = method.lower()
	    if method == 'hazen':
	        cdf = (i-0.5)/N
	    elif method == 'weibull':
                cdf = i/(N+1.)
	    elif method == 'california':
	        cdf = (i-1.)/N
	    elif method == 'chegodayev':
	        cdf = (i-.3)/(N+.4)
	    elif method == 'cunnane':
	        cdf = (i-.4)/(N+.2)
	    elif method == 'gringorten':
	        cdf = (i-.44)/(N+.12)
	    else:
	        raise 'Unknown method. Choose among Weibull, Hazen, Chegodayev, Cunnane, Gringorten and California.'
	   
	    return cdf

def sortDictsIntoQuantiles(dicts,sortkey,weightkey,nQuantiles=None,approxN=None):
    """
    Generate quantile groups for data with sample weights. The data should be in the (inefficient!) format of a list of dicts.
    CPBL, 4 August 2010

    Aug 2010: Also, add a ranking for each respondent into her dict. This seems to be a curious beast... I'm going to put the lowest at 0 and the highest at 1. Yet I'm going to distribute the rest according to weights....... hmm

dicts: list of dicts. Each dict contains one respondent (sample), with named variables and their values. Should all have identical format. Horribly inefficient data structure!  Could maybe offer an alternative, where these variables have been vectorized! Indeed, all I need is y and w, and then this could return a dict of indices rather than sorting... So the behaviour should be quite different depending on whether first argument is a list of dicts or a dict of named vectors (not yet done).

sortkey: the variable whose order is being ranked into quantiles

weightkey: sample weights

nQuantiles=None: Either nQuantiles or approxN or neither can be specified. This tells how many quantiles to calculate. They'll be evenly spaced from 0 to 1.

approxN=None:  nQuantiles can be calculated based on how many samples there are, and requiring that roughly approxN or more samples are going to be in each quantile (ie would be if weights were uniform).

In August 2010 I made another CDF-related function, to calculate non-parametric confidence intervals.

    """
    assert approxN==None or nQuantiles==None

    finiteSubset=[dd for dd in dicts if isfinite(dd[sortkey])]
    finiteSubset.sort(key=lambda x: x[sortkey])
    y=array([respondent[sortkey] for respondent in finiteSubset])
    w=array([respondent[weightkey] for respondent in finiteSubset])
    if approxN==None:
        approxN=20
    if nQuantiles==None:
        # Restrict number of quantiles to be between 2 and 25
        nQuantiles=max(2,min(25,floor(len(y)/approxN)))
        #nQuantiles=min(25,floor(len(y)/approxN))
    pQtl=(1.0+1.0*arange(nQuantiles))/nQuantiles
    assert len(pQtl)==nQuantiles
    assert all(isfinite(w))
    CDF=np.cumsum(w)*1.0/sum(w)
    pQtl=(1.0*arange(nQuantiles))/nQuantiles
    # Now assign respondents into the pQtl quantiles categories (ie dict:
    byQtl={}
    used=len(finiteSubset)
    for qq in pQtl[::-1]:
        iQ=find(CDF[:used]>=qq)
        for irespondent in iQ:
            byQtl[qq]=[finiteSubset[iir] for iir in iQ]
        used=min(iQ)
  

    # Also, assign to each respondent (sample) a ranking, ranging from 0 to 1, but taking into account weights (a bit weird?)
    """
 I guess the way to do that is actually the same as above, except use n_unique(y) as the number of bins?
Algorithm:
assign rank to each, from 0 to 1, ignoring that some y values are the same. Then average over those with common y values.

(1) Stretch the CDF to go from 0 to 1:
    """
    CDFscaled=CDF/(max(CDF)-min(CDF))
    CDFscaled=CDFscaled-min(CDFscaled)
    """
    (2) Now average rank over duplicates in y
    """
    from scipy import stats
    keyname='rank'+sortkey[0].upper()+sortkey[1:]
    for isample,asample in enumerate(finiteSubset):
        asample[keyname]=CDFscaled[isample]
    from dictTrees import dictTree
    byY=dictTree(finiteSubset,[sortkey])
    for kk in byY:
        sameY=byY[kk]
        if len(sameY)>1:
            for dd in sameY:
                # WHat!!!!!!!!! there is no weighted standard error of sample mean with sample weights in scipy???? Following needs weights!!!!11
                #debugprint( '       WHat!!!!!!!!! there is no weighted standard error of sample mean with sample weights in scipy???? Following needs weights!!!!11')
                dd[keyname]=np.mean([aresp[keyname] for aresp in sameY])
                # NO! THIS WOULD NOT BE VERY INFORMATIVE OF ANYTHING... dd['se'+keyname]=stats.sem([aresp[keyname] for aresp in sameY])

    return(byQtl)

def scaledHist(rawData,bins=None,histtype=None,weights=None,color=None,label=None):
    """
    I find it hard scale a histogram so its max value is 1. So make a function to do this using a kludge.

    So normed arg not allowed in this call, unlike for hist()
    """
    if weights==None:
        weights=array([1.0 for xx in rawData])
    [a,b]=plt.histogram(rawData,bins=bins,weights=weights)
    weights=weights/max(a*1.0)
    return(plt.hist(rawData,bins=bins,histtype=histtype,weights=weights,color=color,label=label))


def analyseErrorDistributionNonParametric(errorList,weights=None,twoSidedP=[.68,.95,.99],visualise=False):
    """
    Pretty specific at the momen; can be generalised. Returns a set of confidence intervals for two-sided ranges in an empirical disteibution.
    #####NOPE! of errors. ie it's specific to "errors" only in that I'm interested in deviations around zero.

    .eg. values for p=.9 igves the the 5th and 95th quantile estimates.
    
    """
    assert weights==None # No one has written a weighted quantile function?! Ah, use rpy hmisc's wtd.quantile. Or port it.
    errorList.sort()
    outset={}
    for pp in twoSidedP:
        outset[pp]=[quantile(errorList, (1.0-pp)/2.0,issorted=True),quantile(errorList, 0.5+pp/2.0,issorted=True)]

    if visualise:
        plt.close('all') # debug!!!!!
        figure(876)
        plotPDFandCDF(errorList,label='error/E',CDFlabel='hm',color='g',normColor='r',absCDF=True,weights=weights)
        subplot(211)
        for kk in outset:
            plot(outset[kk],[kk,kk],'rx')
    return(outset)


def plotPDFandCDF(yy,label=None,CDFlabel=None,color=None,normColor=None,absCDF=True,weights=None):
    """
    For now, this assumes zero-centred distribution of data, and plots PDF on top and CDF on bottom axes of current figure. It adds Gaussian curve fits by default, and generates labels but leaves the legend for someone else to call, since it can be used multiple times on the same axis.
    It ignores/hides long tails! How to do this? Hm, at moment by looking at only the middle 75% of the range? or by looking at only 3 sigma, whicever is smaller.
    It does not allow weights yet!!!!
    At the moment, the CDF, unlike the PDF, displays the distribution of absolute values! So this is not the integral of the PDF. This is appropriate for look at a distribution of errors, my first application.
    
    CPBL Aug 2010
    """
    from pylab import sqrt
    import scipy
    from scipy.stats import distributions
    assert absCDF==True and weights==None # Missing features!
    yy=array(sorted(finiteValues(yy)))
    # Find width of core of distribution:
    def noTailsStdDev(yyy):
        """
        just use the inner 2 sigma to fit a Gaussian
        Assume mean zero!

        Actually, also look at inner 75%ile, and if that's smaller, take it, since sigma must be driven by hug outliers.
        """
        tailsLim=plt.std(yyy)
        sigmaFit=plt.std(yyy[find(abs(yyy)<2.0*tailsLim)])

        # Check alternative method:
        n=len(yyy)
        if n>10:
            yyy.sort()
            sigmaFit=min(sigmaFit,plt.std(yyy[int(n*0.125):int(n*.875)]))
        return(sigmaFit)

    # Assume zero-centred!
    
    fitSig=noTailsStdDev(yy)
    xx=arange(-5*fitSig,5.0*fitSig,fitSig/500.0) # For plotting fits
    tailsLim=fitSig*5.0
    bins=arange(-tailsLim,tailsLim,tailsLim/sqrt(len(yy))) # for histogram
    absbins=arange(0,2.0*tailsLim,2.0*tailsLim/sqrt(len(yy))) # for CDF of abs(yy)

    
    # PDF 
    subplot(211)

    scaledHist(yy[find(abs(yy)<tailsLim)],bins=bins,histtype='step',label=label,color=color)
    fitSig=noTailsStdDev(yy)
    aGauss=distributions.norm.pdf(xx,scale=fitSig)
    plot(xx,aGauss/max(aGauss),color+':',label=r'fit ($\sigma=$%.02f)'%fitSig,hold=True)


    if not normColor==None:
        # A sigma=1 normal curve for comparison
        xxn=arange(-6,6,100)
        aGauss=distributions.norm.pdf(xxn)
        plt.axis('tight')
        plot(xxn,aGauss/max(aGauss),normColor,label=r'normal ($\sigma$=1)')

    ylabel('Relative frequency')
    setp(gca(),'yticklabels',[])
    #xlabel(r'$\varepsilon/\sigma$')

    subplot(212) # CDF of absolute values



    nyy=abs(yy)
    num_bins = len(xx)#sqrt(len(nyy))
    counts, bin_edges = np.histogram(nyy, bins=absbins, normed=True)
    cdf = np.cumsum(counts)  # cdf not normalized, despite above
    scale = 1.0/cdf[-1]
    ncdf = scale * cdf
    plot(bin_edges[1:], ncdf,color,label=CDFlabel,hold=True)

    if not normColor==None:
        xxn=arange(0,6,100)
        plot(xxn, (distributions.norm.cdf(xxn)-0.5)*2.0 ,normColor,label=r'normal ($\sigma$=1)')


    return()


def finiteValues(ay,by=None,cy=None):
    """
Trivial shorthand: return an array of only the elements that are not NaN.
Various error checking missing...

CPBL August 2010

Sept 2010: If a second argument is given, then only elements of two vectors where both are finite are returned!

"""

    if by==None:
        return(array(ay)[find(isfinite(array(ay)))])
    if cy==None:
        ii=plt.logical_and(isfinite(array(ay)),isfinite(array(by)))
        return(array(ay)[ii],array(by)[ii])
    ii=plt.logical_and(    plt.logical_and(isfinite(array(ay)),isfinite(array(by))),   isfinite(array(cy)))
    return(array(ay)[ii],array(by)[ii], array(cy)[ii])




#%%%%%%%%%%%%%%%%% There are no weighted mean, weighted se of mean in numpy?!
# So, use following.
def wtvar(X, W, method = "R"): 
    sumW = sum(W) 
    if X.size==0:
        return(nan)
    if method == "nist": 
        xbarwt = sum([w * x for w,x in zip(W, X)])/sumW # fixed.2009.03.07, divisor added. 
        Np = sum([ 1 if (w != 0) else 0 for w in W]) 
        D = sumW * (Np-1.0)/Np 
        return sum([w * (x - xbarwt)**2 for w,x in zip(W,X)])/D 
    else: # default is R 
        sumW2 = sum([w **2 for w in W]) 
        xbarwt = sum([(w * x) for (w,x) in zip(W, X)])/sumW 
        return sum([(w * (x - xbarwt)**2) for (w,x) in zip(W, X)])* sumW/(sumW**2 - sumW2)

#def wtsem(X, W, method = "R"): 
#    thevar=wtvar(X, W, method = "R")
def wtsem(a, w,axis=0, bootstrap=False): # 
    if not bootstrap:
        #a, axis, w = _chk_asarray(a, axis,w)
        a=np.asarray(a)
        w=np.asarray(w)
        #n = a.count(axis=axis)
        #s = a.std(axis=axis,ddof=0) / ma.sqrt(n-1)
        assert axis==0 # Hm, not done yet.
        s = np.sqrt(wtvar(a,w) / (a.size-1) )
        return s
    else:
        # Bootstrap version?? (not used, just for thought.. waste of space, then.)
        """
        Not clear how to deal with weights when boostrapping. weight sampling based on them? renomralise for each sample draw (done here..)
        """
        from cpblUtilities import *
        w = w / w.sum()
    #Initialize:
        n = 10000
        ma = np.zeros(n)
        ssize=a.size
    #Save mean of each bootstrap sample:
        for i in range(n):
            idx = np.random.randint(0, ssize,ssize)
            sw=w[idx]/w[idx].sum()
            ma[i] = np.dot(a[idx], sw)

    #Estimate of Error in mean:
        #print ma.std(), wtsem(a,w)
        return(ma.std())



# And numpy's average() cannot deal with empty sets!!
def wtmean(a, axis=None, weights=None, returned=False):
    """
    This is numpy.average, augmented to be able to take emtpy sets.
    Return the weighted average of array over the specified axis.

    Parameters
    ----------
    a : array_like
        Data to be averaged.
    axis : int, optional
        Axis along which to average `a`. If `None`, averaging is done over the
        entire array irrespective of its shape.
    weights : array_like, optional
        The importance that each datum has in the computation of the average.
        The weights array can either be 1-D (in which case its length must be
        the size of `a` along the given axis) or of the same shape as `a`.
        If `weights=None`, then all data in `a` are assumed to have a
        weight equal to one.
    returned : bool, optional
        Default is `False`. If `True`, the tuple (`average`, `sum_of_weights`)
        is returned, otherwise only the average is returned.  Note that
        if `weights=None`, `sum_of_weights` is equivalent to the number of
        elements over which the average is taken.

    Returns
    -------
    average, [sum_of_weights] : {array_type, double}
        Return the average along the specified axis. When returned is `True`,
        return a tuple with the average as the first element and the sum
        of the weights as the second element. The return type is `Float`
        if `a` is of integer type, otherwise it is of the same type as `a`.
        `sum_of_weights` is of the same type as `average`.

    Raises
    ------
    ZeroDivisionError
        When all weights along axis are zero. See `numpy.ma.average` for a
        version robust to this type of error.
    TypeError
        When the length of 1D `weights` is not the same as the shape of `a`
        along axis.

    See Also
    --------
    ma.average : average for masked arrays

    Examples
    --------
    >>> data = range(1,5)
    >>> data
    [1, 2, 3, 4]
    >>> np.average(data)
    2.5
    >>> np.average(range(1,11), weights=range(10,0,-1))
    4.0

    """
    if not isinstance(a, np.matrix) :
        a = np.asarray(a)

    if a.size==0: # Added by CPBL, Sep2010
        return(nan)
    elif weights is None :
        avg = a.mean(axis)
        scl = avg.dtype.type(a.size/avg.size)
    else :
        a = a + 0.0
        wgt = np.array(weights, dtype=a.dtype, copy=0)

        # Sanity checks
        if a.shape != wgt.shape :
            if axis is None :
                raise TypeError, "Axis must be specified when shapes of a and weights differ."
            if wgt.ndim != 1 :
                raise TypeError, "1D weights expected when shapes of a and weights differ."
            if wgt.shape[0] != a.shape[axis] :
                raise ValueError, "Length of weights not compatible with specified axis."

            # setup wgt to broadcast along axis
            wgt = np.array(wgt, copy=0, ndmin=a.ndim).swapaxes(-1,axis)

        scl = wgt.sum(axis=axis)
        if (scl == 0.0).any():
            raise ZeroDivisionError, "Weights sum to zero, can't be normalized"

        avg = np.multiply(a,wgt).sum(axis)/scl

    if returned:
        scl = np.multiply(avg,0) + scl
        return avg, scl
    else:
        return avg


def LambertW(z):
  import numpy
  from math import log,sqrt,exp

  """debugprint( "Should in new version get: scipy.special.lambertw")"""
  " Lambert W function, principal branch "
  if z.__class__ in [numpy.ndarray]:
      return(array([LambertW(zz) for zz in z]))
  if z.__class__ in [list]:
      return([LambertW(zz) for zz in z])
  eps=4.0e-16
  em1=0.3678794411714423215955237701614608
  if z<-em1:
    print >>stderr,'LambertW.py: bad argument %g, exiting.'%z
    exit(1)
  if 0.0==z: return 0.0
  if z<-em1+1e-4:
    q=z+em1
    r=sqrt(q)
    q2=q*q
    q3=q2*q
    return\
     -1.0\
     +2.331643981597124203363536062168*r\
     -1.812187885639363490240191647568*q\
     +1.936631114492359755363277457668*r*q\
     -2.353551201881614516821543561516*q2\
     +3.066858901050631912893148922704*r*q2\
     -4.175335600258177138854984177460*q3\
     +5.858023729874774148815053846119*r*q3\
     -8.401032217523977370984161688514*q3*q
  if z<1.0:
    p=sqrt(2.0*(2.7182818284590452353602874713526625*z+1.0))
    w=-1.0+p*(1.0+p*(-0.333333333333333333333+p*0.152777777777777777777777))
  else:
    w=log(z)
  if z>3.0: w-=log(w)
  for i in xrange(10):
    e=exp(w)
    t=w*e-z
    p=w+1.0
    t/=e*p-0.5*(p+1.0)*t/p
    w-=t
    if abs(t)<eps*(1.0+abs(w)): return w


# """ file variance.py tranlator Dr. Ernesto P. Adorio UP at Clark Field. License GNu Free documentation license(wikipedia license) """   def svar(X, method = 0): """ Computes the sample variance using various methods explained in the reference.   method: 0 - basic two-pass, the default. 1 - single pass 2 - compenstated 3 - welford Reference: http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance """ if not(0 < method < 3): method = 0 n = len(X) if method == 0: # basic two-pass. xbar = sum(X) / float(n) return sum([(x - xbar)**2 for x in X])/(n -1.0)   elif method == 1: # single pass. S = 0.0 SS = 0.0 for x in X: S += x SS += x*x xbar = S/float(n) return (SS -n * xbar * xbar) / (n -1.0)   elif method == 2: # compensated algorithm. xbar = mean(X)   sum2 = 0 sumc = 0 for x in X: sum2 = sum2 + (x - mean)^2 sumc = sumc + (x - xbar) return (sum2 - sumc^2/n)/(n - 1)   elif method == 3: # Welford algorithm n = 0 mean = 0 M2 = 0   for x in X: n = n + 1 delta = x - mean mean = mean + delta/n M2 = M2 + delta*(x - mean) # This expression uses the new value return (M2/(n - 1.0))   def weightedsvar(X, W, method = 3): # Computes the weighted variance using weight vector W. # Welford algorithm. n = 0 for x, weight in zip(X,W): if n==0: n = 1 mean = x S = 0 sumweight = weight else: n = n + 1 temp = weight + sumweight S = S + sumweight*weight*(x-mean)^2 / temp mean = mean + (x-mean)*weight / temp sumweight = temp return S * n / ((n-1) * sumweight)   def pvar(X, method = 0): """ Computes the population variance. """ n = len(X) return svar(X, method)*(n-1.0)/ n   def weightedpvar(X, W, method = 3): # Wait a while for the other methods. n = len(X) return weightedsvar(X, W) * (n-1.0)/n   def sdev(X, method = 0): return sqrt(svar(X, method))   def pdev(X, method = 0): return sqrt(pvar(X, method)   # R names for the sample variances and std. deviations. var = svar sd = sdev   Sxx = svar(X) Sxx = svar Sx = sd   def cov(X,Y): # covariance of X and Y vectors with same length. n = len(X) xbar = sum(X) / n ybar = sum(Y) / n return sum([(x-xbar)*(y-ybar) for x,y in zip(X,Y)])/(n-1)   Sxy = cov Sx = sdev   def cor(X,Y): # correlation coefficient of X and Y. return(cov(X,Y)/(Sx(X)*Sx(Y))
# 
# Read more: http://adorio-research.org/wordpress/?p=242#ixzz0z5CKmn1K

def rpy_semipar(XX,y,nonparvars=None):
    """
    NOT WRITTEN YET.
    
    XX is a dict of observations with named-variable vectors!
    y is dependent variable
    nonparvars is a list (or string) with names of nonparametric variables

    library(SemiPar)

        robjects.globalenv["x"]=robjects.FloatVector(x)
    robjects.globalenv["y"]=robjects.FloatVector(y)
    if w is not None:
        robjects.globalenv["w"]=robjects.FloatVector(y)

    """
    fit=robjects.r("""

# CPBL  Jan 2011: agh. I'll use this if it works, but:    The is not a robust procedure. For robust quantile smoothing look at the cobs package on CRAN. Also look at Roger Koencker's package, quantreg, for more quantile smoothing.
    
# This code relies on the rollapply function from the "zoo" package.  My thanks goes to Achim Zeileis and Gabor Grothendieck for their work on the package.
Quantile.loess<- function(Y, X = NULL,
							number.of.splits = NULL,
							window.size = 20,
							percent.of.overlap.between.two.windows = NULL,
							the.distance.between.each.window = NULL,
							the.quant = .95,
							window.alignment = c("center"),
							window.function = function(x) {quantile(x, the.quant)},
							# If you wish to use this with a running average instead of a running quantile, you could simply use:
							# window.function = mean,
							...)
{
	# input: Y and X, and smothing parameters
	# output: new y and x
 
	# Extra parameter "..." goes to the loess
 
	# window.size ==  the number of observation in the window (not the window length!)
 
	# "number.of.splits" will override "window.size"
	# let's compute the window.size:
	if(!is.null(number.of.splits)) {window.size <- ceiling(length(Y)/number.of.splits)}
 
	# If the.distance.between.each.window is not specified, let's make the distances fully distinct
	if(is.null(the.distance.between.each.window)) {the.distance.between.each.window <- window.size}
 
	# If percent.of.overlap.between.windows is not null, it will override the.distance.between.each.window
	if(!is.null(percent.of.overlap.between.two.windows))
		{
			the.distance.between.each.window <- window.size * (1-percent.of.overlap.between.two.windows)
		}
 
 
 
	# loading zoo
	if(!require(zoo))
	{
		print("zoo is not installed - please install it.")
		install.packages("zoo")
	}
 
 
	if(is.null(X)) {X <- index(Y)} # if we don't have any X, then Y must be ordered, in which case, we can use the indexes of Y as X.
 
	# creating our new X and Y
	zoo.Y <- zoo(x = Y, order.by = X)
	#zoo.X <- attributes(zoo.Y)$index
 
	new.Y <- rollapply(zoo.Y, width = window.size,
								FUN = window.function,
								by = the.distance.between.each.window,
								align = window.alignment)
	new.X <- attributes(new.Y)$index
	new.Y.loess <- loess(new.Y~new.X, family = "sym",...)$fitted
 
	return(list(y = new.Y, x = new.X, y.loess = new.Y.loess))
}
















    
    #require(graphics)
    #x <- c(%s)
    # y <- c(%s)
     lo <- loess(y~x,span=%f)# I haven't figured out how to skip the second argument yet. Or I can create a data frame. then add: ,weights)

     #plot(x,y)
    """%(','.join([str(xx) for xx in x]),','.join([str(xx) for xx in y]), kernelwidth) +"""
#     lines(predict(lo), col='red', lwd=2)
# Create a 99-length quantile vector:
#xl <- seq(0.01,.99,.01)
# Or a 100-length vector from x: 

xl <- seq(min(x),max(x), (max(x) - min(x))/%d)"""%(nPts-1)+"""

# Plot result, with data:
#lines(xl, predict(lo,xl), col='red', lwd=2)

fit <- predict(lo,xl)

"""+'\n'.join(["""q%02d <- Quantile.loess(y,x,the.quant=%.2f)
"""%(qq*100.0,qq) for qq in qBands])+"""
""")
#q75 <- Quantile.loess(y,x,the.quant=.75)
#q25 <- Quantile.loess(y,x,the.quant=.25)
#""")



def rpy_loess(x,y,w=None,nPts=None,plotTitle=None,kernelwidth=0.75,qBands=None,OLDFORMAT=False):
    """
    Jan 2011.
    Return loess smoothed values for y(x).
    Return 25th and 75th percentile values (by default), smoothed across x.

    Return loess smoothed standard deviation...? Not sure how to convert this to a standard error.  If I did a boxcar average and the x-points are evenly distributed I might be able to calculate standard error.

    It returns version of the loess for both the original points and 

    2011 April: Added qBands (to generalise from 0.25, .75) and OLDFORMAT. Latter is since I now return just one xvector...

    May 2011: it has started seg faulting a bunch. argh!

It also makes a plot??

UQ: Who told me non-parametric is the way of the future: it was at the reception at some museumish thing with a hummer artwork.
UQ: Ask him how to get loess with s.e.?  [See lpoly!]


    """
    import time
    if qBands is None:
        qBands=[0.25,0.75]
    print ' '+time.asctime()+ ' Initiating loess through R (mean,'+','.join(['%02d%%ile'%(100.0*qq) for qq in qBands])+')...'


#    import rpy2.robjects as robjects
    if nPts is None:
        nPts=100


    if not len(x):
        print 'No x data, but made it to LOESS anyway. Whoops! Returning dummies'
        if OLDFORMAT: # Retired April 2011, ie for gallup compatibility only.
            return(
            dict(xfit=[],yfit=[],
                 q25=dict(x=[],y=[],fit=[]),
                 q75=dict(x=[],y=[],fit=[])
                   )
                )
        else:
            outDict=dict(xfit=[],yfit=[],qFits={})
            if qBands:
                outDict['xq']=[]
            for qq in qBands:
                outDict['y%02d'%(qq*100)]=[]
            return(outDict)

    from cpblUtilities import uniqueInOrder
    if not  len(x) == len(uniqueInOrder(x)): # zoo package in R fails otherwise?
        print '    Adding jitter to create unique ordering of points for rpy loess!!!', plotTitle
        import random
        x=list(array(x)+array([random.random()*1.0e-9 for ii in range(len(x))]))

    #robjects.r(
    '''
       f <- function(r, verbose=FALSE) {
                if (verbose) {
                    cat("I am calling f().\n")
                }
                2 * pi * r
            }
            f(3)
    '''
    #r_f = robjects.globalenv['f']

    #x,y=[1,2,3,4,5],[4,5,6,7,4]
    robjects.globalenv["x"]=robjects.FloatVector(x)
    robjects.globalenv["y"]=robjects.FloatVector(y)
    if w is not None:
        robjects.globalenv["w"]=robjects.FloatVector(y)

    fit=robjects.r("""

# CPBL  Jan 2011: agh. I'll use this if it works, but:    The is not a robust procedure. For robust quantile smoothing look at the cobs package on CRAN. Also look at Roger Koencker's package, quantreg, for more quantile smoothing.
    
# This code relies on the rollapply function from the "zoo" package.  My thanks goes to Achim Zeileis and Gabor Grothendieck for their work on the package.
Quantile.loess<- function(Y, X = NULL,
							number.of.splits = NULL,
							window.size = 20,
							percent.of.overlap.between.two.windows = NULL,
							the.distance.between.each.window = NULL,
							the.quant = .95,
							window.alignment = c("center"),
							window.function = function(x) {quantile(x, the.quant)},
							# If you wish to use this with a running average instead of a running quantile, you could simply use:
							# window.function = mean,
							...)
{
	# input: Y and X, and smothing parameters
	# output: new y and x
 
	# Extra parameter "..." goes to the loess
 
	# window.size ==  the number of observation in the window (not the window length!)
 
	# "number.of.splits" will override "window.size"
	# let's compute the window.size:
	if(!is.null(number.of.splits)) {window.size <- ceiling(length(Y)/number.of.splits)}
 
	# If the.distance.between.each.window is not specified, let's make the distances fully distinct
	if(is.null(the.distance.between.each.window)) {the.distance.between.each.window <- window.size}
 
	# If percent.of.overlap.between.windows is not null, it will override the.distance.between.each.window
	if(!is.null(percent.of.overlap.between.two.windows))
		{
			the.distance.between.each.window <- window.size * (1-percent.of.overlap.between.two.windows)
		}
 
 
 
	# loading zoo
	if(!require(zoo))
	{
		print("zoo is not installed - please install it.")
		install.packages("zoo")
	}
 
 
	if(is.null(X)) {X <- index(Y)} # if we don't have any X, then Y must be ordered, in which case, we can use the indexes of Y as X.
 
	# creating our new X and Y
	zoo.Y <- zoo(x = Y, order.by = X)
	#zoo.X <- attributes(zoo.Y)$index
 
	new.Y <- rollapply(zoo.Y, width = window.size,
								FUN = window.function,
								by = the.distance.between.each.window,
								align = window.alignment)
	new.X <- attributes(new.Y)$index
	new.Y.loess <- loess(new.Y~new.X, family = "sym",...)$fitted
 
	return(list(y = new.Y, x = new.X, y.loess = new.Y.loess))
}
















    
    #require(graphics)
    #x <- c(%s)
    # y <- c(%s)
     lo <- loess(y~x,span=%f)# I haven't figured out how to skip the second argument yet. Or I can create a data frame. then add: ,weights)

     #plot(x,y)
    """%(','.join([str(xx) for xx in x]),','.join([str(xx) for xx in y]), kernelwidth) +"""
#     lines(predict(lo), col='red', lwd=2)
# Create a 99-length quantile vector:
#xl <- seq(0.01,.99,.01)
# Or a 100-length vector from x: 

xl <- seq(min(x),max(x), (max(x) - min(x))/%d)"""%(nPts-1)+"""

# Plot result, with data:
#lines(xl, predict(lo,xl), col='red', lwd=2)

fit <- predict(lo,xl)

"""+'\n'.join(["""q%02d <- Quantile.loess(y,x,the.quant=%.2f)
"""%(qq*100.0,qq) for qq in qBands])+"""
""")
#q75 <- Quantile.loess(y,x,the.quant=.75)
#q25 <- Quantile.loess(y,x,the.quant=.25)
#""")



    fit1=array(robjects.globalenv["fit"])
    xl=array(robjects.globalenv["xl"])
    #q25=array(robjects.globalenv["q25"])

    # It seems q25,q75 (or whatever qBands were calculated) contain various things.
    qFits={}
    for qq in qBands:
        qname='q%02d'%(qq*100.0)
        qx,qy,qyfit=list(robjects.globalenv[qname].rx2('x')),list(robjects.globalenv[qname].rx2('y')),list(robjects.globalenv[qname].rx2('y.loess'))
        qFits[qq]={'x':qx,'y':qy,'yfit':qyfit}
        assert qx==qFits[qBands[0]]['x']
        #q25x,q25y,q25fit=list(robjects.globalenv['q25'].rx2('x')),list(robjects.globalenv['q25'].rx2('y')),list(robjects.globalenv['q25'].rx2('y.loess'))
        #q75x,q75y,q75fit=list(robjects.globalenv['q75'].rx2('x')),list(robjects.globalenv['q75'].rx2('y')),list(robjects.globalenv['q75'].rx2('y.loess'))
    #assert q25x==q75x

    #figure(22)
    #plot(q25x,q25y,q75x,q75y,q25x,q25fit,q75x,q75fit)
    print "Sorry -- Plotting seems not updated for new variables dict. Skipping it"
    if 0 and plotTitle and .25 in qBands and .75 in qBands: # Since I haven't generalised to other qBands yet
        figure(23)
        clf()
        #
        if 0:
            plot(x,y,'r.')
        plot(q25x,q25fit,'m',q75x,q75fit,'m')
        if 0:
            plot(q25x,q25y,'g',q75x,q75y,'g')

        # Add envelope:
        #@xs, ys = plt.fill_between(q25x, q25fit,q75fit)
        envelopePatch=plt.fill_between(q25x, q25fit,q75fit,facecolor='green',alpha=.5,)#,linewidth=0,label=patchLabel)# edgecolor=None, does not work!! So use line
        plot(xl,fit1,'b--',label='loess fit')

        # Show whatever the other thing that Quantile.loess is returning:
        #plot(q25x,q25y,q75x,q75y,q25x,q25fit,q75x,q75fit)
        title(plotTitle)
        plt.show()

    if OLDFORMAT: # Retired April 2011, ie for gallup compatibility only.
        return(
        dict(xfit=xl,yfit=fit1,
             q25=dict(x=q25x,y=q25y,fit=q25fit),
             q75=dict(x=q75x,y=q75y,fit=q75fit)
               )
            )
    else:
        outDict=dict(xfit=xl,yfit=fit1,qFits=qFits)
        if qBands:
            outDict['xq']=qFits[qBands[0]]['x']
        for qq in qBands:
            outDict['y%02d'%(qq*100)]=qFits[qq]['yfit']
        return(outDict)

    """
 print(lm_D9.rclass)
[1] "lm"
Here the resulting object is a list structure, as either inspecting the data structure or reading the R man pages for lm would tell us. Checking its element names is then trivial:

>>> print(lm_D9.names)
 [1] "coefficients"  "residuals"     "effects"       "rank"
 [5] "fitted.values" "assign"        "qr"            "df.residual"
 [9] "contrasts"     "xlevels"       "call"          "terms"
[13] "model"
And so is extracting a particular element:

>>> print(lm_D9.rx2('coefficients'))
(Intercept)    groupTrt
      5.032      -0.371
or

>>> print(lm_D9.rx('coefficients'))

"""

    robjects.r('''
       f <- function(x,y, verbose=FALSE) {
                if (verbose) {
                    cat("I am calling f().\n")
                }
                2 * pi * r
            }
            f(3)
    ''')


    """# aha!! A quanitle loess will give me what I want for sigmas!! just show an envelope of interquartile range!!
 # fitting the Quantile LOESS
source("http://www.r-statistics.com/wp-content/uploads/2010/04/Quantile.loess_.r.txt")
QL <- Quantile.loess(Y = Ozone.2, X = Temp.2, 
							the.quant = .95,
							window.size = 10,
							window.alignment = c("center"))
points(QL$y.loess ~ QL$x, type = "l", col = "green")
 
legend("topleft",legend = c("95% Quantile regression", "95% Quantile LOESS"), fill = c("red","green"))

"""

def linearColormapLookup(cmap,zs):
    import numpy as np 
    import matplotlib.cm as cm 
    def make_N_colors(cmap_name, N): 
        cmap = cm.get_cmap(cmap_name, N) 
        return cmap(np.arange(N))[:,:-1]
    cmapLU=make_N_colors('jet',100)
    from scipy import interpolate
    return(interpolate.interp1d(np.linspace(min(zs),max(zs),num=100),array(cmapLU).T))

# oh shit... I already had that:
def getIndexedColormap(cmap_name,N):
    """
        This will return a sequence of RGBA values, actually an Nx4 ndarray.  I 
    don't think the inclusion of the 4th column hurts anything, but 
    obviously you can use indexing to remove it if you want to.  The last 
    line in the function would change to ...(as below)
    """
    import numpy as np 
    import matplotlib.cm as cm 
    cmap = cm.get_cmap(cmap_name, N) 
    return cmap(np.arange(N))[:,:-1] 


    
def assignColormapEvenly(cmap,zs,asDict=False,missing=[1,1,1]):#None):#
    # Stupid bloody basemap/pylab can't colormap grayscale fill colours?!
    """
Nice: Use a colormap, supplied rather explicitly (tough to get in pylab!?), to evenly cover the given range of data. In default mode, returns a lookup table of values to RGB colour list. 
(Is "floor" correct?)

See also (much nicer?) def plot_biLinearColorscale(rgbBotMidTop,datavals) in cpblUtilitiesMath

yuck. this should instead return an function, using interp. Now it does!

Also, matplotlib.cm has lots of utility functions that I'm not using!!


What? What does evenly mean?? Can I just look at the end points and spread the values out linearly? Or was this supposed to spread them out evenly across the colours? Ahh... I think the latter. So I should rever htis function to a much older version and make a new linear one.
    """
    import pylab
    szs=deepcopy([zz for zz in zs if not isnan(zz)])
    szs.sort()

    assert asDict
    assert cmap
    # Ie (above): revert this to old version, or retire it...
    
    if cmap is None:
        fooo
        cmap=tonumeric([LL.strip().split('\t') for LL in open('mcool.tsv','rt').readlines()])[::16]
        from pylab import jet
        cmap=jet()
        #colormap(jet)

    
    ccs=[cmap[int(floor(iz*len(cmap)*1.0/len(szs)))] for iz in range(len(szs))]
    if asDict:#deprecatd:
        return(dict(zip(szs+[pylab.nan],ccs+[missing])))
    else: # Return a lookup function!!
        from scipy import interpolate
        return(interpolate.interp1d(array(szs).T,array(ccs).T)) # A lookup function for z values!

def plot_biLinearColorscale(rgbBotMidTop,datavals):
    """
    Specify the bottom, middle, and top RGB values of a colormap. Specify the linear extremes of the data.
    So this should produce a colorbar with the given map and tickmarks for the given data extremes, mapped linearly to the colormap.

I also have an old def assignColormapEvenly(cmap,zs,asDict=True,missing=[1,1,1]) in mapMaster
Also, matplotlib.cm has lots of utility functions that I'm not using!!

    """
    if not rgbBotMidTop: # Offer a default example: green red
        rgbBotMidTop=[[0.5,0.0,0.0],[1.0,1.0,.9],[0.0,0.5,0.0]]
    rgbB,rgbM,rgbT=rgbBotMidTop #=[[rgb1],[],[]]
    """
    cdict = {'red':((0.0,  .5, .5),
               (0.5,  1.0, 1.0),
               (1.0,  0.0, 0.0)),

     'green': ((0.0,  0.0, 0.0),
               (0.5, 1.0, 1.0),
               (1.0,  0.5, 0.5)),

     'blue':  ((0.0,  0.0, 0.0),
               (0.5,  .9, .9),
               (1.0,  0.0, 0.0))}
      """
    cdict = {'red':((0.0,  rgbB[0],rgbB[0]),
               (0.5,  rgbM[0],rgbM[0]),
               (1.0,  rgbT[0],rgbT[0])),

     'green': ((0.0,  rgbB[1],rgbB[1]),
               (0.5,  rgbM[1],rgbM[1]),
               (1.0,  rgbT[1],rgbT[1])),

     'blue': ((0.0,  rgbB[2],rgbB[2]),
               (0.5,  rgbM[2],rgbM[2]),
               (1.0,  rgbT[2],rgbT[2]))         }
    import numpy as np
    #import pylab as plt
    from pylab import *
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    from matplotlib.colors import LinearSegmentedColormap
    greenred= LinearSegmentedColormap('TwoLinear', cdict)

    plt.register_cmap(name='TwoLinear', data=cdict) # optional lut kwarg

    fig=plt.figure(1)
    plt.clf()
    #cnorm = mpl.colors.Normalize(vmin=min(datavals),vmax=max(datavals))
    minD,maxD=min(datavals),max(datavals)
    hi=plt.imshow([[minD,maxD],[maxD,minD]], interpolation='nearest',cmap='TwoLinear')
    hax=gca()
    #plt.colormap('GreenRed')
    setp(hax,'visible',False)
    #cb1 = mpl.colorbar.ColorbarBase(mpl.colorbar.make_axes(hax,pad=0)[0], cmap=greenred,norm=cnorm,orientation='vertical',format='%d')#,ticks=range(len(allCohorts)))#uniqu
    hcb=plt.colorbar(aspect=6) #norm=cnorm,

    # Also return a lookup function that can be used with interpolation to give colour for any country value:
    import scipy.interpolate as sp
    print  [minD,1.0/2.0*sum(minD+maxD),maxD]
    print [array(rgbB),array(rgbM),array(rgbT)]
    return(fig,sp.interpolate.interp1d(        [minD,1.0/2.0*sum(minD+maxD),maxD],        [array(rgbB),array(rgbM),array(rgbT)], axis=0) )


