IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Changeset 20086


Ignore:
Timestamp:
Oct 12, 2008, 11:34:23 PM (18 years ago)
Author:
Sebastian Jester
Message:

Add histogram plotting, a way to specify overplots, and generalize stats_med routine to return a list of ntiles

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/sj_ippTests_branch_20080929/ippTests/compIPPphoto.py

    r19924 r20086  
    2727#     + magdiff vs. sky
    2828#     + magdiff vs. skydiff
     29#     + magdiff vs. stellar density
     30#     + magdiff vs. presence of bright stars
    2931#     + something about PSF sizes; use Michigan moments for photo -
    3032#       M_rr_cc_psf is the size of the reconstructed PSF, and m_rr_cc is the size of the object
    3133#     + magdiff, mag vs. d_x^2+d_y^2
    3234#     + histogram of chi - but meaningful only for repeat measurements?
     35#     + object density - store!
     36#
    3337#   - per-run:
    3438#     + histograms of everything - recovery fractions, means, medians, quartiles
    3539#     + Trends with field number, seeing etc.
    3640
     41# Convention: for scatter, plot second against first; for histogram, plot both histograms
     42#
     43# XXX: Need mechanism to specify default plotting ranges, since
     44# outliers mess up axis ranges. E.g. 0.1 and 0.9 percentiles?
    3745plotcol_tlist = [
    38     ('d_sky','d_mag','scatter')
    39     ,('d_x','d_y','scatter')
    40     ,('sky_ps1','d_mag','scatter')
    41     ,('psfinstmag_sdss','d_mag','scatter')
    42     ,('sky_ps1','d_mag','scatter')
    43     ,('m_rr_cc_psf','d_mag','scatter')
     46    (['d_sky'],['d_mag'],'scatter')
     47    ,(['d_x'],['d_y'],'scatter')
     48    ,(['sky_ps1'],['d_mag'],'scatter')
     49    ,(['psfinstmag_sdss'],['d_mag'],'scatter')
     50    ,(['m_rr_cc_psf'],['d_mag'],'scatter')
     51    ,(['psfinstmag_sdss'],['psf_inst_mag'],'histogram')
     52    ,(['sky_sdss'],['sky_ps1'],'histogram')
     53    ,(['x_psf'],['objc_colc'],'histogram')
     54    ,(['y_psf'],['objc_rowc'],'histogram')
     55    ,(['x_psf','objc_colc'],['y_psf','objc_rowc'],'scatter')
    4456    ]
    4557
     
    91103    return column_hash,goodrow_hash
    92104
    93 def getOutnameStatsOnefile(matchtable,kind,col1,col2=None,format='eps'):
     105def getOutnameStatsOnefile(matchtable,kind,col1_l,col2_l=None,format='eps'):
    94106    import re
    95107    # Construct output filename
    96108    root = re.sub('(\.[sc]mf|\.fits?)$','',matchtable)
    97     if isNone(col2):
    98         outname = '%s_%s_%s.%s' % (root,kind,col1,format)
     109    if isNone(col2_l):
     110        outname = '%s_%s_%s.%s' % (root,kind,col1[0],format)
    99111    else:
    100         outname = '%s_%s_%s_%s.%s' % (root,kind,col1,col2,format)
     112        if len(col1_l) == 1:
     113            outname = '%s_%s_%s_%s.%s' % (root,kind,col1_l[0],col2_l[0],format)
     114        else:
     115            outname = '%s_%s_%s_%s_%s_%s.%s' % (root,kind,col1_l[0],col2_l[0],col1_l[1],col2_l[1],format)
    101116    return outname
    102117   
     
    104119    """Make diagnostic plots for a single table, based on values
    105120    in values_hash"""
     121    from numpy import concatenate
     122    from sm import cvar,angle
     123    # In histograms, only plot core of distribution within these percentiles:
     124    histo_min_ntile = 0.03
     125    histo_max_ntile = 1-histo_min_ntile
    106126    for troika in plotcol_tlist:
    107         col1name = troika[0]
    108         col2name = troika[1]
     127        col1name_l = troika[0]
     128        col2name_l = troika[1]
    109129        plottype = troika[2]
    110         outname = getOutnameStatsOnefile(matchtable,plottype,col1name,col2name,format=format)
    111         values1 = values_hash[col1name]
    112         goodrows1 = goodrow_hash[col1name]
    113         values2 = values_hash[col2name]
    114         goodrows2 = goodrow_hash[col2name]
    115         # Slice out depth for current filter if necessary
    116         if len(values1.shape) > 1:
    117             values1 = values1[:,bandindex]
    118             goodrows1 = goodrows1[:,bandindex]
    119         if len(values2.shape) > 1:
    120             values2 = values2[:,bandindex]
    121             goodrows2 = goodrows2[:,bandindex]
    122         goodrows = goodrows1 & goodrows2
    123         # print col1name, col2name, sum(goodrows), sum(values1 > 1e3)
    124         # print values1[goodrows & (values1 > 1e3)]
     130        outname = getOutnameStatsOnefile(matchtable,plottype,col1name_l,col2name_l,format=format)
    125131        smOpenPlot(outname,format=format)
    126         if plottype == 'scatter':
    127             smScatterPlot(values1,values2,logical=goodrows,xlab=col1name,ylab=col2name)
    128         smClosePlot()
     132        firstplot = True
     133        for col1name,col2name in zip(col1name_l,col2name_l):
     134            values1 = values_hash[col1name]
     135            goodrows1 = goodrow_hash[col1name]
     136            values2 = values_hash[col2name]
     137            goodrows2 = goodrow_hash[col2name]
     138            # Slice out depth for current filter if necessary
     139            if len(values1.shape) > 1:
     140                values1 = values1[:,bandindex]
     141                goodrows1 = goodrows1[:,bandindex]
     142            if len(values2.shape) > 1:
     143                values2 = values2[:,bandindex]
     144                goodrows2 = goodrows2[:,bandindex]
     145            goodrows = goodrows1 & goodrows2
     146            # print col1name, col2name, sum(goodrows), sum(values1 > 1e3)
     147            # print values1[goodrows & (values1 > 1e3)]
     148            if plottype == 'scatter':
     149                if firstplot:
     150                    # Avoid plotting outliers
     151                    if not re.search('objc',col2name):
     152                        [xmin,xmax] = stats_med(values1[goodrows],[histo_min_ntile,histo_max_ntile])
     153                        [ymin,ymax] = stats_med(values2[goodrows],[histo_min_ntile,histo_max_ntile])
     154                        # print "Huhu", outname, min(values1),max(values2),xmin,xmax
     155                        smScatterPlot(values1,values2,logical=goodrows,xlab=col1name,ylab=col2name,\
     156                                          # xrange=(xmin,xmax),yrange=(ymin,ymax))
     157                                      xrange=None,yrange=None)
     158                    else:
     159                        smScatterPlot(values1,values2,logical=goodrows,xlab=col1name,ylab=col2name)
     160                else:
     161                    angle(45)
     162                    smScatterPlot(values1,values2,logical=goodrows,append=True)
     163                    angle(0)
     164            if plottype == 'histogram':
     165                Nbins = 20
     166                values1 = values1[goodrows1] # This is SDSS column typically
     167                values2 = values2[goodrows2]
     168                # Avoid plotting outliers
     169                if not re.search('objc',col2name):
     170                    [minbin,maxbin] = stats_med(concatenate((values1,values2)),\
     171                                                    [histo_min_ntile,histo_max_ntile])
     172                else:
     173                    minbin=None
     174                    maxbin=None
     175                smHistoPlot(values2,ltype=0,nbins=Nbins,minbin=minbin,maxbin=maxbin,xlab=col2name,ylab="N")
     176                smHistoPlot(values1,append=True,minbin=minbin,maxbin=maxbin,nbins=Nbins,\
     177                            ltype=2)
     178            firstplot = False
     179    smClosePlot()
    129180
    130181def valuesKeysSortedByKeys(hash):
     
    187238        header.update('HIERARCH %s'%(label),value)
    188239   
    189 def stats_med(array):
     240def stats_med(arr,ntiles=[0.25,0.5,0.75]):
     241    """Return a list of ntiles of array arr"""
    190242    from numpy import nan,sort
    191     sortarray = sort(array,kind='mergesort')
    192     l = len(array)
     243    l = len(arr)
    193244    if l == 0:
    194245        return nan,nan,nan
    195     lowerquartile = sortarray[0.25*l]
    196     median = sortarray[0.5*l]
    197     upperquartile= sortarray[0.75*l]
    198     return lowerquartile,median,upperquartile
     246    sortarr = sort(arr,kind='mergesort')
     247    outl = []
     248    for ntile in ntiles:
     249        outl.append(sortarr[ntile*(l-1)])
     250    return outl
    199251
    200252def computeStatistics(tablename,copyfields_list = ['RUN','RERUN','CAMCOL','FIELD','FILTER','FWHM_X','FWHM_Y']):
     
    205257    by their header names"""
    206258    import pyfits,re,operator
    207     from  numpy import sqrt,log10,log
     259    from  numpy import sqrt,log10,log,array,isfinite
    208260   
    209261    label_l = ['mean','rms','median','lowerquartile','upperquartile']
    210     # Hash of output header keyword names pointing to paired lists of
    211     # columns that are to be subtracted from each other for statistics
    212     # to be generated.
    213262    filterID={'u':0,'g':1,'r':2,'i':3,'z':4}
    214     # For this to work, table needs to be called match_r_... for r-band
    215263   
    216264    infile_handle = pyfits.open(tablename,mode='update')
     
    242290            goodrow_hash[column.lower()] = goodValBool(table_data.field(column))
    243291            colval_hash[column.lower()] = array(table_data.field(column))
    244 
    245     # These are just the columns; need to get a slice with the correct array index later
     292    # Maybe I want to keep track of these number of "good" rows?
     293
     294    # XXX: Count number of objects in a) SDSS, b) PS1, c) both, by
     295    #  counting number of a) entries > 0 in 'id', b) non-nan entries
     296    #  in IPP_IDET, c) both (Later pass the following as parameters to
     297    #  be read from a config file)
     298    N_SDSS_col = 'id'
     299    N_SDSS_outcolname = 'N_SDSS'
     300    N_PS1_col = 'IPP_IDET'
     301    N_PS1_outcolname = 'N_PS1'
     302    N_both_outcolname = 'N_both'
     303    N_either_outcolname = 'N_either'
     304   
     305    has_sdss = array(table_data.field(N_SDSS_col)) > 0
     306    has_PS1 = isfinite(array(table_data.field(N_PS1_col)))
     307    outhash[N_SDSS_outcolname] = sum(has_sdss)
     308    outhash[N_PS1_outcolname] = sum(has_PS1)
     309    outhash[N_both_outcolname] = sum(has_sdss & has_PS1)
     310    outhash[N_either_outcolname] = len(has_PS1)
     311   
     312    # Hash of output header keyword names pointing to paired lists of
     313    # columns that are to be subtracted from each other for statistics
     314    # to be generated. This lists just the columns, slicing out the
     315    # correct depth from SDSS 5-filter arrays will be done later.
    246316    colname_hash = {
    247317        'd_x':['colc','X_PSF']
     
    255325        ,'d_magerr':['psfinstmagerr_sdss','PSF_INST_MAG_SIG']
    256326        }
     327    # colname_hash should probably be passed as a parameter so it can
     328    # be read from file.
    257329    ismag = re.compile('mag')
    258330    iscounts = re.compile('counts')
    259     outcoll = []
     331    # outcoll = []
    260332
    261333   
     
    280352        colval_hash[outcol] = SDSScol - PS1col
    281353        avg = delta.mean()
    282         lowq,med,upq = stats_med(delta)
     354        [lowq,med,upq] = stats_med(delta)
    283355        rms = sqrt(delta.var())
    284356        # Save to fits columns/header
     
    289361        for label,value in zip(outtablabel,[avg,rms,med,lowq,upq]):
    290362            outhash[label]=value
    291    
    292     newtab = pyfits.new_table(table.columns+pyfits.ColDefs(outcoll),header=h)
    293     newprimhdu = pyfits.PrimaryHDU(header=infile_handle[0].header)
     363
     364    # #t lines were for writing the new columns which I'm now creating in the match script
     365    #t newtab = pyfits.new_table(table.columns+pyfits.ColDefs(outcoll),header=h)
     366    #t newprimhdu = pyfits.PrimaryHDU(header=infile_handle[0].header)
    294367    infile_handle.close()
    295     writeTable(tablename,newprimhdu,newtab)
     368    #t writeTable(tablename,newprimhdu,newtab)
    296369    return outhash,colval_hash,goodrow_hash
    297370
     
    535608                    xlab=None,ylab=None,xrange=None,yrange=None,\
    536609                    box1=None,box2=None,box3=None,box4=None,\
    537                     append=False):
     610                    ltype=0,append=False):
    538611    """Plot a histogram, intelligently deriving bins from the given
    539612    parameters if they are given intelligently.  Otherwise, silently
    540613    do nothing."""
    541614    import sm
     615    from numpy import histogram
    542616    if isNone(minbin):
    543617        minbin = min(vec)
     
    552626    bincenters = leftbinedges + 0.5*(leftbinedges[1]-leftbinedges[0])
    553627
     628    sm.ltype(ltype)
    554629    if not append:
    555630        logical = None
    556631        smSetup(bincenters,histo,logical,xrange,yrange,xlab,ylab,box1,box2,box3,box4)
    557632    sm.histogram(bincenters,histo)
     633    sm.ltype(0)
     634    return minbin,maxbin
    558635
    559636def smLinePlot(x,y,logical=None,ltype=0,xlab=None,ylab=None,xrange=None,yrange=None,\
     
    571648        sm.ltype(ltype)
    572649        sm.connect(x,y,logical)
     650        sm.ltype(0)
    573651    except:
    574652        pass
     
    602680    if isNone(yrange):
    603681        yrange = y
    604     sm.limits(x,y)
     682    # print "Setting limits to ",xrange,yrange
     683    sm.limits(xrange,yrange)
    605684    smBox(box1,box2,box3,box4)
    606685    if not isNone(xlab):
Note: See TracChangeset for help on using the changeset viewer.