Context Navigation

← Previous Changeset
Next Changeset →

Changeset 20086

Timestamp:

Oct 12, 2008, 11:34:23 PM (18 years ago)

Author:

Sebastian Jester

Message:

Add histogram plotting, a way to specify overplots, and generalize stats_med routine to return a list of ntiles

File:

: 1 edited

branches/sj_ippTests_branch_20080929/ippTests/compIPPphoto.py (modified) (13 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/sj_ippTests_branch_20080929/ippTests/compIPPphoto.py

-              r19924
+              r20086
 #     + magdiff vs. sky
 #     + magdiff vs. skydiff
+#     + magdiff vs. stellar density
+#     + magdiff vs. presence of bright stars
 #     + something about PSF sizes; use Michigan moments for photo -
 #       M_rr_cc_psf is the size of the reconstructed PSF, and m_rr_cc is the size of the object
 #     + magdiff, mag vs. d_x^2+d_y^2
 #     + histogram of chi - but meaningful only for repeat measurements?
+#     + object density - store!
+#
 #   - per-run:
 #     + histograms of everything - recovery fractions, means, medians, quartiles
 #     + Trends with field number, seeing etc.
+# Convention: for scatter, plot second against first; for histogram, plot both histograms
+#
+# XXX: Need mechanism to specify default plotting ranges, since
+# outliers mess up axis ranges. E.g. 0.1 and 0.9 percentiles?
 plotcol_tlist = [
+    ('d_sky','d_mag','scatter')
+    ,('d_x','d_y','scatter')
+    ,('sky_ps1','d_mag','scatter')
+    ,('psfinstmag_sdss','d_mag','scatter')
+    ,('sky_ps1','d_mag','scatter')
+    ,('m_rr_cc_psf','d_mag','scatter')
+    (['d_sky'],['d_mag'],'scatter')
+    ,(['d_x'],['d_y'],'scatter')
+    ,(['sky_ps1'],['d_mag'],'scatter')
+    ,(['psfinstmag_sdss'],['d_mag'],'scatter')
+    ,(['m_rr_cc_psf'],['d_mag'],'scatter')
+    ,(['psfinstmag_sdss'],['psf_inst_mag'],'histogram')
+    ,(['sky_sdss'],['sky_ps1'],'histogram')
+    ,(['x_psf'],['objc_colc'],'histogram')
+    ,(['y_psf'],['objc_rowc'],'histogram')
+    ,(['x_psf','objc_colc'],['y_psf','objc_rowc'],'scatter')
+    ]
 …
     return column_hash,goodrow_hash
 def getOutnameStatsOnefile(matchtable,kind,col1,col2=None,format='eps'):
+def getOutnameStatsOnefile(matchtable,kind,col1_l,col2_l=None,format='eps'):
     import re
     # Construct output filename
     root = re.sub('(\.[sc]mf|\.fits?)$','',matchtable)
     if isNone(col2):
         outname = '%s_%s_%s.%s' % (root,kind,col1,format)
+    if isNone(col2_l):
+        outname = '%s_%s_%s.%s' % (root,kind,col1[0],format)
     else:
+        outname = '%s_%s_%s_%s.%s' % (root,kind,col1,col2,format)
+        if len(col1_l) == 1:
+            outname = '%s_%s_%s_%s.%s' % (root,kind,col1_l[0],col2_l[0],format)
+        else:
+            outname = '%s_%s_%s_%s_%s_%s.%s' % (root,kind,col1_l[0],col2_l[0],col1_l[1],col2_l[1],format)
     return outname
 …
     """Make diagnostic plots for a single table, based on values
     in values_hash"""
+    from numpy import concatenate
+    from sm import cvar,angle
+    # In histograms, only plot core of distribution within these percentiles:
+    histo_min_ntile = 0.03
+    histo_max_ntile = 1-histo_min_ntile
     for troika in plotcol_tlist:
         col1name = troika[0]
         col2name = troika[1]
+        col1name_l = troika[0]
+        col2name_l = troika[1]
         plottype = troika[2]
+        outname = getOutnameStatsOnefile(matchtable,plottype,col1name,col2name,format=format)
+        values1 = values_hash[col1name]
+        goodrows1 = goodrow_hash[col1name]
+        values2 = values_hash[col2name]
+        goodrows2 = goodrow_hash[col2name]
+        # Slice out depth for current filter if necessary
+        if len(values1.shape) > 1:
+            values1 = values1[:,bandindex]
+            goodrows1 = goodrows1[:,bandindex]
+        if len(values2.shape) > 1:
+            values2 = values2[:,bandindex]
+            goodrows2 = goodrows2[:,bandindex]
+        goodrows = goodrows1 & goodrows2
+        # print col1name, col2name, sum(goodrows), sum(values1 > 1e3)
+        # print values1[goodrows & (values1 > 1e3)]
+        outname = getOutnameStatsOnefile(matchtable,plottype,col1name_l,col2name_l,format=format)
         smOpenPlot(outname,format=format)
+        if plottype == 'scatter':
+            smScatterPlot(values1,values2,logical=goodrows,xlab=col1name,ylab=col2name)
+        smClosePlot()
+        firstplot = True
+        for col1name,col2name in zip(col1name_l,col2name_l):
+            values1 = values_hash[col1name]
+            goodrows1 = goodrow_hash[col1name]
+            values2 = values_hash[col2name]
+            goodrows2 = goodrow_hash[col2name]
+            # Slice out depth for current filter if necessary
+            if len(values1.shape) > 1:
+                values1 = values1[:,bandindex]
+                goodrows1 = goodrows1[:,bandindex]
+            if len(values2.shape) > 1:
+                values2 = values2[:,bandindex]
+                goodrows2 = goodrows2[:,bandindex]
+            goodrows = goodrows1 & goodrows2
+            # print col1name, col2name, sum(goodrows), sum(values1 > 1e3)
+            # print values1[goodrows & (values1 > 1e3)]
+            if plottype == 'scatter':
+                if firstplot:
+                    # Avoid plotting outliers
+                    if not re.search('objc',col2name):
+                        [xmin,xmax] = stats_med(values1[goodrows],[histo_min_ntile,histo_max_ntile])
+                        [ymin,ymax] = stats_med(values2[goodrows],[histo_min_ntile,histo_max_ntile])
+                        # print "Huhu", outname, min(values1),max(values2),xmin,xmax
+                        smScatterPlot(values1,values2,logical=goodrows,xlab=col1name,ylab=col2name,\
+                                          # xrange=(xmin,xmax),yrange=(ymin,ymax))
+                                      xrange=None,yrange=None)
+                    else:
+                        smScatterPlot(values1,values2,logical=goodrows,xlab=col1name,ylab=col2name)
+                else:
+                    angle(45)
+                    smScatterPlot(values1,values2,logical=goodrows,append=True)
+                    angle(0)
+            if plottype == 'histogram':
+                Nbins = 20
+                values1 = values1[goodrows1] # This is SDSS column typically
+                values2 = values2[goodrows2]
+                # Avoid plotting outliers
+                if not re.search('objc',col2name):
+                    [minbin,maxbin] = stats_med(concatenate((values1,values2)),\
+                                                    [histo_min_ntile,histo_max_ntile])
+                else:
+                    minbin=None
+                    maxbin=None
+                smHistoPlot(values2,ltype=0,nbins=Nbins,minbin=minbin,maxbin=maxbin,xlab=col2name,ylab="N")
+                smHistoPlot(values1,append=True,minbin=minbin,maxbin=maxbin,nbins=Nbins,\
+                            ltype=2)
+            firstplot = False
+    smClosePlot()
 def valuesKeysSortedByKeys(hash):
 …
         header.update('HIERARCH %s'%(label),value)
def stats_med(arr,ntiles=(0.25,0.5,0.75)):
    """Return a list of ntiles of array arr.

    arr    -- one-dimensional sequence or numpy array of values
    ntiles -- sequence of fractions between 0 and 1; the default gives
              lower quartile, median and upper quartile

    Returns a list with one value per entry in ntiles.  Each value is the
    element of the sorted array at index int(ntile*(len(arr)-1)), i.e. the
    fractional position is truncated and no interpolation is done.  For
    empty input the list holds one nan per requested ntile, so callers can
    unpack the result whatever the number of ntiles.
    """
    from numpy import nan,sort
    l = len(arr)
    if l == 0:
        return [nan] * len(ntiles)
    sortarr = sort(arr,kind='mergesort')
    # Array indices must be integers; truncate the fractional position.
    return [sortarr[int(ntile*(l-1))] for ntile in ntiles]
 def computeStatistics(tablename,copyfields_list = ['RUN','RERUN','CAMCOL','FIELD','FILTER','FWHM_X','FWHM_Y']):
 …
     by their header names"""
     import pyfits,re,operator
     from  numpy import sqrt,log10,log
+    from  numpy import sqrt,log10,log,array,isfinite
     label_l = ['mean','rms','median','lowerquartile','upperquartile']
-    # Hash of output header keyword names pointing to paired lists of
-    # columns that are to be subtracted from each other for statistics
-    # to be generated.
     filterID={'u':0,'g':1,'r':2,'i':3,'z':4}
-    # For this to work, table needs to be called match_r_... for r-band
     infile_handle = pyfits.open(tablename,mode='update')
 …
             goodrow_hash[column.lower()] = goodValBool(table_data.field(column))
             colval_hash[column.lower()] = array(table_data.field(column))
+    # These are just the columns; need to get a slice with the correct array index later
+    # Maybe I want to keep track of these number of "good" rows?
+    # XXX: Count number of objects in a) SDSS, b) PS1, c) both, by
+    #  counting number of a) entries > 0 in 'id', b) non-nan entries
+    #  in IPP_IDET, c) both (Later pass the following as parameters to
+    #  be read from a config file)
+    N_SDSS_col = 'id'
+    N_SDSS_outcolname = 'N_SDSS'
+    N_PS1_col = 'IPP_IDET'
+    N_PS1_outcolname = 'N_PS1'
+    N_both_outcolname = 'N_both'
+    N_either_outcolname = 'N_either'
+    has_sdss = array(table_data.field(N_SDSS_col)) > 0
+    has_PS1 = isfinite(array(table_data.field(N_PS1_col)))
+    outhash[N_SDSS_outcolname] = sum(has_sdss)
+    outhash[N_PS1_outcolname] = sum(has_PS1)
+    outhash[N_both_outcolname] = sum(has_sdss & has_PS1)
+    outhash[N_either_outcolname] = len(has_PS1)
+    # Hash of output header keyword names pointing to paired lists of
+    # columns that are to be subtracted from each other for statistics
+    # to be generated. This lists just the columns, slicing out the
+    # correct depth from SDSS 5-filter arrays will be done later.
     colname_hash = {
         'd_x':['colc','X_PSF']
 …
         ,'d_magerr':['psfinstmagerr_sdss','PSF_INST_MAG_SIG']
+        }
+    # colname_hash should probably be passed as a parameter so it can
+    # be read from file.
     ismag = re.compile('mag')
     iscounts = re.compile('counts')
     outcoll = []
+    # outcoll = []
 …
         colval_hash[outcol] = SDSScol - PS1col
         avg = delta.mean()
         lowq,med,upq = stats_med(delta)
+        [lowq,med,upq] = stats_med(delta)
         rms = sqrt(delta.var())
         # Save to fits columns/header
 …
         for label,value in zip(outtablabel,[avg,rms,med,lowq,upq]):
             outhash[label]=value
+    newtab = pyfits.new_table(table.columns+pyfits.ColDefs(outcoll),header=h)
+    newprimhdu = pyfits.PrimaryHDU(header=infile_handle[0].header)
+    # #t lines were for writing the new columns which I'm now creating in the match script
+    #t newtab = pyfits.new_table(table.columns+pyfits.ColDefs(outcoll),header=h)
+    #t newprimhdu = pyfits.PrimaryHDU(header=infile_handle[0].header)
     infile_handle.close()
     writeTable(tablename,newprimhdu,newtab)
+    #t writeTable(tablename,newprimhdu,newtab)
     return outhash,colval_hash,goodrow_hash
 …
                     xlab=None,ylab=None,xrange=None,yrange=None,\
                     box1=None,box2=None,box3=None,box4=None,\
                     append=False):
+                    ltype=0,append=False):
     """Plot a histogram, intelligently deriving bins from the given
     parameters if they are given intelligently.  Otherwise, silently
     do nothing."""
     import sm
+    from numpy import histogram
     if isNone(minbin):
         minbin = min(vec)
 …
     bincenters = leftbinedges + 0.5*(leftbinedges[1]-leftbinedges[0])
+    sm.ltype(ltype)
     if not append:
         logical = None
         smSetup(bincenters,histo,logical,xrange,yrange,xlab,ylab,box1,box2,box3,box4)
     sm.histogram(bincenters,histo)
+    sm.ltype(0)
+    return minbin,maxbin
 def smLinePlot(x,y,logical=None,ltype=0,xlab=None,ylab=None,xrange=None,yrange=None,\
 …
         sm.ltype(ltype)
         sm.connect(x,y,logical)
+        sm.ltype(0)
     except:
         pass
 …
     if isNone(yrange):
         yrange = y
+    sm.limits(x,y)
+    # print "Setting limits to ",xrange,yrange
+    sm.limits(xrange,yrange)
     smBox(box1,box2,box3,box4)
     if not isNone(xlab):

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 20086

Legend:

branches/sj_ippTests_branch_20080929/ippTests/compIPPphoto.py

Download in other formats: