IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
Apr 5, 2011, 3:02:30 PM (15 years ago)
Author:
rhenders
Message:

Added methods to report NULL columns as well replace NULLs with a substitute (necessary for crazy PSPS -999 NULL thing); changed constructor arguments

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/ippToPsps/jython/batch.py

    r31117 r31178  
    44import datetime
    55import re
     6import sys
     7
     8from gpc1 import Gpc1
    69
    710from java.lang import *
    811from java.sql import *
    9 
    10 
     12from xml.etree.ElementTree import ElementTree
     13
     14
     15'''
     16Base class of all batch types.
     17'''
    1118class Batch(object):
    1219
     
    1522    '''
    1623    Constructor
    17     '''
    18     def __init__(self, batchType, inputFitsPath, outputFitsPath, dbHost, dbName, dbUser, dbPass, survey=""):
     24
     25    >>> batch = Batch(1,2,3,4,5,6,7)
     26    >>> print batch.pspsVoTableFilePath
     27    "../config/2/tables.vot"
     28    '''
     29    def __init__(self, batchType, inputFitsPath, survey=""):
    1930
    2031        # set up class variables
    2132        self.pspsVoTableFilePath = "../config/" + batchType + "/tables.vot"
    2233        self.inputFitsPath = inputFitsPath
    23         self.outputFitsPath = outputFitsPath
    24         self.dbHost = dbHost
    25         self.dbName = dbName
    26         self.dbUser = dbUser
    27         self.dbPass = dbPass
    2834        self.survey = survey
    2935
    30         # set up JDBC connection
    31         self.url = "jdbc:mysql://"+self.dbHost+"/"+self.dbName+"?user="+self.dbUser+"&password="+self.dbPass
    32         self.con = DriverManager.getConnection(self.url)
    33         self.stmt = self.con.createStatement()
     36        # open config
     37        doc = ElementTree(file="config.xml")
     38
     39        # set up JDBC connection to local Db
     40        dbName = doc.find("localdatabase/name").text
     41        dbHost = doc.find("localdatabase/host").text
     42        dbUser = doc.find("localdatabase/user").text
     43        dbPass = doc.find("localdatabase/password").text
     44        self.localUrl = "jdbc:mysql://"+dbHost+"/"+dbName+"?user="+dbUser+"&password="+dbPass
     45        self.localCon = DriverManager.getConnection(self.localUrl)
     46        self.localStmt = self.localCon.createStatement()
     47
     48        # create Gpc1 object
     49        self.gpc1 = Gpc1()
     50
     51        # set up JDBC connection to gpc1 Db
     52        dbName = doc.find("gpc1database/name").text
     53        dbHost = doc.find("gpc1database/host").text
     54        dbUser = doc.find("gpc1database/user").text
     55        dbPass = doc.find("gpc1database/password").text
     56        self.gpc1Url = "jdbc:mysql://"+dbHost+"/"+dbName+"?user="+dbUser+"&password="+dbPass
     57        self.gpc1Con = DriverManager.getConnection(self.localUrl)
     58        self.gpc1Stmt = self.localCon.createStatement()
    3459
    3560        # get survey ID from init table
    3661        sql = "SELECT surveyID from Survey WHERE name = '" + survey + "'"
    3762        try:
    38             rs = self.stmt.executeQuery(sql) 
     63            rs = self.localStmt.executeQuery(sql) 
    3964            rs.first()
    4065            self.surveyID = rs.getInt(1)
     
    5681
    5782        self.log("Batch destructor")
    58         self.stmt.close()
    59         self.con.close()
     83        self.localStmt.close()
     84        self.localCon.close()
     85        self.gpc1Stmt.close()
     86        self.gpc1Con.close()
    6087
    6188    '''
     
    7299
    73100        sql = "UPDATE " + table + "  SET surveyID=%d" % self.surveyID
    74         self.stmt.execute(sql)
     101        self.localStmt.execute(sql)
    75102
    76103    '''
     
    80107
    81108        sql = "UPDATE "+table+" AS a, Filter AS b SET a.filterID=b.filterID WHERE b.filterType = '" + self.filter + "'"
    82         self.stmt.execute(sql)
     109        self.localStmt.execute(sql)
    83110
    84111    '''
     
    130157         for table in self.pspsTables:
    131158             self.log("Creating PSPS table: " + table.name)
    132              table.write(self.url + '#' + table.name)
     159             table.write(self.localUrl + '#' + table.name)
    133160
    134161         self.indexPspsTables()
     
    150177        sql = "CREATE INDEX "+table+"_index ON "+table+" ("+column+")"
    151178        try:
    152             self.stmt.execute(sql)
     179            self.localStmt.execute(sql)
    153180        except:
    154181            self.log("Index already in place on '" + column + "' for table '" + table + "'")
     
    178205          table = stilts.tpipe(table, cmd='explodeall')
    179206          try:
    180               table.write(self.url + '#' + table.name)
     207              table.write(self.localUrl + '#' + table.name)
    181208          except:
    182209              self.log("ERROR problem writing table '" + table.name + "' to the database")
     
    198225        self.log("    Selecting database tables")
    199226        for table in self.pspsTables:
    200            _table = stilts.tread(self.url + '#SELECT * FROM ' + table.name)
     227           _table = stilts.tread(self.localUrl + '#SELECT * FROM ' + table.name)
    201228           _table = stilts.tpipe(_table, cmd='tablename ' + table.name)
    202229           _tables.append(_table)
    203230
    204         self.log("    Writing to FITS file " + self.outputFitsPath)
     231        self.log("    Writing to FITS file '" + self.outputFitsPath + "'...")
    205232        stilts.twrites(_tables, self.outputFitsPath, fmt='fits')
    206 
     233        self.log("    ...done")
     234
     235    '''
     236    Returns a list of column names for this table
     237    '''
     238    def getColumnNames(self, tableName):
     239
     240       sql = "SHOW COLUMNS FROM " + tableName
     241       rs = self.localStmt.executeQuery(sql)
     242       columns = []
     243       while (rs.next()): columns.append(rs.getString(1))
     244       rs.close()
     245       
     246       return columns
     247
     248    '''
     249    Replaces all NULL values in the provided table with the prvoded substitute
     250    '''
     251    def replaceNulls(self, tableName, sub):
     252
     253       # get list of columns
     254       columns = self.getColumnNames(tableName)
     255
     256       # now loop through all columns and replace all NULLs with sub
     257       for column in columns:
     258         
     259          sql = "UPDATE " + tableName + " SET " + column + " = " + sub + " WHERE " + column + " IS NULL"
     260          self.localStmt.execute(sql)
     261
     262
     263    '''
     264    Searches a table and reports the columns that are either partially or completely populated with NULLs
     265    '''
     266    def reportNulls(self, tableName, showPartials):
     267
     268       # first, count rows
     269       sql = "SELECT COUNT(*) FROM " + tableName
     270       rs = self.localStmt.executeQuery(sql)
     271       rs.first()
     272       numRows = rs.getInt(1)
     273
     274       # get list of columns
     275       columns = self.getColumnNames(tableName)
     276
     277       print "+----------------------+---------------+"
     278       print "|  %25s           |" % tableName
     279       print "+----------------------+---------------+"
     280
     281       # now see which columns are full of NULLS, with are partially NULL
     282       for column in columns:
     283         
     284          sql = "SELECT COUNT(*) FROM " + tableName + " WHERE " + column + " IS NULL"
     285          rs = self.localStmt.executeQuery(sql)
     286          rs.first()
     287          if rs.getInt(1) == numRows:
     288              print "| %20s | all NULL      |" % column
     289          elif showPartials and rs.getInt(1) > 0:
     290              print "| %20s | partial NULL  |" % column
     291       rs.close()
     292       print "+----------------------+---------------+"
     293
     294
     295    '''
     296    Searches all PSPS tables and reports the columns that are either partially or completely populated with NULLs
     297    '''
     298    def reportNullsInAllPspsTables(self, showPartials):
     299
     300        for table in self.pspsTables:
     301            self.reportNulls(table.name, showPartials)
     302
     303    '''
     304    Searches all PSPS tables and replaces all NULLs with the provided substitute
     305    '''
     306    def replaceAllPspsNulls(self, sub):
     307
     308        self.log("Replacing all NULL values in PSPS tables with '" + sub + "'...")
     309        for table in self.pspsTables:
     310            self.replaceNulls(table.name, sub)
     311        self.log("...done")
    207312
    208313    '''
Note: See TracChangeset for help on using the changeset viewer.