IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
May 5, 2011, 10:05:10 AM (15 years ago)
Author:
eugene
Message:

merging updates from trunk

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/eam_branches/ipp-20110404/ippToPsps/jython/batch.py

    r31117 r31439  
    44import datetime
    55import re
     6import sys
     7import os
     8import md5
     9import shutil
     10import logging
     11from subprocess import call, PIPE, Popen
     12
     13from datastore import Datastore
     14from scratchdb import ScratchDb
     15from gpc1db import Gpc1Db
     16from ipptopspsdb import IppToPspsDb
    617
    718from java.lang import *
    819from java.sql import *
    9 
    10 
     20from xml.etree.ElementTree import ElementTree, Element, tostring
     21
     22'''
     23Base class of all batch types.
     24'''
    1125class Batch(object):
    1226
    13     driverName="com.mysql.jdbc.Driver"
    14 
    1527    '''
    1628    Constructor
    17     '''
    18     def __init__(self, batchType, inputFitsPath, outputFitsPath, dbHost, dbName, dbUser, dbPass, survey=""):
     29
     30    >>> batch = Batch(1,2,3,4,5,6,7)
     31    >>> print batch.pspsVoTableFilePath
     32    "../config/2/tables.vot"
     33    '''
     34    def __init__(self, logger, batchType, inputFitsPath="", survey="", useFullTables=False):
     35
     36        # set up logging
     37        self.logger = logger
     38        self.logger.info("-------------------------------------------------------------------------------")
     39        self.logger.debug("Batch class constructor")
    1940
    2041        # set up class variables
     42        self.batchType = batchType;
    2143        self.pspsVoTableFilePath = "../config/" + batchType + "/tables.vot"
    2244        self.inputFitsPath = inputFitsPath
    23         self.outputFitsPath = outputFitsPath
    24         self.dbHost = dbHost
    25         self.dbName = dbName
    26         self.dbUser = dbUser
    27         self.dbPass = dbPass
    2845        self.survey = survey
    29 
    30         # set up JDBC connection
    31         self.url = "jdbc:mysql://"+self.dbHost+"/"+self.dbName+"?user="+self.dbUser+"&password="+self.dbPass
    32         self.con = DriverManager.getConnection(self.url)
    33         self.stmt = self.con.createStatement()
    34 
    35         # get survey ID from init table
    36         sql = "SELECT surveyID from Survey WHERE name = '" + survey + "'"
    37         try:
    38             rs = self.stmt.executeQuery(sql) 
    39             rs.first()
    40             self.surveyID = rs.getInt(1)
    41         except:
    42             self.log("No survey ID found for this survey: '" + survey + "'")
     46        self.useFullTables = useFullTables
     47
     48        # TODO
     49        self.tablesToExport = []
     50
     51        # open config
     52        doc = ElementTree(file="config.xml")
     53
     54        # create Gpc1Db object
     55        self.gpc1Db = Gpc1Db(self.logger)
     56        self.ippToPspsDb = IppToPspsDb(logger)
     57        self.scratchDb = ScratchDb(logger, self.useFullTables)
     58
     59        if self.survey != "":
     60            self.surveyID = self.scratchDb.getSurveyID(self.survey)
     61   
     62            # get dvo info from config
     63            dvoName = doc.find("dvo_" + self.survey + "/name").text
     64            self.dvoLocation = doc.find("dvo_" + self.survey + "/location").text
     65        else:
     66            dvoName = ""
     67            self.dvoLocation = ""
    4368            self.surveyID = -1;
    44    
     69         
     70        # get datastore info from config
     71        self.datastore = Datastore(self.logger)
     72
     73        # create a new batch
     74        self.batchID = self.ippToPspsDb.createNewBatch(
     75                self.getPspsBatchType(),
     76                survey,
     77                dvoName,
     78                self.datastore.product)
     79
     80        # get local storage location from config
     81        self.batchName = "B%08d" % self.batchID
     82        self.subDir = doc.find("localOutPath").text + "/" + self.getPspsBatchType() + "/" + dvoName
     83        self.localOutPath = self.subDir + "/" + self.batchName
     84        if not os.path.exists(self.localOutPath): os.makedirs(self.localOutPath)
     85
    4586        # store today's date
    4687        now = datetime.datetime.now();
     
    4889
    4990        if self.inputFitsPath != "":
    50             self.parseFitsHeader()
     91            file = open(self.inputFitsPath)
     92            self.header = self.parseFitsHeader(file)
     93            self.logger.info("Read primary and found " + str(len(self.header)) + " header cards")
     94            # TODO close file?
     95
     96        # create DVO tables if accessing DVO directly
     97        if not self.useFullTables: self.scratchDb.createDvoTables()
    5198
    5299    '''
     
    55102    def __del__(self):
    56103
    57         self.log("Batch destructor")
    58         self.stmt.close()
    59         self.con.close()
    60 
    61     '''
    62     Prints a log message with the current time
    63     '''
    64     def log(self, msg):
    65 
    66         print datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " | " + msg
    67 
    68     '''
    69     Updates a table with surveyID
    70     '''
    71     def updateSurveyID(self, table):
    72 
    73         sql = "UPDATE " + table + "  SET surveyID=%d" % self.surveyID
    74         self.stmt.execute(sql)
    75 
    76     '''
    77     Updates a table with filterID grabbed from Filter init table
    78     '''
    79     def updateFilterID(self, table):
    80 
    81         sql = "UPDATE "+table+" AS a, Filter AS b SET a.filterID=b.filterID WHERE b.filterType = '" + self.filter + "'"
    82         self.stmt.execute(sql)
     104        self.logger.debug("Batch destructor")
     105
     106
     107    '''
     108    Returns the value from this dictionary or else NULL
     109    '''
     110    def safeDictionaryAccess(self, header, key):
     111
     112         if key in header: return header[key]
     113         else: return "NULL"
     114
     115    '''
     116    Finds and reads a header extension
     117    '''
     118    def findAndReadFITSHeader(self, name, file):
     119
     120        found = False
     121       
     122        while True:
     123           
     124            index = file.tell()
     125
     126            record = file.read(80)
     127            if not record: break;
     128
     129            # quit when we reach 'END'
     130            if record.startswith("XTENSION= 'IMAGE"):
     131
     132                header = self.parseFitsHeader(file)
     133                if header['EXTNAME'] == name:
     134                    found = True
     135                    file.seek(index + 2880, 0)
     136                    break
     137
     138            file.seek(index + 2880, 0)
     139           
     140        if found != True: self.logger.error("...could not find extension '" + name + "'")
     141        else: self.logger.info("...read header at '" + name + "' and found " + str(len(header)) + " header cards")
     142
     143        return header
     144
     145
     146    '''
     147    Writes the batch manifest file
     148    '''
     149    def writeBatchManifest(self):
     150
     151        outPath = self.localOutPath + "/BatchManifest.xml"
     152        tmpPath = "./tmp.xml"
     153        self.logger.info("Creating batch manifest file here: " + outPath)
     154        root = Element('manifest')
     155
     156        # batch information
     157        root.attrib['name'] = self.batchName
     158        root.attrib['type'] = self.getPspsBatchType()
     159        root.attrib['timestamp'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     160        if self.survey != "":
     161            root.attrib['survey'] = self.getBatchFriendlySurveyType()
     162        try: self.minObjID
     163        except: pass
     164        else: root.attrib['minObjId'] = str(self.minObjID)
     165        try: self.maxObjID
     166        except: pass
     167        else: root.attrib['maxObjId'] = str(self.maxObjID)
     168
     169        # get md5sum
     170        p = Popen("md5sum " + self.outputFitsPath, shell=True, stdout=PIPE)
     171        p.wait()
     172        out = p.stdout.read()
     173        md5sum = out[0:out.rfind(" ")]
     174
     175        # get file size
     176        fileSize = os.path.getsize(self.outputFitsPath)
     177
     178        # file information
     179        child = Element('file')
     180        root.append(child)
     181        child.attrib['name'] = self.outputFitsFile
     182        child.attrib['bytes'] = str(fileSize)
     183        child.attrib['md5'] = md5sum
     184
     185        # now create doc and write to file
     186        file = open(tmpPath, 'w')
     187        ElementTree(root).write(file)
     188        file.close()
     189
     190        # clunky way to prettify XML
     191        p = Popen("xmllint --format " + tmpPath + " > " + outPath, shell=True, stdout=PIPE)
     192        p.wait()
     193        os.remove(tmpPath)
     194
     195
     196    '''
     197    tar and zips batch directory
     198    '''
     199    def createTarball(self):
     200     
     201        # set up filenames and paths
     202        tarFile = self.batchName + ".tar"
     203        tarPath = self.subDir + "/" + tarFile
     204
     205        self.tarballFile = tarFile + ".gz"
     206        tarballPath = self.subDir + "/" + self.tarballFile
     207
     208        # tar directory
     209        p = Popen("tar -cvf " + tarPath + "\
     210                -C " + self.subDir + " \
     211                " + self.batchName, shell=True, stdout=PIPE)
     212        p.wait()
     213
     214        # zip tar archive
     215        p = Popen("gzip -c " + tarPath + " > " + tarballPath, shell=True, stdout=PIPE)
     216        p.wait()
     217
     218        # delete tar file and original directory
     219        os.remove(tarPath)
     220        shutil.rmtree(self.localOutPath)
     221
     222    '''
     223    Publishes this batch to the datastore
     224    '''
     225    def publishToDatastore(self):
     226
     227        if self.datastore.publish(self.batchName, self.subDir, self.tarballFile, "tgz"):
     228            self.ippToPspsDb.updateLoadedToDatastore(self.batchID, 1)
     229
     230    '''
     231    Gets PSPS-friendly survey type
     232    '''
     233    def getBatchFriendlySurveyType(self):
     234
     235        return "SCR" # TODO
     236
     237        try:
     238            self.survey
     239        except:
     240            return "NA"
     241
     242        if self.survey == "3PI": return "3PI"
     243        elif self.survey == "MD04": return "MD4"
     244        else:
     245            self.logger.error("Don't know this survey: '" + self.survey + "'")
     246            return "NA"
     247
     248    '''
     249    Gets PSPS friendly batch type
     250    '''
     251    def getPspsBatchType(self):
     252
     253        if self.batchType == "init": return "IN"
     254        elif self.batchType == "detection": return "P2"
     255        elif self.batchType == "stack": return "ST"
     256        else: self.logger.error("Don't know this batch type: " + self.survey)
     257
     258    '''
     259    Sets min and max obj ID using the provided table, or list of tables
     260    '''
     261    def setMinMaxObjID(self, tables):
     262
     263        first = True
     264        for table in tables:
     265
     266            sql = "SELECT MIN(objID), MAX(objID) FROM " + table
     267            rs = self.scratchDb.stmt.executeQuery(sql)
     268            rs.first()
     269
     270            if first:
     271                self.minObjID = rs.getLong(1)
     272                self.maxObjID = rs.getLong(2)
     273            else:
     274                if rs.getLong(1) < self.minObjID: self.minObjID = rs.getLong(1)
     275                if rs.getLong(2) > self.maxObjID: self.maxObjID = rs.getLong(2)
     276
     277            first = False
     278
     279        self.ippToPspsDb.updateMinMaxObjID(self.batchID, self.minObjID, self.maxObjID)
    83280
    84281    '''
    85282    Reads FITS header and stores all fields in a dictionary object
    86283    '''
    87     def parseFitsHeader(self):
    88 
    89         fitsFile = open(self.inputFitsPath)
    90 
    91         self.header = {}
     284    def parseFitsHeader(self, fitsFile):
     285
     286        header = {}
    92287
    93288        while (True):
     289
    94290           record = fitsFile.read(80)
    95291
    96292           # quit when we reach 'END'
    97            if record.startswith("END"): break
    98 
    99            # ignore comments
    100            if record.startswith("COMMENT"): continue
    101            match = re.match('(.*)=(.*)', record)
     293           if re.match('END\s+', record): break
     294
     295           # this regex will get param/value pairs for all header cards, ignoring comments and parsing out 'HIERARCH' prefixes
     296           match = re.match('^(HIERARCH )*([a-zA-Z0-9-_\.]+)\s*=\s+\'*([a-zA-Z0-9-_\.:\s@#]+)\'*\\/*', record)
    102297           if match:
    103298
    104                # remove HIERARCH prefix
    105                param = match.group(1).replace("HIERARCH", "")
    106                param = param.strip()
    107 
    108                value = match.group(2)
    109                # remove trailing comment after / char, if there is one
    110                index = value.find("/")
    111                if index != -1: value = value[0:index]
    112 
    113                # remove ' chars around content
    114                value = value.replace("'", "")
    115 
    116                # remove leading and trailing whitespace
    117                value = value.strip()
    118 
    119                # store in out dictionary object
    120                self.header[param] = value
     299               param = match.group(2)
     300               value = match.group(3).strip()
     301               if value == "NaN": value = "NULL"
     302               header[param] = value
    121303
    122304               #print param + "|" + value + "|"
     305
     306        return header
    123307
    124308    '''
     
    129313         self.pspsTables = stilts.treads(self.pspsVoTableFilePath)
    130314         for table in self.pspsTables:
    131              self.log("Creating PSPS table: " + table.name)
    132              table.write(self.url + '#' + table.name)
    133 
    134          self.indexPspsTables()
     315             self.logger.info("Creating PSPS table: " + table.name)
     316             table.write(self.scratchDb.url + '#' + table.name)
     317             self.tablesToExport.append(table.name)
     318
     319         self.alterPspsTables();
    135320
    136321    '''
     
    138323    '''   
    139324    def indexIppTables(self):
    140         self.log("indexIppTables not implemented")
    141 
    142 
    143     '''
    144     Adds an index to the supplied table and column
    145     '''
    146     def createIndex(self, table, column):
    147 
    148         self.log("Creating index on column '"+column+"' for table '"+table+"'")
    149 
    150         sql = "CREATE INDEX "+table+"_index ON "+table+" ("+column+")"
    151         try:
    152             self.stmt.execute(sql)
    153         except:
    154             self.log("Index already in place on '" + column + "' for table '" + table + "'")
    155 
    156 
    157     '''
    158     Subclass should implement this to index PSPS tables
     325        self.logger.warn("indexIppTables not implemented")
     326
     327
     328    '''
     329    Alter PSPS tables
    159330    '''   
    160     def indexPspsTables(self):
    161         self.log("indexPspsTables not implemented")
     331    def alterPspsTables(self):
     332        self.logger.warn("alterPspsTables not implemented")
    162333
    163334    '''
    164335    Imports IPP tables from FITS file
    165336
    166     Accepts a regular expression filter so not all tabls need to be imported
     337    Accepts a regular expression filter so not all tables need to be imported
    167338    '''
    168339    def importIppTables(self, filter):
    169340
     341      self.logger.info("Attempting to import tables from input FITS file")
    170342      tables = stilts.treads(self.inputFitsPath)
    171343
     
    175347          match = re.match(filter, table.name)
    176348          if not match: continue
    177           self.log("Creating IPP table " + table.name)
     349          self.logger.info("   Reading IPP table " + table.name + " from FITS file")
    178350          table = stilts.tpipe(table, cmd='explodeall')
     351
     352          # drop any previous tables before import
     353          self.scratchDb.dropTable(table.name)
     354
     355          # IPP FITS files are littered with infinities, so remove these
     356          self.logger.info("   Removing Infinity values from all columns")
     357          table = stilts.tpipe(table, cmd='replaceval -Infinity null *')
     358          table = stilts.tpipe(table, cmd='replaceval Infinity null *')
     359
    179360          try:
    180               table.write(self.url + '#' + table.name)
     361              table.write(self.scratchDb.url + '#' + table.name)
    181362          except:
    182               self.log("ERROR problem writing table '" + table.name + "' to the database")
    183 
     363              self.logger.exception("   Problem writing table '" + table.name + "' to the database")
    184364          count = count + 1
    185365
    186       self.log("Imported %d tables from '%s' " % (count, self.inputFitsPath))
     366      self.logger.info("Done. Imported %d tables" % count)
    187367
    188368      self.indexIppTables()
    189369
    190370    '''
    191     Exports PSPS tables from the database to FITS format
     371    Exports PSPS tables from the database to FITS format. Optional regex if you want to alter table names prior to export
    192372    '''   
    193     def exportPspsTablesToFits(self):
    194 
    195         self.log("Exporting all PSPS tables to FITS")
     373    def exportPspsTablesToFits(self, regex="(.*)"):
     374
     375        self.logger.info("Replacing NULLs with -999 then exporting all PSPS tables to FITS")
    196376        _tables = []
    197377
    198         self.log("    Selecting database tables")
     378        self.logger.info("    Selecting database tables")
     379        for table in self.tablesToExport:
     380
     381           # check for an empty table
     382           if self.scratchDb.getRowCount(table) < 1: continue
     383
     384           # get everything from table
     385           _table = stilts.tread(self.scratchDb.url + '#SELECT * FROM ' + table)
     386
     387           # replace nulls and empty fields with weird PSPS -999 pseudo-null
     388           _table = stilts.tpipe(_table, cmd='replaceval "" -999 *')
     389
     390           match = re.match(regex, table)
     391           newTableName = match.group(1)
     392
     393           # change table names
     394           _table = stilts.tpipe(_table, cmd='tablename ' + newTableName)
     395           _tables.append(_table)
     396
     397        self.logger.info("    Writing to FITS file '" + self.outputFitsPath + "'...")
     398        stilts.twrites(_tables, self.outputFitsPath, fmt='fits')
     399        self.logger.info("    ...done")
     400        self.ippToPspsDb.updateProcessed(self.batchID, 1)
     401
     402    '''
     403    Searches all PSPS tables and reports the columns that are either partially or completely populated with NULLs
     404    '''
     405    def reportNullsInAllPspsTables(self, showPartials):
     406
    199407        for table in self.pspsTables:
    200            _table = stilts.tread(self.url + '#SELECT * FROM ' + table.name)
    201            _table = stilts.tpipe(_table, cmd='tablename ' + table.name)
    202            _tables.append(_table)
    203 
    204         self.log("    Writing to FITS file " + self.outputFitsPath)
    205         stilts.twrites(_tables, self.outputFitsPath, fmt='fits')
    206 
     408            self.scratchDb.reportNulls(table.name, showPartials)
     409
     410    '''
     411    Searches all PSPS tables and replaces all NULLs with the provided substitute
     412    '''
     413    def replaceAllPspsNulls(self, sub):
     414
     415        self.logger.info("Replacing all NULL values in PSPS tables with '" + sub + "'...")
     416        for table in self.pspsTables:
     417            self.scratchDb.replaceNulls(table.name, sub)
     418        self.logger.info("...done")
    207419
    208420    '''
     
    210422    '''
    211423    def populatePspsTables(self):
    212         self.log("Not implemented yet")
    213 
     424        self.logger.warn("Not implemented yet")
     425
     426    '''
     427    Calls DVO program to 'query' DVO database and populate results to local MySQL Db table
     428    '''
     429    def getIDsFromDVO(self):
     430
     431        # TODO path to DVO prog hardcoded temporarily
     432        cmd = "../src/dvograbber " + self.dvoLocation
     433        self.logger.info("Running: '" + cmd + "'...")
     434        p = Popen(cmd, shell=True, stdout=PIPE)
     435        p.wait()
     436        #        out = p.stdout.read()
     437        self.logger.info("...done")
     438
     439        if self.scratchDb.getRowCount("dvoDetection") < 1:
     440            self.logger.error("No DVO IDs found")
     441            return False
     442           
     443        return True
     444
     445    '''
     446    Checks whether this batch has already been processed and published. To be implemented by all subclasses
     447    '''
     448    def alreadyProcessed(self):
     449           self.logger.info("Not implemented")
     450
     451
     452
Note: See TracChangeset for help on using the changeset viewer.