#! /usr/bin/env python

import sys,os,stat,datetime,getopt,time,threading,Queue,re,copy,signal,cPickle
import tarfile
import parse_config

##################
# Notes:
# - Need to fetch updates of a given product in time order,
#   irrespective of their order in the archive.
# - Is there a way we can test recursiveRm for safety?
#   (A hedged guard sketch appears just after recursiveRm below.)
##################

#topURI= "http://alala2.ifa.hawaii.edu/ipp025/ds"
#topURI= "http://alala2.ifa.hawaii.edu/ipp049/ds"
topURI= "http://datastore.ipp.ifa.hawaii.edu"
#mirrorRootDir= "/Volumes/data/PS1/data/MIRROR"
#mirrorRootDir= "/n/panstarrs/data/MIRROR"
#mirrorRootDir= "/panstarrs/data/MIRROR"
#mirrorRootDir= "/bessemer/welling/ps_ipp/MIRROR"
#mirrorRootDir= "/big/welling/MIRROR"
mirrorRootDir= "/var/tmp/MIRROR"
#desiredProductList= ['dist0','gpc1-sample','iq-evaluation','md-all-rel-200907']
desiredProductList= ['ps1-md']
hiddenProductList= ['md-all-rel-200907',
                    'durham-200907',
                    'qub-200907',
                    'edb-200907',
                    'jhu-200907',
                    'threepi-all-200908',
                    'ps1-200910','ps1-cat-200910',
                    'ps1-md']
productStatusTableFilename= "product_status.csv"
failureTableFilename= "download_failures.csv"
leftoverWorkFilename= "update_mirror_leftover_work.pkl"
maxFetchAttempts= 3
logFileName= "update_mirror.log"
logFile= None
lastFilesetId= None
indexColumnFilterDict= {}
verbose= 0
debug= 0
groupWriteFlag= 0
selectTarballRegex= None
numDownloadThreads= 10    # This seems to be about the limit for cyclops01
gblKeepRunning= True

# We use these in lieu of lists because they support the locking needed
# by the threads.
pendingDownloadQueue= None   # All the products we're trying to get
failedDownloadQueue= None    # Products we've given up on

# All of the workers will contribute to the count of total downloaded bytes
totalKBytesDownloaded= 0
lockTotalKBytesDownloaded= None
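
# A minimal sketch of the thread-safe accumulation the workers perform
# (hypothetical helper for illustration only; fetchSingleUpdate below
# inlines the same acquire/add/release steps rather than calling this):
def _addToDownloadTotal(nKBytes):
    global totalKBytesDownloaded
    lockTotalKBytesDownloaded.acquire()
    try:
        totalKBytesDownloaded += nKBytes
    finally:
        lockTotalKBytesDownloaded.release()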

def handler( signum, frame ):
    raise IOError, "caught signal %s!"%signum

def logMessage(msg):
    global logFile
    if not logFile:
        logFile= open(logFileName,"a")
    logFile.write("%s: %s\n"%\
                      (datetime.datetime.now().strftime("%Y/%m/%d/%H:%M:%S"),
                       msg))
    logFile.flush()

def logMessageAndExit(msg):
    logMessage(msg)
    sys.exit(msg)

def readCmdOutputToList(cmd):
    #logMessage("running <%s>"%cmd)
    cmdout= os.popen(cmd)
    result= cmdout.readlines()
    if cmdout.close() != None :
        raise RuntimeError("Command failed: <%s>"%cmd)
    #logMessage("read %d entries"%len(result))
    return result

def parseCSV( ifile ):
    "returns a tuple containing a list of keys and a list of dicts"
    lineList= []
    possibleDelimiters= [",","\t",None] # None means whitespace-delimited
    lines= ifile.readlines()
    if len(lines)==0:
        return [],lineList  # in case input is an empty file
    delimFound= 0
    delimForThisFile= None
    for delim in possibleDelimiters:
        wordCount= len(lines[0].split(delim))
        if wordCount<2: continue
        delimStillACandidate= 1
        for line in lines[1:]:
            nwords= len(line.split(delim))
            if nwords>1 and nwords != wordCount:
                if debug: print "Delim is not <%s>\n"%delim
                delimStillACandidate= 0
                break
        if delimStillACandidate:
            delimFound= 1
            delimForThisFile= delim
            break
    if not delimFound:
        sys.exit("Cannot find the right delimiter for this CSV input!")
    if debug: print"delimForThisFile= <%s>"%delimForThisFile
    keys= lines[0].split(delimForThisFile)
    keys= [ x.strip() for x in keys ]
    stringsAreQuoted= 1
    for key in keys:
        if len(key)>0 \
                and (not key.startswith('"') or not key.endswith('"')) \
                and (not key.startswith("'") or not key.endswith("'")):
            stringsAreQuoted= 0
    if debug: print "stringsAreQuoted= %d"%stringsAreQuoted
    if stringsAreQuoted:
        keys = [ x[1:-1] for x in keys ]
    lines= lines[1:]
    lineNum= 1
    for line in lines:
        words= line.split(delimForThisFile)
        words= [x.strip() for x in words]
        if len(words)>0:
            rec= {}
            if len(words)!=len(keys):
                logMessage("Line length error: %d vs %d"%\
                               (len(words),len(keys)))
                for i in xrange(min(len(keys),len(words))):
                    logMessage("%d: <%s> <%s>"%(i,keys[i],words[i]))
                sys.exit("Line length error parsing CSV at line %d"%lineNum)
            for i in xrange(len(keys)):
                if (stringsAreQuoted
                    and ((words[i].startswith('"')
                          and words[i].endswith('"'))
                         or (words[i].startswith("'")
                             and words[i].endswith("'")))):
                    rec[keys[i]]= words[i][1:-1]
                else:
                    try:
                        rec[keys[i]]= int(words[i])
                    except ValueError:
                        try:
                            rec[keys[i]]= float(words[i])
                        except ValueError:
                            rec[keys[i]]= words[i]

            lineList.append(rec)
        lineNum += 1
    return (keys, lineList)
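
# Example of parseCSV's return shape (hypothetical input; the delimiter is
# auto-detected and numeric-looking fields are coerced to int or float):
#
#     productID,count
#     widget,3
#
# parses to (['productID','count'], [{'productID':'widget','count':3}]).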

def writeCSV( ofile, keyList, recDictList, delim=",", quoteStrings=0 ):
    """
    Each element of the input recDictList is a dictionary containing
    keys from keyList.
    """
    if quoteStrings:
        ofile.write('"%s"'%keyList[0])
        for key in keyList[1:]:
            ofile.write('%s"%s"'%(delim,key))
    else:
        ofile.write("%s"%keyList[0])
        for key in keyList[1:]:
            ofile.write("%s%s"%(delim,key))
    ofile.write("\n")
    for rec in recDictList:
        if rec.has_key(keyList[0]):
            val= rec[keyList[0]]
        else: val= 'NA'
        if isinstance(val,int): ofile.write("%d"%val)
        elif isinstance(val,float): ofile.write("%r"%val)
        elif quoteStrings:
            ofile.write('"%s"'%val)
        else:
            ofile.write("%s"%val)
        for key in keyList[1:]:
            if rec.has_key(key):
                val= rec[key]
            else: val= 'NA'
            if isinstance(val,int): ofile.write("%s%d"%(delim,val))
            elif isinstance(val,float): ofile.write("%s%r"%(delim,val))
            elif quoteStrings:
                ofile.write('%s"%s"'%(delim,val))
            else:
                ofile.write("%s%s"%(delim,val))
        ofile.write("\n")
    if debug:
        print "Wrote %d recs, delim=<%s>, quoteStrings= %d"%\
                  (len(recDictList),delim,quoteStrings)

def parseIndexList(lines):
    if len(lines)==0 or len(lines[0])==0:
        raise RuntimeError("Got an empty index list")
    if lines[0][0]=="#":
        keys= lines[0][1:].split('|')
        keys= ( k.strip() for k in keys )
        cleanKeys= []
        for k in keys:
            if len(k)>0:
                cleanKeys.append(k)
        lines= lines[1:]
    else:
        raise RuntimeError("Did not find '#' in line 1 column 1 of downloaded file list")

    result= []
    lineCount= 1
    for l in lines:
        lineCount += 1
        l= l.strip()
        if len(l)>0:
            words= l.split('|')
            cleanWords= []
            for w in words:
                if len(w)>0:
                    cleanWords.append(w)
            cleanWords= [w.strip() for w in cleanWords]
            if len(cleanWords)==0:
                continue
            if len(cleanWords) < len(cleanKeys):
                logMessageAndExit("Wrong length line %d in index list"%lineCount)
            lineDict= {}
            for i in xrange(len(cleanKeys)):
                lineDict[cleanKeys[i]]= cleanWords[i]
            result.append(lineDict)

    return result
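
# Example of the '#'-headed, pipe-delimited index parseIndexList expects
# (hypothetical values; the column names follow those used elsewhere in
# this script):
#
#     # fileID|bytes|md5sum|type|component|
#     foo.tgz|1024|d41d8cd98f00b204e9800998ecf8427e|tgz|chip|
#
# Each data row becomes a dict keyed by the header's column names.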

def makeGroupWritable(fname):
    os.chmod(fname,os.stat(fname)[stat.ST_MODE] | stat.S_IWGRP)

def makeDirsGroupWritable(path):
    if not os.access(path,os.F_OK):
        parent= os.path.dirname(path)
        if parent!="/": makeDirsGroupWritable(parent)
        os.mkdir(path)
        makeGroupWritable(path)

def placeFiles(product,incomingDir, runType, dirinfoDict, newFileInfoList):
    topDestDir= os.path.join(mirrorRootDir, product,
                             dirinfoDict['destdir']['destdir'][1])
    for entryType,entryComponent,entryFileID in newFileInfoList:
        #logMessage("treating %s %s %s"%(entryType,entryComponent,entryFileID))
        if entryType=='text':
            outDir= topDestDir
            if not os.access(outDir,os.W_OK):
                if groupWriteFlag: makeDirsGroupWritable(outDir)
                else: os.makedirs(outDir)
            # Move annoying metadata to a subdirectory
            if entryComponent=='dbinfo' or entryComponent=='dirinfo':
                outDir= os.path.join(outDir,"metadata")
                if not os.access(outDir,os.W_OK):
                    if groupWriteFlag: makeDirsGroupWritable(outDir)
                    else: os.makedirs(outDir)
            os.rename(os.path.join(incomingDir,entryFileID),
                      os.path.join(outDir,entryFileID))
            if groupWriteFlag: 
                makeGroupWritable(os.path.join(outDir,entryFileID))
        elif entryType=='tgz':
            outDir= os.path.join(topDestDir,
                                 dirinfoDict['components'][entryComponent][1])
            if not os.access(outDir,os.W_OK):
                if groupWriteFlag: makeDirsGroupWritable(outDir)
                else: os.makedirs(outDir)
            tarFile= tarfile.open(os.path.join(incomingDir,entryFileID))
            fnames= tarFile.getnames()
            tarFile.extractall(path=outDir)
            if groupWriteFlag:
                for fn in fnames:
                    makeGroupWritable(os.path.join(outDir,fn))
            logMessage("untarred %s (%d files)"%(entryFileID,len(fnames)))
        else:
            raise RuntimeError("Unknown entry type moving %s into place"%\
                                   entryFileID)
    
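# Note on the dirinfoDict shape placeFiles relies on (inferred from the
# accesses above; parse_config defines the real structure):
# dirinfoDict['destdir']['destdir'][1] is the fileset's destination
# directory relative to the product root, and
# dirinfoDict['components'][component][1] is the subdirectory into which
# that component's tarball is extracted.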
def fetchSingleUpdate(product,dict,incomingDir):
    global totalKBytesDownloaded, lockTotalKBytesDownloaded
    fetchCount= 0
    totalBytesThisUpdate= 0
    lines= readCmdOutputToList("wget -nv -a %s -O - %s/%s/%s/index.txt"%\
                                   (logFileName,
                                    topURI,product,dict['filesetID']))
    indexList= parseIndexList(lines)
    try:
        if groupWriteFlag: makeDirsGroupWritable(incomingDir)
        else: os.makedirs(incomingDir)
    except OSError:
        pass # os.makedirs raises OSError if incomingDir survives from a prev run
    newFileInfoList= []
    runType= None
    dirinfoDict= None
    for entry in indexList:
        if selectTarballRegex is not None \
           and entry['type']=="tgz" \
           and not selectTarballRegex.match(entry['fileID']):
            continue
        lines= readCmdOutputToList("wget -nv -a %s -P %s %s/%s/%s/%s"%\
                                       (logFileName,
                                        incomingDir,
                                        topURI,product,dict['filesetID'],
                                        entry['fileID']))
        fetchCount += 1
        fname= os.path.join(incomingDir, entry['fileID'])
        totalBytesThisUpdate += int(entry['bytes'])
        md5SumLines= readCmdOutputToList("md5sum %s"%fname)
        md5Sum= md5SumLines[0].split()[0].strip()
        #logMessage("%s: <%s> vs <%s>"%(entry['fileID'],
        #                               md5Sum,entry['md5sum']))
        if md5Sum!=entry['md5sum']:
            raise RuntimeError("checksum mismatch on %s: %s vs. %s"%\
                                   (entry['fileID'],md5Sum,entry['md5sum']))
        if entry['type']=="tgz":
            pass
        elif entry['type']=='text':
            if entry['component']=='dbinfo':
                f= open(fname,"r")
                l= f.readline().strip()
                words= l.split()
                f.close()
                if len(words)!=2 or words[1]!="MULTI":
                    raise RuntimeError("First line of %s is not 'something MULTI'"%\
                                           entry['fileID'])
                runType= words[0]
            elif entry['component']=='dirinfo':
                f= open(fname,"r")
                dirinfoDict= parse_config.parse(f,{})
                f.close()
            else:
                raise RuntimeError("Unrecognized text component %s processing %s"%\
                                       (entry['component'],entry['fileID']))
        else:
            raise RuntimeError("Unrecognized type %s processing %s"%\
                                   (entry['type'],entry['fileID']))
        newFileInfoList.append( (entry['type'], entry['component'], 
                             entry['fileID']) )
    if runType is None or dirinfoDict is None:
        raise RuntimeError("Missing needed info processing %s"%\
                               dict["filesetID"])
    placeFiles(product, incomingDir, runType, dirinfoDict, newFileInfoList)
    recursiveRm(incomingDir)

    logMessage("%s finished fileset %s: %d downloads, %d bytes"%\
               (threading.currentThread().name,dict['filesetID'],
                fetchCount,totalBytesThisUpdate))

    lockTotalKBytesDownloaded.acquire()
    totalKBytesDownloaded += totalBytesThisUpdate/1024
    lockTotalKBytesDownloaded.release()

    return dict

def getFailureList():
    try:
        f= open(failureTableFilename,"r")
    except IOError:
        logMessage("Download failure table <%s> does not exist; will create it"%\
                       failureTableFilename)
        return []

    keyList,recs= parseCSV(f)
    f.close()
    return recs

def getProductStatusTable():
    try:
        f= open(productStatusTableFilename,"r")
    except IOError:
        logMessage("Product status table <%s> does not exist; will create it"%\
                       productStatusTableFilename)
        return {}

    keyList,recs= parseCSV(f)
    f.close()
    tbl= {}
    for r in recs:
        tbl[r['productID']]= r
    return tbl

def writeProductStatusTable(tbl):
    keyList= []
    recDictList= []
    for k in tbl.keys():
        thisRecDict= tbl[k]
        thisRecDict['productID']= k
        recDictList.append(thisRecDict)
        for subKey in thisRecDict:
            if not subKey in keyList:
                keyList.append(subKey)
    try:
        f= open(productStatusTableFilename,"w")
    except IOError:
        logMessageAndExit("Can't write product status table <%s>"%\
                              productStatusTableFilename)
    if len(keyList)>0 and len(recDictList)>0:
        writeCSV(f, keyList, recDictList)
    f.close()

def writeFailureList(failureList):
    " Write the failure list .csv if there are failures; otherwise clear the file."
    if len(failureList)>0:
        keyList= []
        for dict in failureList:
            for k in dict.keys():
                if not k in keyList:
                    keyList.append(k)
        try:
            f= open(failureTableFilename,"w")
        except IOError:
            logMessageAndExit("Can't write failure table <%s>"%\
                                  failureTableFilename)
        writeCSV(f, keyList, failureList)
        f.close()
    else:
        try:
            os.remove(failureTableFilename)
        except OSError:
            pass # may fail because the file does not exist

def recursiveRm(targetDir):
    if os.access(targetDir,os.F_OK):
        logMessage("recursiveRm: killing <%s>\n"%targetDir)
        for root, dirs, files in os.walk(targetDir, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))
        os.rmdir(targetDir)
    else:
        logMessage("recursiveRm: no <%s> to remove\n"%targetDir)
    
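# A hedged answer to the note at the top about testing recursiveRm for
# safety (hypothetical guard, not called anywhere): only paths that
# resolve to mirrorRootDir or somewhere beneath it look safe to remove.
def _recursiveRmLooksSafe(targetDir):
    real= os.path.realpath(targetDir)
    root= os.path.realpath(mirrorRootDir)
    return real==root or real.startswith(root+os.sep)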

def workerRun():
    logMessage("%s starts"%\
                   (threading.currentThread().name))
    while gblKeepRunning:
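        # Queue.get() blocks until an item is available; the else/sleep
        # branch below only runs if a falsy item is ever queued.  Since
        # the workers are daemon threads, blocking here is harmless.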
        workTuple= pendingDownloadQueue.get()
        if workTuple:
            try:
                (product, productDict, triesSoFar)= workTuple
                logMessage("%s fetching %s %s"%\
                               (threading.currentThread().name,
                                product,productDict['filesetID']))
                incomingDir= os.path.join(mirrorRootDir,product,
                                          threading.currentThread().name,
                                          productDict['filesetID'])
                fetchResultsInfo= None
                try:
                    fetchResultsInfo= fetchSingleUpdate(product,productDict,
                                                        incomingDir)
                except OSError,e:
                    logMessage("File handling problem fetching %s %s: %s"%\
                                   (product, productDict['filesetID'],e))
                    fetchResultsInfo=None
                except RuntimeError,e:
                    logMessage("Fetch of %s %s failed: %s"%\
                                   (product, productDict['filesetID'],e))
                    fetchResultsInfo=None
                if fetchResultsInfo is None:
                    recursiveRm(incomingDir)
                    if triesSoFar<maxFetchAttempts:
                        pendingDownloadQueue.put((product, productDict,
                                                  triesSoFar+1))
                    else:
                        failedDownloadQueue.put((product,productDict,
                                                 triesSoFar+1))
                        logMessage("%s failed fetching %s %s"%\
                                       (threading.currentThread().name,
                                        product,productDict['filesetID']))
            finally:
                pendingDownloadQueue.task_done()
        else:
            time.sleep(0.1)

def fetchUpdates(product, productStatusTable):
    global lastFilesetId, indexColumnFilterDict
    logMessage( "starting fetchUpdates for product %s"%product )
    try:
        if productStatusTable.has_key(product):
            lines= readCmdOutputToList("wget -nv -a %s -O - %s/%s/index.txt?%s"%\
                                           (logFileName,topURI, product,
                                            productStatusTable[product]['filesetID']))
        else:
            lines= readCmdOutputToList("wget -nv -a %s -O - %s/%s/index.txt"%\
                                           (logFileName,topURI,product))
    except:
        logMessage( "Unable to fetch the product index for %s; skipping it"%\
                        product )
        return
        
    productIndex= parseIndexList(lines)
    culledProductIndex= []
    cut= None
    if productStatusTable.has_key(product):
        cut= productStatusTable[product]['time registered']
        for l in productIndex:
            if l['time registered']>cut:
                culledProductIndex.append(l)
            if lastFilesetId is not None and l['filesetID']==lastFilesetId:
                break
    else:
        if lastFilesetId is not None:
            for l in productIndex:
                culledProductIndex.append(l)
                if l['filesetID']==lastFilesetId:
                    break
        else:
            culledProductIndex= productIndex

    if len(indexColumnFilterDict.keys())!=0:
        productIndex= culledProductIndex
        culledProductIndex= []
        for l in productIndex:
            ok= True
            for k,compRegex in indexColumnFilterDict.items():
                if l.has_key(k) and not compRegex.match(l[k]): 
                    ok= False
                    break
            if ok:
                culledProductIndex.append(l)

    if len(culledProductIndex)>0:
        for productDict in culledProductIndex:
            pendingDownloadQueue.put((product, productDict,0))

        # Any that fail to download will be noted separately as failures
        productStatusTable[product]= culledProductIndex[-1]

def describeSelf():
    print """
    usage: %s [-v][-d][--groupwrite][--mirrorroot RootDir]
                [--statusfile Filename][--failurefile Filename]
                [--pipecrash Filename][--logfile Filename]
                [--last LastFilesetId][--select_fsid Regex]
                [--select_targetid Regex][--select_stage Regex]
                [--select_fstag Regex][--select_label Regex]
                [--select_filter Regex][--select_distgroup Regex]
                [--select_datagroup Regex][--select_tarball Regex]

          where:
          -v requests verbose output
          -d requests debugging output
          --groupwrite requests that the imported files be made
            group writable.  The algorithm isn't terribly efficient
            but can be convenient.
          --mirrorroot specifies the directory to store the incoming
            distribution (default %s)
          --statusfile specifies the name of the .csv file storing
            information about the last downloaded fileset in each
            distribution (default %s)
          --failurefile specifies the name of the .csv file storing
            information about filesets for which downloading has failed
            (default %s)
          --pipecrash specifies the name of the Python .pkl file in which
            the contents of the working download queue are to be stored
            in case of an interrupt (default %s)
          --logfile specifies the name of the log (default %s)
          --last specifies a filesetID at which to stop downloading
            (default %s)
          --select_... options can be used to specify a regular expression
            to check against the corresponding column of the datastore
            index.  The options correspond to the following columns:

                ..._fsid      filesetID
                ..._targetid  target_id
                ..._stage     stage
                ..._fstag     fs_tag
                ..._filter    filter
                ..._label     label or dist_group or data_group
                ..._distgroup dist_group or label or data_group
                ..._datagroup data_group or dist_group or label
                ..._tarball   match against .tgz files before downloading

            Any index entry for which a regex is specified must match that
            regex to be downloaded.  The options that come in multiple
            versions exist for backward compatibility.

            The regex for tarballs is a little different, in that matches
            are checked at the stage of downloading the individual files
            of a fileset.  The condition applies only to .tgz files, not
            text or metadata.  This is useful for selecting a specific
            skycell, for example.

    NOTE: if you 'kill' this script (with SIGTERM, the default) it will
    attempt to clean up and take good notes before it dies.  This can
    take a while as the worker threads finish downloading.  If you must
    kill it immediately, use 'kill -9'.
    """%(os.path.basename(sys.argv[0]),
         mirrorRootDir, productStatusTableFilename, failureTableFilename,
         leftoverWorkFilename, logFileName, lastFilesetId)
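
# Example invocation (hypothetical script name and regexes), per the
# usage text above:
#
#     ./update_mirror.py -v --mirrorroot /var/tmp/MIRROR \
#                        --select_filter 'g\..*' --select_tarball '.*skycell.*'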

def main():
    global verbose, debug, groupWriteFlag, gblKeepRunning
    global mirrorRootDir, productStatusTableFilename, logFileName
    global failureTableFilename, leftoverWorkFilename, lastFilesetId
    global pendingDownloadQueue, failedDownloadQueue
    global totalKBytesDownloaded, lockTotalKBytesDownloaded
    global indexColumnFilterDict, selectTarballRegex

    try:
        (opts,pargs) = getopt.getopt(sys.argv[1:],"vdh",["groupwrite",
                                                         "mirrorroot=",
                                                         "statusfile=",
                                                         "failurefile=",
                                                         "logfile=",
                                                         "pipecrash=",
                                                         "last=","help",
                                                         "select_fsid=",
                                                         "select_targetid=",
                                                         "select_stage=",
                                                         "select_fstag=",
                                                         "select_label=",
                                                         "select_distgroup=",
                                                         "select_filter=",
                                                         "select_datagroup=",
                                                         "select_tarball="])
    except getopt.GetoptError:
        sys.exit("%s: Invalid command line parameter" % sys.argv[0])
   
    # Parse args
    for a,b in opts:
        if a=="-h" or a=="--help":
            describeSelf()
            sys.exit(0)
        if a=="-v":
            verbose= 1
        if a=="-d":
            debug= 1
        if a=="--groupwrite":
            groupWriteFlag= 1
        if a=="--mirrorroot":
            mirrorRootDir= b
        if a=="--statusfile":
            productStatusTableFilename= b
        if a=="--failurefile":
            failureTableFilename= b
        if a=="--pipecrash":
            leftoverWorkFilename= b
        if a=="--logfile":
            logFileName= b
        if a=="--last":
            lastFilesetId= b
        if a=="--select_fsid":
            indexColumnFilterDict["filesetID"]= b
        if a=="--select_targetid":
            indexColumnFilterDict["target_id"]= b
        if a=="--select_stage":
            indexColumnFilterDict["stage"]= b
        if a=="--select_fstag":
            indexColumnFilterDict["fs_tag"]= b
        if a=="--select_label" \
                or a=="--select_distgroup" \
                or a=="--select_datagroup":
            indexColumnFilterDict["label"]= b
            indexColumnFilterDict["dist_group"]= b
            indexColumnFilterDict["data_group"]= b
        if a=="--select_filter":
            indexColumnFilterDict["filter"]= b
        if a=="--select_tarball":
            selectTarballRegex= b

    if len(pargs)>0:
        describeSelf()
        sys.exit(-1)

    logMessage("####### Starting #######")

    # Check any regular expressions provided
    regex= None
    try:
        for k,regex in indexColumnFilterDict.items():
            indexColumnFilterDict[k]= re.compile(regex)
        if selectTarballRegex is not None:
            regex= selectTarballRegex    # to get error message correct
            selectTarballRegex= re.compile(regex)
    except Exception,e:
        sys.exit("<%s> is not a valid regular expression: %s"%(regex,str(e)))

    productStatusTable= getProductStatusTable()
    failureList= getFailureList()
    if debug: print failureList

    # This will allow us to exit in an orderly way if someone
    # kills the process.
    signal.signal(signal.SIGTERM, handler)

    pendingDownloadQueue= Queue.Queue()
    failedDownloadQueue= Queue.Queue()
    lockTotalKBytesDownloaded= threading.Lock()
    gblKeepRunning= True
    threadsThatAreNotWorkers= threading.activeCount()
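    # activeCount() taken before the workers start gives the baseline
    # thread count; the shutdown path below waits until the count falls
    # back to this value before pickling leftover work.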
    try:
        for i in range(numDownloadThreads):
            t= threading.Thread(target=workerRun)
            t.setDaemon(True)
            t.start()

        # Try to get any downloads that we failed to get last time
        for failDict in failureList:
            pendingDownloadQueue.put((failDict['productID'],failDict,0))

        # Generate the top-level index
        topIndexLines= readCmdOutputToList("wget -a %s -nv -O - %s/index.txt"%
                                           (logFileName,topURI))
        topIndex= parseIndexList(topIndexLines)
        for dist in hiddenProductList:
            topIndex.append({'productID':dist,'type':'dump'})
        productTable= {}
        for rec in topIndex:
            productTable[rec['productID']]= rec

        for product in desiredProductList:
            if productTable.has_key(product):
                fetchUpdates(product, productStatusTable)

        # Queue.join() seems to be immune to interrupts.
        while pendingDownloadQueue.qsize()>0:
            time.sleep(0.1)
    except IOError,e:
        logMessage("IOError or signal in main: %s"%e)
        gblKeepRunning= False  # bail on processing

        # Stall while the workers finish their tasks and exit
        while threading.activeCount()>threadsThatAreNotWorkers:
            time.sleep(0.1)

        # Freeze a copy of the elements left in the work queue-
        # this also empties the queue so queue.join() works below
        remainingPipeWork= []
        while not pendingDownloadQueue.empty():
            remainingPipeWork.append( pendingDownloadQueue.get() )
            pendingDownloadQueue.task_done()

        if len(remainingPipeWork)>0:
            pklOut= open(leftoverWorkFilename,"w")
            cPickle.dump(remainingPipeWork, pklOut)
            pklOut.close()

    finally:
        pendingDownloadQueue.join()

        gblKeepRunning= False  # might as well cause workers to terminate

        # Save update state and failure list
        writeProductStatusTable(productStatusTable)
        failureList= []
        while not failedDownloadQueue.empty():
            prod, rec, nTries= failedDownloadQueue.get()
            rec['productID']= prod
            failureList.append(rec)
            failedDownloadQueue.task_done()
        writeFailureList(failureList)


        # And a final message on the way out
        logMessage("Exiting; downloaded %d KBytes since startup"%\
                   totalKBytesDownloaded)

############
# Main hook
############

if __name__=="__main__":
    main()

