Index: trunk/ippScripts/scripts/receive_file.pl
===================================================================
--- trunk/ippScripts/scripts/receive_file.pl	(revision 24102)
+++ trunk/ippScripts/scripts/receive_file.pl	(revision 24125)
@@ -17,6 +17,8 @@
 use IPC::Cmd 0.36 qw( can_run run );
 use PS::IPP::Metadata::Config;
+use PS::IPP::Metadata::List qw( parse_md_list );
 use PS::IPP::Config 1.01 qw( :standard );
 use File::Temp qw( tempfile );
+use File::Basename qw( basename );
 use Carp;
 
@@ -36,5 +38,5 @@
 
 # Parse the command-line arguments
-my ( $file_id, $source, $product, $fileset, $fileset_id, $file, $component, $bytes, $md5sum, $workdir, $dbname, $verbose, $no_update, $save_temps );
+my ( $file_id, $source, $product, $fileset, $fileset_id, $file, $component, $bytes, $md5sum, $workdir, $dirinfo_uri, $dbname, $verbose, $no_update, $save_temps );
 
 GetOptions(
@@ -49,4 +51,5 @@
            'md5sum=s'          => \$md5sum, # md5sum for file from data store
            'workdir=s'         => \$workdir, # Working directory for output
+           'dirinfo=s'    => \$dirinfo_uri, # file containing the destination directories for this component
            'dbname=s'          => \$dbname,    # Database name
            'verbose'           => \$verbose,   # Print to stdout
@@ -56,5 +59,5 @@
 
 pod2usage( -msg => "Unknown option: @ARGV", -exitval => 2 ) if @ARGV;
-pod2usage( -msg => "Required options: --file_id --source --product --fileset --file --workdir",
+pod2usage( -msg => "Required options: --file_id --source --product --fileset --file --component --workdir --bytes --md5sum --dirinfo",
            -exitval => $PS_EXIT_CONFIG_ERROR) unless
     defined $file_id and
@@ -66,10 +69,18 @@
     defined $bytes and
     defined $md5sum and
+    defined $dirinfo_uri and
     defined $workdir;
 
 $tempdir .= "/$file_id";
+
+&my_die( "dirinfo is NULL for $component", $file_id, $PS_EXIT_CONFIG_ERROR )
+    if (($dirinfo_uri eq "NULL") and ($component ne "dirinfo"));
 
 my $ipprc = PS::IPP::Config->new() or
     &my_die( "Unable to set up", $file_id, $PS_EXIT_CONFIG_ERROR ); # IPP configuration
+
+my $mdcParser = PS::IPP::Metadata::Config->new;
+
+
 
 # Retrieve file
@@ -87,41 +98,161 @@
 my $mjd_copy = DateTime->now->mjd;   # MJD of finishing copy
 
+# figure out which dirinfo file to read
+my $dirinfo_file_to_read = $component eq "dirinfo" ? $filename : $dirinfo_uri;
+
+# process it
+my ($destdir, $components, $dirinfo_lines) = read_dirinfo_file($dirinfo_file_to_read, $file_id);
+
+# select a directory for the dirinfo and dbinfo files
+# XXX: perhaps this directory should be set by the script and passed in
+# rather than computed here.
+
+my ($day, $month, $year) = (localtime)[3,4,5];
+my $datestr = sprintf "%04d%02d%02d", $year+1900, $month + 1, $day;
+my $dir_for_info_files = caturi($workdir, $datestr, $fileset);
+
 # Deal with file
-if ($file =~ m|^dbinfo\.\S+\.mdc$|) {
-    # Load into database
-
-    my $target = "$workdir/$file"; # Target destination for file
-    my $fixName = $ipprc->file_create( $target ); # Target for move
-    my $fixFile;
-
-    open $fixFile, ">$fixName" or &my_die( "can't open $fixName\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
-
-    # Need to fix paths to point to new workdir
-    open my $inFile, $filename or &my_die( "Can't open $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); # Input file
-    my $workdir_old;            # Old workdir
-    while (<$inFile>) {
-        # XXX This is a global approach to fixing the path: it should fix anything and everything, but won't
-        # work if there are multiple workdirs in a file and the bits are all mixed up.  To cover that case,
-        # we should fix each of the elements (workdir, uri, path_base) separately.
-        if (m|^\s*workdir\s+STR\s+(\S+)|) {
-            $workdir_old = $1;
-            $workdir_old =~ s|\@HOST\@|\\S+|;
-        }
-        if (defined $workdir_old) {
-            s|$workdir_old|$workdir|;
-        }
-        print $fixFile $_;
-    }
-    close($inFile);
-    close($fixFile);
-#{
-    my $command = "$receivetool -updatefileset -fileset_id $fileset_id -dbinfo_uri $fixName"; # Command to execute
+if ($component eq 'dirinfo') {
+    # save the dirinfo file contents into the $workdir
+
+    $dirinfo_uri = caturi($dir_for_info_files, basename($filename));
+    print "dirinfo_uri: $dirinfo_uri\n" if $verbose;
+
+    my $resolved = $ipprc->file_resolve($dirinfo_uri, 'create');
+    &my_die( "failed to resolve $dirinfo_uri\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$resolved;
+
+    open OUT, ">$resolved" 
+        or &my_die( "failed to open $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+    print OUT @$dirinfo_lines
+        or &my_die( "failed to write $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+    close OUT
+        or &my_die( "failed to close $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+    
+    # update the fileset to allow processing of other files
+    my $command = "$receivetool -updatefileset -fileset_id $fileset_id";
+    $command .= " -set_state new -dirinfo $dirinfo_uri";
     $command .= " -dbname $dbname" if defined $dbname;
+
     my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
         run(command => $command, verbose => $verbose);
-    &my_die( "Unable to set dbinfo_uri for $fileset_id to  $fixName\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $success;
-#}
-
-} elsif ($file =~ m|.*\.tgz$|) {
+    &my_die( "Unable to update fileset $fileset_id to\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $success;
+
+} elsif ($component eq "dbinfo") {
+
+    open INFILE, $filename or &my_die( "Can't open $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+
+    my @lines = (<INFILE>);
+
+    my $dbinfo = join "", @lines;
+
+    close INFILE;
+
+    my $dbinfo_uri = caturi($dir_for_info_files, basename($filename));
+    print "dbinfo_uri: $dbinfo_uri\n" if $verbose;
+
+    my $resolved = $ipprc->file_resolve($dbinfo_uri, 'create');
+    &my_die( "failed to resolve $dbinfo_uri\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$resolved;
+
+    open OUT, ">$resolved" 
+        or &my_die( "failed to open $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+
+    # We process the dbinfo file (the exported run from the distribution) line by line
+    # Rather than read it as an mdc and interptet it, we do our substitutions directly
+    # This is much faster. Parsing a mdc file for a chip run takes several seconds.
+    # we are very strict on the formatting
+
+    # first line tells us the run type. From this we get the stage
+    #
+    my $line = $lines[0];
+    my ($runType, $multi) = split " ", $line;
+    &my_die( "unexpected first line found in $filename: $line\n", $file_id, $PS_EXIT_UNKNOWN_ERROR)
+        if !$runType or ($multi ne 'MULTI');
+    my $stage;
+    my $comp_name;
+    my $current_component;
+    if ($runType eq 'rawExp') {
+        $stage = 'raw';
+        $comp_name = 'class_id';
+    } elsif ($runType eq 'chipRun') {
+        $stage = 'chip';
+        $comp_name = 'class_id';
+    } elsif ($runType eq 'camRun') {
+        $stage = 'camera';
+        $comp_name = 'exposure';
+        $current_component = $comp_name;
+    } elsif ($runType eq 'fakeRun') {
+        $stage = 'fake';
+        $comp_name = 'class_id';
+    } elsif ($runType eq 'warpRun') {
+        $stage = 'warp';
+        $comp_name = 'skycell_id';
+    } elsif ($runType eq 'diffRun') {
+        $stage = 'diff';
+        $comp_name = 'skycell_id';
+    } elsif ($runType eq 'stackRun') {
+        $stage = 'stack';
+        $comp_name = 'skycell_id';
+    } else {
+        &my_die( "unexpected run type line found in $filename: $runType\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+    }
+
+    my $new_workdir_value;
+    if ($destdir eq 'none') {
+        $new_workdir_value = "$workdir";
+    } else {
+        $new_workdir_value = "$workdir/$destdir";
+    }
+    my $component_dir;
+    if ($current_component) {
+        $component_dir = $components->{$current_component};
+    }
+    foreach $line (@lines) {
+        my $out_line = $line;
+
+        my ($name, $type, $value) = split " ", $line;
+        # we only edit complete lines
+        if ($name and $type and $value) {
+            my $new_value;
+            if ($name eq $comp_name) {
+                $current_component = $value;
+                $component_dir = $components->{$current_component};
+                &my_die( "$component_dir is null for $value in $filename: $runType\n", 
+                        $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$component_dir;
+            } elsif ($name eq 'workdir') {
+                $new_value = $new_workdir_value;
+            } elsif ($name eq 'tess_id') {
+                # for tess_id strip off any directories just keep the basename.
+                # The site configuration will need to map this to a proper location
+                # XXX: Document this
+                $new_value = basename($value);
+            } elsif ((($name eq 'uri') or ($name eq 'path_base')) and ($value ne 'NULL')) {
+                &my_die( "$component_dir is null and we need it for $name", 
+                        $file_id, $PS_EXIT_PROG_ERROR) if !$component_dir;
+
+                $new_value = caturi($new_workdir_value, $component_dir, basename($value));
+            }
+
+            if ($new_value) {
+                $out_line = "   " . $name . "\t\t" . $type . "\t" . $new_value . "\n";
+            }
+        }
+
+        print OUT $out_line or &my_die( "failed to write to $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+    }
+
+    close OUT
+        or &my_die( "failed to close $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+
+    # update the fileset to allow processing of other files
+    my $command = "$receivetool -updatefileset -fileset_id $fileset_id";
+    $command .= " -dbinfo $dbinfo_uri";
+    $command .= " -dbname $dbname" if defined $dbname;
+
+    my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
+        run(command => $command, verbose => $verbose);
+    &my_die( "Unable to update fileset $fileset_id to\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $success;
+
+
+} elsif ($file =~ m|.*\.tgz$|) {        # XXX: perhaps get this off of file type ?
     # Get contents of tarball
     my @files = ();
@@ -151,8 +282,19 @@
     }
 
+    my $component_dir = $components->{$component};
+    &my_die( "Unable to find component_dir for $component $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $component_dir;
+
+    my $target_dir;
+    if ($destdir eq 'none') {
+        $target_dir = "$workdir";
+    } else {
+        $target_dir = "$workdir/$destdir";
+    }
+    $target_dir .= "/$component_dir";
+
     # Move files into filesystem of choice
     foreach my $file ( @files ) {
         my $from = "$tempdir/$file"; # Source for file
-        my $target = "$workdir/$file"; # Target destination for file
+        my $target = "$target_dir/$file"; # Target destination for file
         my $to = $ipprc->file_create( $target ); # Target for move
         system("mv $from $to") == 0 or &my_die( "Unable to move $file into workdir $workdir: $!\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
@@ -162,5 +304,7 @@
 }
 
-unlink $filename or &my_die( "Unable to unlink $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+if (!$save_temps) {
+    unlink $filename or &my_die( "Unable to unlink $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+}
 my $mjd_extract = DateTime->now->mjd;   # MJD of finishing extract
 
@@ -186,4 +330,32 @@
 # Pau.
 
+sub read_dirinfo_file
+{
+    my $filename = shift;
+    my $file_id = shift;
+
+    open INFILE, $filename or &my_die( "Can't open $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+
+    my @lines = (<INFILE>);
+
+    my $dirinfo = join "", @lines;
+
+    close INFILE;
+
+    my $metadata = $mdcParser->parse($dirinfo) or
+        &my_die("Unable to parse metadata config doc", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+
+    my $array = parse_md_list($metadata) or
+        &my_die("Unable to parse metadata list", $file_id, $PS_EXIT_UNKNOWN_ERROR);
+
+    my $dest_hash = $array->[0];
+
+    my $destdir = $dest_hash->{destdir};
+    &my_die("destdir not found in $filename", $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$destdir;
+
+    my $components = $array->[1];
+
+    return ($destdir, $components, \@lines);
+}
 
 sub my_die
