IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
Aug 7, 2009, 4:08:25 PM (17 years ago)
Author:
Paul Price
Message:

Merging trunk (r25026) to get up-to-date on old branch.

Location:
branches/pap
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/pap

  • branches/pap/ippScripts/scripts/receive_file.pl

    r23895 r25027  
    1717use IPC::Cmd 0.36 qw( can_run run );
    1818use PS::IPP::Metadata::Config;
     19use PS::IPP::Metadata::List qw( parse_md_list );
    1920use PS::IPP::Config 1.01 qw( :standard );
    20 use File::Temp qw( tempfile );
     21use File::Temp qw( tempfile tempdir );
     22use File::Basename qw( basename );
    2123use Carp;
    2224
     
    3335}
    3436
    35 my $tempdir = "/tmp";
     37my $temproot = "/tmp";
    3638
    3739# Parse the command-line arguments
    38 my ( $file_id, $source, $product, $fileset, $file, $workdir, $dbname, $verbose, $no_update, $save_temps );
     40my ( $file_id, $source, $product, $fileset, $fileset_id, $file, $component, $bytes, $md5sum, $workdir, $dirinfo_uri, $dbname, $verbose, $no_update, $save_temps );
    3941
    4042GetOptions(
     
    4345           'product=s'         => \$product, # Product for data
    4446           'fileset=s'         => \$fileset, # Fileset for data
     47           'fileset_id=s'      => \$fileset_id, # database id for the fileset
    4548           'file=s'            => \$file, # File to retrieve
     49           'component=s'       => \$component, # component for this file (class_id, skycell_id or dbinfo)
     50           'bytes=i'           => \$bytes, # file size in bytes
     51           'md5sum=s'          => \$md5sum, # md5sum for file from data store
    4652           'workdir=s'         => \$workdir, # Working directory for output
     53           'dirinfo=s'    => \$dirinfo_uri, # file containing the destination directories for this component
    4754           'dbname=s'          => \$dbname,    # Database name
    4855           'verbose'           => \$verbose,   # Print to stdout
     
    5259
    5360pod2usage( -msg => "Unknown option: @ARGV", -exitval => 2 ) if @ARGV;
    54 pod2usage( -msg => "Required options: --file_id --source --product --fileset --file --workdir",
     61pod2usage( -msg => "Required options: --file_id --source --product --fileset --file --component --workdir --bytes --md5sum --dirinfo",
    5562           -exitval => $PS_EXIT_CONFIG_ERROR) unless
    5663    defined $file_id and
     
    5966    defined $fileset and
    6067    defined $file and
     68    defined $component and
     69    defined $bytes and
     70    defined $md5sum and
     71    defined $dirinfo_uri and
    6172    defined $workdir;
     73
     74my $tempdir = tempdir( "$temproot/receive.$file_id.XXXX", CLEANUP => !$save_temps);
     75
     76&my_die( "dirinfo is NULL for $component", $file_id, $PS_EXIT_CONFIG_ERROR )
     77    if (($dirinfo_uri eq "NULL") and ($component ne "dirinfo"));
    6278
    6379my $ipprc = PS::IPP::Config->new() or
    6480    &my_die( "Unable to set up", $file_id, $PS_EXIT_CONFIG_ERROR ); # IPP configuration
     81
     82my $mdcParser = PS::IPP::Metadata::Config->new;
     83
     84
    6585
    6686# Retrieve file
     
    6989    my $uri = "$source/$product/$fileset/$file"; # URI for datastore file
    7090    my $command = "dsget --uri $uri --filename $filename"; # Command to execute
     91    $command .= " --timeout 590";
     92    $command .= " --bytes $bytes --md5 $md5sum";
    7193
    7294    my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
    7395        run(command => $command, verbose => $verbose);
    74     die "Unable to retrieve file from $uri\n" unless $success;
     96    &my_die( "Unable to retrieve file from $uri\n", $file_id, $PS_EXIT_DATA_ERROR) unless $success;
    7597}
    7698my $mjd_copy = DateTime->now->mjd;   # MJD of finishing copy
    7799
     100# figure out which dirinfo file to read
     101my $dirinfo_file_to_read = $component eq "dirinfo" ? $filename : $dirinfo_uri;
     102
     103# process it
     104my ($destdir, $components, $dirinfo_lines) = read_dirinfo_file($dirinfo_file_to_read, $file_id);
     105
     106# select a directory for the dirinfo and dbinfo files
     107# XXX: perhaps this directory should be set by the script and passed in
     108# rather than computed here.
     109
     110my ($day, $month, $year) = (localtime)[3,4,5];
     111my $datestr = sprintf "%04d%02d%02d", $year+1900, $month + 1, $day;
     112my $dir_for_info_files = caturi($workdir, $datestr, $fileset);
     113
    78114# Deal with file
    79 if ($file =~ m|^dbinfo\.\S+\.mdc$|) {
    80     # Load into database
    81 
    82     # Need to fix paths to point to new workdir
    83     my ($fixFile, $fixName) = tempfile( "$tempdir/$file.XXXX", UNLINK => !$save_temps ); # Fixed file
    84     open my $inFile, $filename or die "Can't open $filename\n"; # Input file
    85     my $workdir_old;            # Old workdir
    86     while (<$inFile>) {
    87         # XXX This is a global approach to fixing the path: it should fix anything and everything, but won't
    88         # work if there are multiple workdirs in a file and the bits are all mixed up.  To cover that case,
    89         # we should fix each of the elements (workdir, uri, path_base) separately.
    90         if (m|^\s*workdir\s+STR\s+(\S+)|) {
    91             $workdir_old = $1;
    92             $workdir_old =~ s|\@HOST\@|\\S+|;
    93         }
    94         if (defined $workdir_old) {
    95             s|$workdir_old|$workdir|;
    96         }
    97         print $fixFile $_;
    98     }
    99     close($inFile);
    100 
    101     my ($stage) = $file =~ m|^dbinfo\.(\S+)\.\d+\.mdc$|; # Stage of interest
    102     my $tool = can_run("${stage}tool") or die "Can't find tool to load $file\n";
    103 
    104     my $command = "$tool -importrun -infile $fixName"; # Command to execute
     115if ($component eq 'dirinfo') {
     116    # save the dirinfo file contents into the $workdir
     117
     118    $dirinfo_uri = caturi($dir_for_info_files, basename($filename));
     119    print "dirinfo_uri: $dirinfo_uri\n" if $verbose;
     120
     121    my $resolved = $ipprc->file_resolve($dirinfo_uri, 'create');
     122    &my_die( "failed to resolve $dirinfo_uri\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$resolved;
     123
     124    print "dirinfo resolved is: $resolved\n" if $verbose;
     125
     126    open OUT, ">$resolved"
     127        or &my_die( "failed to open $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     128    print OUT @$dirinfo_lines
     129        or &my_die( "failed to write $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     130    close OUT
     131        or &my_die( "failed to close $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     132   
     133    # update the fileset to allow processing of other files
     134    my $command = "$receivetool -updatefileset -fileset_id $fileset_id";
     135    $command .= " -set_state new -dirinfo $dirinfo_uri";
    105136    $command .= " -dbname $dbname" if defined $dbname;
     137
    106138    my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
    107139        run(command => $command, verbose => $verbose);
    108     die "Unable to load $fixName\n" unless $success;
    109 } elsif ($file =~ m|.*\.tgz$|) {
     140    &my_die( "Unable to update fileset $fileset_id to\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $success;
     141
     142} elsif ($component eq "dbinfo") {
     143
     144    open INFILE, $filename or &my_die( "Can't open $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     145
     146    my @lines = (<INFILE>);
     147
     148    my $dbinfo = join "", @lines;
     149
     150    close INFILE;
     151
     152    my $dbinfo_uri = caturi($dir_for_info_files, basename($filename));
     153    print "dbinfo_uri: $dbinfo_uri\n" if $verbose;
     154
     155    my $resolved = $ipprc->file_resolve($dbinfo_uri, 'create');
     156    &my_die( "failed to resolve $dbinfo_uri\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$resolved;
     157
     158    open OUT, ">$resolved"
     159        or &my_die( "failed to open $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     160
     161    # We process the dbinfo file (the exported run from the distribution) line by line
     162    # Rather than read it as an mdc and interpret it, we do our substitutions directly
     163    # This is much faster. Parsing a mdc file for a chip run takes several seconds.
     164    # we are very strict about the format of the file
     165    #
     166    # First comes the data for the Run
     167    # Next is the data for each component
     168    # The component_id (class_id, skycell_id) must come before any of the paths that we edit
     169
     170    # The first line tells us the run type. From this we get the stage
     171    #
     172    my $line = $lines[0];
     173    my ($runType, $multi) = split " ", $line;
     174    &my_die( "unexpected first line found in $filename: $line\n", $file_id, $PS_EXIT_UNKNOWN_ERROR)
     175        if !$runType or ($multi ne 'MULTI');
     176
     177    my $stage;
     178    my $comp_name;
     179    my $current_component;
     180    if ($runType eq 'rawExp') {
     181        $stage = 'raw';
     182        $comp_name = 'class_id';
     183    } elsif ($runType eq 'chipRun') {
     184        $stage = 'chip';
     185        $comp_name = 'class_id';
     186    } elsif ($runType eq 'camRun') {
     187        $stage = 'camera';
     188        $comp_name = 'exposure';
     189        $current_component = $comp_name;
     190    } elsif ($runType eq 'fakeRun') {
     191        $stage = 'fake';
     192        $comp_name = 'class_id';
     193    } elsif ($runType eq 'warpRun') {
     194        $stage = 'warp';
     195        $comp_name = 'skycell_id';
     196    } elsif ($runType eq 'diffRun') {
     197        $stage = 'diff';
     198        $comp_name = 'skycell_id';
     199    } elsif ($runType eq 'stackRun') {
     200        $stage = 'stack';
     201        $comp_name = 'skycell_id';
     202    } else {
     203        &my_die( "unexpected run type line found in $filename: $runType\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     204    }
     205
     206    my $new_workdir_value;
     207    if ($destdir eq 'none') {
     208        # this only applies to rawExp
     209        $new_workdir_value = "$workdir";
     210    } else {
     211        $new_workdir_value = "$workdir/$destdir";
     212    }
     213    my $component_dir;
     214    if ($current_component) {
     215        $component_dir = $components->{$current_component};
     216    }
     217    foreach $line (@lines) {
     218        my $out_line = $line;
     219
     220        my ($name, $type, $value) = split " ", $line;
     221        # only complete lines have things that we need to examine
     222        if ($name and $type and $value) {
     223            my $new_value;
     224            # we have a new component id, save it and look up the corresponding
     225            # component_dir
     226            if ($name eq $comp_name) {
     227                $current_component = $value;
     228                $component_dir = $components->{$current_component};
     229                &my_die( "$component_dir is null for $value in $filename: $runType\n",
     230                        $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$component_dir;
     231            } elsif ($name eq 'workdir') {
     232                $new_value = $new_workdir_value;
     233            } elsif ($name eq 'tess_id') {
     234                # for tess_id strip off any directories just keep the basename.
     235                # The site configuration will need to map this to a proper location
     236                # XXX: Document this
     237                $new_value = basename($value);
     238            } elsif ((($name eq 'uri') or ($name eq 'path_base')) and ($value ne 'NULL')) {
     239                &my_die( "$component_dir is null and we need it for $name",
     240                        $file_id, $PS_EXIT_PROG_ERROR) if !$component_dir;
     241
     242                $new_value = caturi($new_workdir_value, $component_dir, basename($value));
     243            }
     244
     245            # if the value changed re-write the line, otherwise just print what we read
     246            if ($new_value) {
     247                $out_line = "   " . $name . "\t\t" . $type . "\t" . $new_value . "\n";
     248            }
     249        }
     250
     251        print OUT $out_line or &my_die( "failed to write to $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     252    }
     253
     254    close OUT
     255        or &my_die( "failed to close $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     256
     257    # update the fileset to allow processing of other files
     258    my $command = "$receivetool -updatefileset -fileset_id $fileset_id";
     259    $command .= " -dbinfo $dbinfo_uri";
     260    $command .= " -dbname $dbname" if defined $dbname;
     261
     262    my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
     263        run(command => $command, verbose => $verbose);
     264    &my_die( "Unable to update fileset $fileset_id to\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $success;
     265
     266
     267} elsif ($file =~ m|.*\.tgz$|) {        # XXX: perhaps get this off of file type ?
    110268    # Get contents of tarball
    111269    my @files = ();
     
    114272        my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
    115273            run(command => $command, verbose => $verbose);
    116         die "Unable to get listing of tar file $filename\n" unless $success;
     274        &my_die( "Unable to get listing of tar file $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $success;
    117275
    118276        my @lines = split(/\n/, join "", @$stdout_buf); # Lines from output
     
    132290        my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
    133291            run(command => $command, verbose => $verbose);
    134         die "Unable to extract tar file $filename\n" unless $success;
    135     }
     292        &my_die( "Unable to extract tar file $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $success;
     293    }
     294
     295    my $component_dir = $components->{$component};
     296    &my_die( "Unable to find component_dir for $component $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $component_dir;
     297
     298    my $target_dir;
     299    if ($destdir eq 'none') {
     300        $target_dir = "$workdir";
     301    } else {
     302        $target_dir = "$workdir/$destdir";
     303    }
     304    $target_dir .= "/$component_dir";
    136305
    137306    # Move files into filesystem of choice
    138307    foreach my $file ( @files ) {
    139308        my $from = "$tempdir/$file"; # Source for file
    140         my $target = "$workdir/$file"; # Target destination for file
     309        my $target = "$target_dir/$file"; # Target destination for file
     310
     311
     312        $ipprc->file_delete ($target);
     313
    141314        my $to = $ipprc->file_create( $target ); # Target for move
    142         system("mv $from $to") == 0 or die "Unable to move $file into workdir $workdir: $!\n";
     315
     316        if (!$to) {
     317            &my_die( "failed to create: $target\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     318        }
     319
     320        if ( $file =~ /.+\.mdc/ ) {
     321            # this file is a config dump file edit the paths
     322            edit_mdc_file($file_id, $from, $to, $workdir);
     323        } else {
     324            system("mv $from $to") == 0 or &my_die( "Unable to move $file into workdir $workdir: $!\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     325        }
    143326    }
    144327} else {
    145     die "Unrecognised file: $file\n";
    146 }
    147 
    148 unlink $filename or die "Unable to unlink $filename\n";
     328    &my_die( "Unrecognised file: $file\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     329}
     330
    149331my $mjd_extract = DateTime->now->mjd;   # MJD of finishing extract
    150 
    151332
    152333# All done
     
    170351# Pau.
    171352
     353sub read_dirinfo_file
     354{
     355    my $filename = shift;
     356    my $file_id = shift;
     357
     358    my $resolved = $ipprc->file_resolve($filename);
     359    &my_die("failed to resolve dirinfo file: $filename ", $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$resolved;
     360
     361    open INFILE, $resolved or &my_die( "Can't open $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     362
     363    my @lines = (<INFILE>);
     364
     365    my $dirinfo = join "", @lines;
     366
     367    close INFILE;
     368
     369    my $metadata = $mdcParser->parse($dirinfo) or
     370        &my_die("Unable to parse metadata config doc", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     371
     372    my $array = parse_md_list($metadata) or
     373        &my_die("Unable to parse metadata list", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     374
     375    my $dest_hash = $array->[0];
     376
     377    my $destdir = $dest_hash->{destdir};
     378    &my_die("destdir not found in $filename", $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$destdir;
     379
     380    my $components = $array->[1];
     381
     382    return ($destdir, $components, \@lines);
     383}
     384
     385# edit a config dump file replacing the "volume" value with the new local value: $workdir
     386sub edit_mdc_file
     387{
     388    my $file_id = shift;
     389    my $src = shift;
     390    my $dest = shift;
     391    my $workdir = shift;
     392
     393    open my $IN,  "<$src" or &my_die("failed to open $src for input", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     394    open my $OUT, ">$dest" or &my_die("failed to open $dest for output", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     395
     396    # Assumed file structure
     397    # stuff
     398    # FILES.INPUT metadata
     399    # FILES.OUTPUT metadata
     400    # more stuff
     401    # only the paths in the FILES.* metadata are monkeyed with
     402    my $done_editing = 0;
     403    my $numFilesMD = 0;
     404    foreach my $line (<$IN>) {
     405        my $out_line = $line;
     406        if (!$done_editing) {
     407            my (@words) = split " ", $line;
     408            if (scalar @words) {
     409                # get rid of any leading blank words
     410                while ((scalar @words) and !defined $words[0]) {
     411                    shift @words;
     412                }
     413
     414                if ($words[1] and $words[1] eq "METADATA") {
     415                    if ( $words[0] =~ /^FILES\..+/ ) {
     416                        $numFilesMD++;
     417                    }
     418                } elsif ($words[0] eq "END") {
     419                    # when we get to the end of the second FILES metadata we're done editing
     420                    if ($numFilesMD == 2) {
     421                        $done_editing = 1;
     422                    }
     423                } elsif ($numFilesMD and ($words[1] eq "STR"))  {
     424                    # we're processing one of the files metadata edit the path
     425                    my $key = shift @words;
     426                    my $type = shift @words;
     427                    my $path = shift @words;
     428                    my $extra = join " ", @words;
     429
     430                    $path = edit_path($file_id, $workdir, $path);
     431
     432                    $out_line = "\t" . $key ."\t" . "STR" . "\t" . $path;
     433                    $out_line .= "\t" . $extra if $extra;
     434                    $out_line .= "\n";
     435                }
     436            }
     437        }
     438        print $OUT $out_line;
     439    }
     440
     441    close $IN;
     442    close $OUT or &my_die("failed to close $dest", $file_id, $PS_EXIT_UNKNOWN_ERROR);
     443}
     444
     445
     446# XXX: this should go into a module
     447# Replace 'volume portion of path with $workdir/
     448# Volume is defined here by
     449#   neb://volume/
     450#   /xxx/xxxxx/         i.e. /data/ippxxx.y/
     451#   file://xxx/xxxxx/   i.e. file://data/ippxxx.y/
     452#   path://somepath/
     453sub edit_path
     454{
     455    my $file_id = shift;
     456    my $workdir = shift;
     457    my $path = shift;
     458
     459    my $scheme = file_scheme($path);
     460    my $tail;
     461    if ($scheme) {
     462            # strip off scheme://
     463        $tail = substr($path, length($scheme) + 3);
     464    } elsif (substr($path, 0, 1) eq '/') {
     465        $tail = substr($path, 1);
     466        $scheme = "";
     467    }
     468    # remove any leading / that are left
     469    while ((substr($tail, 0, 1) eq '/')) {
     470        $tail = substr($tail, 1);
     471    }
     472
     473    my @segments;
     474    if (($scheme eq 'neb') or ($scheme eq 'path')) {
     475        my $volume;
     476        ($volume, @segments) = split '/', $tail;
     477
     478    } elsif (!$scheme or ($scheme eq 'file')) {
     479
     480        # XXX Here we're assuming the /data/ipp??? structure. This won't be true when data is forwarded
     481        # by remote sites. We need a way to configure this
     482        my $volume;
     483
     484        # data/ippxxx/dirs
     485        (undef, $volume, @segments) = split '/', $tail;
     486    } else {
     487        &my_die( "unexpected workdir value: $path\n", $file_id, $PS_EXIT_PROG_ERROR);
     488    }
     489
     490    my $new_path = caturi($workdir, @segments);
     491
     492    return $new_path;
     493}
    172494
    173495sub my_die
Note: See TracChangeset for help on using the changeset viewer.