Changeset 25027 for branches/pap/ippScripts/scripts/receive_file.pl
- Timestamp:
- Aug 7, 2009, 4:08:25 PM (17 years ago)
- Location:
- branches/pap
- Files:
-
- 2 edited
-
. (modified) (1 prop)
-
ippScripts/scripts/receive_file.pl (modified) (9 diffs)
Legend:
- Unmodified
- Added
- Removed
-
branches/pap
- Property svn:mergeinfo changed
-
branches/pap/ippScripts/scripts/receive_file.pl
r23895 r25027 17 17 use IPC::Cmd 0.36 qw( can_run run ); 18 18 use PS::IPP::Metadata::Config; 19 use PS::IPP::Metadata::List qw( parse_md_list ); 19 20 use PS::IPP::Config 1.01 qw( :standard ); 20 use File::Temp qw( tempfile ); 21 use File::Temp qw( tempfile tempdir ); 22 use File::Basename qw( basename ); 21 23 use Carp; 22 24 … … 33 35 } 34 36 35 my $temp dir= "/tmp";37 my $temproot = "/tmp"; 36 38 37 39 # Parse the command-line arguments 38 my ( $file_id, $source, $product, $fileset, $file , $workdir, $dbname, $verbose, $no_update, $save_temps );40 my ( $file_id, $source, $product, $fileset, $fileset_id, $file, $component, $bytes, $md5sum, $workdir, $dirinfo_uri, $dbname, $verbose, $no_update, $save_temps ); 39 41 40 42 GetOptions( … … 43 45 'product=s' => \$product, # Product for data 44 46 'fileset=s' => \$fileset, # Fileset for data 47 'fileset_id=s' => \$fileset_id, # database id for the fileset 45 48 'file=s' => \$file, # File to retrieve 49 'component=s' => \$component, # component for this file (class_id, skycell_id or dbinfo) 50 'bytes=i' => \$bytes, # file size in bytes 51 'md5sum=s' => \$md5sum, # md5sum for file from data store 46 52 'workdir=s' => \$workdir, # Working directory for output 53 'dirinfo=s' => \$dirinfo_uri, # file containing the destination directories for this component 47 54 'dbname=s' => \$dbname, # Database name 48 55 'verbose' => \$verbose, # Print to stdout … … 52 59 53 60 pod2usage( -msg => "Unknown option: @ARGV", -exitval => 2 ) if @ARGV; 54 pod2usage( -msg => "Required options: --file_id --source --product --fileset --file -- workdir",61 pod2usage( -msg => "Required options: --file_id --source --product --fileset --file --component --workdir --bytes --md5sum --dirinfo", 55 62 -exitval => $PS_EXIT_CONFIG_ERROR) unless 56 63 defined $file_id and … … 59 66 defined $fileset and 60 67 defined $file and 68 defined $component and 69 defined $bytes and 70 defined $md5sum and 71 defined $dirinfo_uri and 61 72 defined $workdir; 73 74 my $tempdir = tempdir( "$temproot/receive.$file_id.XXXX", CLEANUP => !$save_temps); 75 76 &my_die( "dirinfo is NULL for $component", $file_id, $PS_EXIT_CONFIG_ERROR ) 77 if (($dirinfo_uri eq "NULL") and ($component ne "dirinfo")); 62 78 63 79 my $ipprc = PS::IPP::Config->new() or 64 80 &my_die( "Unable to set up", $file_id, $PS_EXIT_CONFIG_ERROR ); # IPP configuration 81 82 my $mdcParser = PS::IPP::Metadata::Config->new; 83 84 65 85 66 86 # Retrieve file … … 69 89 my $uri = "$source/$product/$fileset/$file"; # URI for datastore file 70 90 my $command = "dsget --uri $uri --filename $filename"; # Command to execute 91 $command .= " --timeout 590"; 92 $command .= " --bytes $bytes --md5 $md5sum"; 71 93 72 94 my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = 73 95 run(command => $command, verbose => $verbose); 74 die "Unable to retrieve file from $uri\n"unless $success;96 &my_die( "Unable to retrieve file from $uri\n", $file_id, $PS_EXIT_DATA_ERROR) unless $success; 75 97 } 76 98 my $mjd_copy = DateTime->now->mjd; # MJD of finishing copy 77 99 100 # figure out which dirinfo file to read 101 my $dirinfo_file_to_read = $component eq "dirinfo" ? $filename : $dirinfo_uri; 102 103 # process it 104 my ($destdir, $components, $dirinfo_lines) = read_dirinfo_file($dirinfo_file_to_read, $file_id); 105 106 # select a directory for the dirinfo and dbinfo files 107 # XXX: perhaps this directory should be set by the script and passed in 108 # rather than computed here. 109 110 my ($day, $month, $year) = (localtime)[3,4,5]; 111 my $datestr = sprintf "%04d%02d%02d", $year+1900, $month + 1, $day; 112 my $dir_for_info_files = caturi($workdir, $datestr, $fileset); 113 78 114 # Deal with file 79 if ($file =~ m|^dbinfo\.\S+\.mdc$|) { 80 # Load into database 81 82 # Need to fix paths to point to new workdir 83 my ($fixFile, $fixName) = tempfile( "$tempdir/$file.XXXX", UNLINK => !$save_temps ); # Fixed file 84 open my $inFile, $filename or die "Can't open $filename\n"; # Input file 85 my $workdir_old; # Old workdir 86 while (<$inFile>) { 87 # XXX This is a global approach to fixing the path: it should fix anything and everything, but won't 88 # work if there are multiple workdirs in a file and the bits are all mixed up. To cover that case, 89 # we should fix each of the elements (workdir, uri, path_base) separately. 90 if (m|^\s*workdir\s+STR\s+(\S+)|) { 91 $workdir_old = $1; 92 $workdir_old =~ s|\@HOST\@|\\S+|; 93 } 94 if (defined $workdir_old) { 95 s|$workdir_old|$workdir|; 96 } 97 print $fixFile $_; 98 } 99 close($inFile); 100 101 my ($stage) = $file =~ m|^dbinfo\.(\S+)\.\d+\.mdc$|; # Stage of interest 102 my $tool = can_run("${stage}tool") or die "Can't find tool to load $file\n"; 103 104 my $command = "$tool -importrun -infile $fixName"; # Command to execute 115 if ($component eq 'dirinfo') { 116 # save the dirinfo file contents into the $workdir 117 118 $dirinfo_uri = caturi($dir_for_info_files, basename($filename)); 119 print "dirinfo_uri: $dirinfo_uri\n" if $verbose; 120 121 my $resolved = $ipprc->file_resolve($dirinfo_uri, 'create'); 122 &my_die( "failed to resolve $dirinfo_uri\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$resolved; 123 124 print "dirinfo resolved is: $resolved\n" if $verbose; 125 126 open OUT, ">$resolved" 127 or &my_die( "failed to open $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 128 print OUT @$dirinfo_lines 129 or &my_die( "failed to write $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 130 close OUT 131 or &my_die( "failed to close $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 132 133 # update the fileset to allow processing of other files 134 my $command = "$receivetool -updatefileset -fileset_id $fileset_id"; 135 $command .= " -set_state new -dirinfo $dirinfo_uri"; 105 136 $command .= " -dbname $dbname" if defined $dbname; 137 106 138 my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = 107 139 run(command => $command, verbose => $verbose); 108 die "Unable to load $fixName\n" unless $success; 109 } elsif ($file =~ m|.*\.tgz$|) { 140 &my_die( "Unable to update fileset $fileset_id to\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $success; 141 142 } elsif ($component eq "dbinfo") { 143 144 open INFILE, $filename or &my_die( "Can't open $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 145 146 my @lines = (<INFILE>); 147 148 my $dbinfo = join "", @lines; 149 150 close INFILE; 151 152 my $dbinfo_uri = caturi($dir_for_info_files, basename($filename)); 153 print "dbinfo_uri: $dbinfo_uri\n" if $verbose; 154 155 my $resolved = $ipprc->file_resolve($dbinfo_uri, 'create'); 156 &my_die( "failed to resolve $dbinfo_uri\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$resolved; 157 158 open OUT, ">$resolved" 159 or &my_die( "failed to open $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 160 161 # We process the dbinfo file (the exported run from the distribution) line by line 162 # Rather than read it as an mdc and interptet it, we do our substitutions directly 163 # This is much faster. Parsing a mdc file for a chip run takes several seconds. 164 # we are very strict about the format of the file 165 # 166 # First comes the data for the Run 167 # Next is the data for each component 168 # The component_id (class_id, skycell_id) must come before any of the paths that we edit 169 170 # The first line tells us the run type. From this we get the stage 171 # 172 my $line = $lines[0]; 173 my ($runType, $multi) = split " ", $line; 174 &my_die( "unexpected first line found in $filename: $line\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) 175 if !$runType or ($multi ne 'MULTI'); 176 177 my $stage; 178 my $comp_name; 179 my $current_component; 180 if ($runType eq 'rawExp') { 181 $stage = 'raw'; 182 $comp_name = 'class_id'; 183 } elsif ($runType eq 'chipRun') { 184 $stage = 'chip'; 185 $comp_name = 'class_id'; 186 } elsif ($runType eq 'camRun') { 187 $stage = 'camera'; 188 $comp_name = 'exposure'; 189 $current_component = $comp_name; 190 } elsif ($runType eq 'fakeRun') { 191 $stage = 'fake'; 192 $comp_name = 'class_id'; 193 } elsif ($runType eq 'warpRun') { 194 $stage = 'warp'; 195 $comp_name = 'skycell_id'; 196 } elsif ($runType eq 'diffRun') { 197 $stage = 'diff'; 198 $comp_name = 'skycell_id'; 199 } elsif ($runType eq 'stackRun') { 200 $stage = 'stack'; 201 $comp_name = 'skycell_id'; 202 } else { 203 &my_die( "unexpected run type line found in $filename: $runType\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 204 } 205 206 my $new_workdir_value; 207 if ($destdir eq 'none') { 208 # this only appiles to rawExp 209 $new_workdir_value = "$workdir"; 210 } else { 211 $new_workdir_value = "$workdir/$destdir"; 212 } 213 my $component_dir; 214 if ($current_component) { 215 $component_dir = $components->{$current_component}; 216 } 217 foreach $line (@lines) { 218 my $out_line = $line; 219 220 my ($name, $type, $value) = split " ", $line; 221 # only complete lines have things that we need to examine 222 if ($name and $type and $value) { 223 my $new_value; 224 # we have a new component id, save it and look up the corresponding 225 # component_dir 226 if ($name eq $comp_name) { 227 $current_component = $value; 228 $component_dir = $components->{$current_component}; 229 &my_die( "$component_dir is null for $value in $filename: $runType\n", 230 $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$component_dir; 231 } elsif ($name eq 'workdir') { 232 $new_value = $new_workdir_value; 233 } elsif ($name eq 'tess_id') { 234 # for tess_id strip off any directories just keep the basename. 235 # The site configuration will need to map this to a proper location 236 # XXX: Document this 237 $new_value = basename($value); 238 } elsif ((($name eq 'uri') or ($name eq 'path_base')) and ($value ne 'NULL')) { 239 &my_die( "$component_dir is null and we need it for $name", 240 $file_id, $PS_EXIT_PROG_ERROR) if !$component_dir; 241 242 $new_value = caturi($new_workdir_value, $component_dir, basename($value)); 243 } 244 245 # if the value changed re-write the line, otherwise just print what we read 246 if ($new_value) { 247 $out_line = " " . $name . "\t\t" . $type . "\t" . $new_value . "\n"; 248 } 249 } 250 251 print OUT $out_line or &my_die( "failed to write to $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 252 } 253 254 close OUT 255 or &my_die( "failed to close $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 256 257 # update the fileset to allow processing of other files 258 my $command = "$receivetool -updatefileset -fileset_id $fileset_id"; 259 $command .= " -dbinfo $dbinfo_uri"; 260 $command .= " -dbname $dbname" if defined $dbname; 261 262 my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = 263 run(command => $command, verbose => $verbose); 264 &my_die( "Unable to update fileset $fileset_id to\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $success; 265 266 267 } elsif ($file =~ m|.*\.tgz$|) { # XXX: perhaps get this off of file type ? 110 268 # Get contents of tarball 111 269 my @files = (); … … 114 272 my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = 115 273 run(command => $command, verbose => $verbose); 116 die "Unable to get listing of tar file $filename\n"unless $success;274 &my_die( "Unable to get listing of tar file $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $success; 117 275 118 276 my @lines = split(/\n/, join "", @$stdout_buf); # Lines from output … … 132 290 my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = 133 291 run(command => $command, verbose => $verbose); 134 die "Unable to extract tar file $filename\n" unless $success; 135 } 292 &my_die( "Unable to extract tar file $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $success; 293 } 294 295 my $component_dir = $components->{$component}; 296 &my_die( "Unable to find component_dir for $component $filename\n", $file_id, $PS_EXIT_UNKNOWN_ERROR) unless $component_dir; 297 298 my $target_dir; 299 if ($destdir eq 'none') { 300 $target_dir = "$workdir"; 301 } else { 302 $target_dir = "$workdir/$destdir"; 303 } 304 $target_dir .= "/$component_dir"; 136 305 137 306 # Move files into filesystem of choice 138 307 foreach my $file ( @files ) { 139 308 my $from = "$tempdir/$file"; # Source for file 140 my $target = "$workdir/$file"; # Target destination for file 309 my $target = "$target_dir/$file"; # Target destination for file 310 311 312 $ipprc->file_delete ($target); 313 141 314 my $to = $ipprc->file_create( $target ); # Target for move 142 system("mv $from $to") == 0 or die "Unable to move $file into workdir $workdir: $!\n"; 315 316 if (!$to) { 317 &my_die( "failed to create: $target\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 318 } 319 320 if ( $file =~ /.+\.mdc/ ) { 321 # this file is a config dump file edit the paths 322 edit_mdc_file($file_id, $from, $to, $workdir); 323 } else { 324 system("mv $from $to") == 0 or &my_die( "Unable to move $file into workdir $workdir: $!\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 325 } 143 326 } 144 327 } else { 145 die "Unrecognised file: $file\n"; 146 } 147 148 unlink $filename or die "Unable to unlink $filename\n"; 328 &my_die( "Unrecognised file: $file\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 329 } 330 149 331 my $mjd_extract = DateTime->now->mjd; # MJD of finishing extract 150 151 332 152 333 # All done … … 170 351 # Pau. 171 352 353 sub read_dirinfo_file 354 { 355 my $filename = shift; 356 my $file_id = shift; 357 358 my $resolved = $ipprc->file_resolve($filename); 359 &my_die("failed to resolve dirinfo file: $filename ", $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$resolved; 360 361 open INFILE, $resolved or &my_die( "Can't open $resolved\n", $file_id, $PS_EXIT_UNKNOWN_ERROR); 362 363 my @lines = (<INFILE>); 364 365 my $dirinfo = join "", @lines; 366 367 close INFILE; 368 369 my $metadata = $mdcParser->parse($dirinfo) or 370 &my_die("Unable to parse metadata config doc", $file_id, $PS_EXIT_UNKNOWN_ERROR); 371 372 my $array = parse_md_list($metadata) or 373 &my_die("Unable to parse metadata list", $file_id, $PS_EXIT_UNKNOWN_ERROR); 374 375 my $dest_hash = $array->[0]; 376 377 my $destdir = $dest_hash->{destdir}; 378 &my_die("destdir not found in $filename", $file_id, $PS_EXIT_UNKNOWN_ERROR) if !$destdir; 379 380 my $components = $array->[1]; 381 382 return ($destdir, $components, \@lines); 383 } 384 385 # edit a config dump file replacing the "volume" value with the new local value: $workdir 386 sub edit_mdc_file 387 { 388 my $file_id = shift; 389 my $src = shift; 390 my $dest = shift; 391 my $workdir = shift; 392 393 open my $IN, "<$src" or &my_die("failed to open $src for input", $file_id, $PS_EXIT_UNKNOWN_ERROR); 394 open my $OUT, ">$dest" or &my_die("failed to open $dest for output", $file_id, $PS_EXIT_UNKNOWN_ERROR); 395 396 # Assumed file structure 397 # stuff 398 # FILES.INPUT metadata 399 # FILES.OUTPUT metadata 400 # more stuff 401 # only the paths in the FILES.* metadata are monkeyed with 402 my $done_editing = 0; 403 my $numFilesMD = 0; 404 foreach my $line (<$IN>) { 405 my $out_line = $line; 406 if (!$done_editing) { 407 my (@words) = split " ", $line; 408 if (scalar @words) { 409 # get rid of any leading blank words 410 while ((scalar @words) and !defined $words[0]) { 411 shift @words; 412 } 413 414 if ($words[1] and $words[1] eq "METADATA") { 415 if ( $words[0] =~ /^FILES\..+/ ) { 416 $numFilesMD++; 417 } 418 } elsif ($words[0] eq "END") { 419 # when we get to the end of the second FILES metadata we're done editing 420 if ($numFilesMD == 2) { 421 $done_editing = 1; 422 } 423 } elsif ($numFilesMD and ($words[1] eq "STR")) { 424 # we're processing one of the files metadata edit the path 425 my $key = shift @words; 426 my $type = shift @words; 427 my $path = shift @words; 428 my $extra = join " ", @words; 429 430 $path = edit_path($file_id, $workdir, $path); 431 432 $out_line = "\t" . $key ."\t" . "STR" . "\t" . $path; 433 $out_line .= "\t" . $extra if $extra; 434 $out_line .= "\n"; 435 } 436 } 437 } 438 print $OUT $out_line; 439 } 440 441 close $IN; 442 close $OUT or &my_die("failed to close $dest", $file_id, $PS_EXIT_UNKNOWN_ERROR); 443 } 444 445 446 # XXX: this should go into a module 447 # Replace 'volume portion of path with $workdir/ 448 # Volume is defined here by 449 # neb://volume/ 450 # /xxx/xxxxx/ i.e. /data/ippxxx.y/ 451 # file://xxx/xxxxx/ i.e. file://data/ippxxx.y/ 452 # path://somepath/ 453 sub edit_path 454 { 455 my $file_id = shift; 456 my $workdir = shift; 457 my $path = shift; 458 459 my $scheme = file_scheme($path); 460 my $tail; 461 if ($scheme) { 462 # strip off scheme:// 463 $tail = substr($path, length($scheme) + 3); 464 } elsif (substr($path, 0, 1) eq '/') { 465 $tail = substr($path, 1); 466 $scheme = ""; 467 } 468 # remove any leading / that are left 469 while ((substr($tail, 0, 1) eq '/')) { 470 $tail = substr($tail, 1); 471 } 472 473 my @segments; 474 if (($scheme eq 'neb') or ($scheme eq 'path')) { 475 my $volume; 476 ($volume, @segments) = split '/', $tail; 477 478 } elsif (!$scheme or ($scheme eq 'file')) { 479 480 # XXX Here we're assuming the /data/ipp??? structure. This won't be true when data is forwarded 481 # by remote sites. We need a way to configure this 482 my $volume; 483 484 # data/ippxxx/dirs 485 (undef, $volume, @segments) = split '/', $tail; 486 } else { 487 &my_die( "unexpected workdir value: $path\n", $file_id, $PS_EXIT_PROG_ERROR); 488 } 489 490 my $new_path = caturi($workdir, @segments); 491 492 return $new_path; 493 } 172 494 173 495 sub my_die
Note:
See TracChangeset
for help on using the changeset viewer.
