IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Changeset 4763


Ignore:
Timestamp:
Aug 14, 2005, 10:33:53 AM (21 years ago)
Author:
eugene
Message:

clean up communications for speed

Location:
trunk/Ohana/src/opihi
Files:
1 added
26 edited

Legend:

Unmodified
Added
Removed
  • trunk/Ohana/src/opihi/include/pcontrol.h

    r4762 r4763  
    168168int VerboseMode ();
    169169int StartJob (Job *job);
     170void gotsignal (int signum);
     171int HarvestHost (int pid);
  • trunk/Ohana/src/opihi/include/psched.h

    r4762 r4763  
    167167CommandF *FindControllerCommand (char *cmd);
    168168int QuitController ();
     169int StopController ();
    169170int VerboseMode ();
    170171int KillLocalJob (Job *job);
     
    175176int CheckControllerStatus ();
    176177int TestElapsedCheck ();
     178void gotsignal (int signum);
    177179
  • trunk/Ohana/src/opihi/include/shell.h

    r4751 r4763  
    5858int           is_list                   PROTO((char *line));
    5959int           is_loop                   PROTO((char *line));
     60int           is_task                   PROTO((char *line));
     61int           is_task_exit              PROTO((char *line));
     62int           is_task_exec              PROTO((char *line));
    6063int           is_macro_create           PROTO((char *line));
    6164void          InitLists                 PROTO(());
  • trunk/Ohana/src/opihi/lib.shell/ListOps.c

    r4714 r4763  
    126126}
    127127
     128int is_task (char *line) {
     129
     130  int status;
     131  char *comm;
     132
     133  comm = thisword (line);
     134  if (comm == (char *) NULL) return (FALSE);
     135
     136  status = !strcmp (comm, "task");
     137  free (comm);
     138  return (status);
     139}
     140
     141int is_task_exit (char *line) {
     142
     143  int status;
     144  char *comm;
     145
     146  comm = thisword (line);
     147  if (comm == (char *) NULL) return (FALSE);
     148
     149  status = !strcmp (comm, "task.exit");
     150  free (comm);
     151  return (status);
     152}
     153
     154int is_task_exec (char *line) {
     155
     156  int status;
     157  char *comm;
     158
     159  comm = thisword (line);
     160  if (comm == (char *) NULL) return (FALSE);
     161
     162  status = !strcmp (comm, "task.exec");
     163  free (comm);
     164  return (status);
     165}
    128166
    129167int is_list (char *line) {
     
    135173  status |= is_for_loop (line);
    136174  status |= is_loop (line);
     175  status |= is_task (line);
     176  status |= is_task_exit (line);
     177  status |= is_task_exec (line);
    137178
    138179  return (status);
  • trunk/Ohana/src/opihi/lib.shell/opihi.c

    r4689 r4763  
    1919
    2020    line = readline (prompt);
     21
    2122    if (line == NULL) {
    2223     
  • trunk/Ohana/src/opihi/pantasks/CheckController.c

    r4762 r4763  
    3838  gettimeofday (&stop, (void *) NULL);
    3939  dtime = DTIME (stop, start);
    40   if (VerboseMode()) fprintf (stderr, "check stack %f\n", dtime);
     40  if (VerboseMode()) fprintf (stderr, "check exit stack %f\n", dtime);
     41  /* if (Njobs) fprintf (stderr, "check exit stack %f\n", dtime); */
    4142  gettimeofday (&start, (void *) NULL);
    4243
     
    6162  gettimeofday (&stop, (void *) NULL);
    6263  dtime = DTIME (stop, start);
    63   if (VerboseMode()) fprintf (stderr, "check %d jobs %f\n", i, dtime);
     64  /* if (VerboseMode()) fprintf (stderr, "clear %d exit jobs %f\n", i, dtime); */
     65  gettimeofday (&start, (void *) NULL);
    6466
    6567  if (TestElapsedCheck()) return (TRUE);
     
    8587  }
    8688
     89  gettimeofday (&stop, (void *) NULL);
     90  dtime = DTIME (stop, start);
     91  /* if (VerboseMode()) fprintf (stderr, "check crash stack %f\n", dtime); */
     92  gettimeofday (&start, (void *) NULL);
     93
    8794  p = buffer.buffer;
    8895  for (i = 0; (i < Njobs) && !TestElapsedCheck(); i++) {
     
    104111  }
    105112  FreeIOBuffer (&buffer);
     113
     114  gettimeofday (&stop, (void *) NULL);
     115  dtime = DTIME (stop, start);
     116  /* if (VerboseMode()) fprintf (stderr, "clear %d crash jobs %f\n", i, dtime); */
    106117  return (TRUE);
    107118}
  • trunk/Ohana/src/opihi/pantasks/CheckJobs.c

    r4762 r4763  
    1717    switch (status) {
    1818      case JOB_PENDING:
    19         if (VerboseMode()) fprintf (stderr, "job %s (%d) pending\n", job[0].task[0].name, job[0].JobID);
     19        /* if (VerboseMode()) fprintf (stderr, "job %s (%d) pending\n", job[0].task[0].name, job[0].JobID); */
    2020        break;
    2121
    2222      case JOB_BUSY:
    23         if (VerboseMode()) fprintf (stderr, "job %s (%d) busy\n", job[0].task[0].name, job[0].JobID);
     23        /* if (VerboseMode()) fprintf (stderr, "job %s (%d) busy\n", job[0].task[0].name, job[0].JobID); */
    2424        break;
    2525
  • trunk/Ohana/src/opihi/pantasks/CheckSystem.c

    r4762 r4763  
    99  gettimeofday (&start, (void *) NULL);
    1010
    11   if (Ncheck < 5) {
     11  if (Ncheck < 20) {
    1212    CheckTasks ();
    1313    CheckJobs ();
  • trunk/Ohana/src/opihi/pantasks/ControllerOps.c

    r4762 r4763  
    2525  gettimeofday (&stop, (void *) NULL);
    2626  dtime = DTIME (stop, start);
    27   if (VerboseMode()) fprintf (stderr, "check job status %f\n", dtime);
     27  /* if (VerboseMode()) fprintf (stderr, "check job status %f\n", dtime); */
    2828
    2929  if ((job[0].state == JOB_EXIT) || (job[0].state == JOB_CRASH)) {
     
    3232    gettimeofday (&stop, (void *) NULL);
    3333    dtime = DTIME (stop, start);
    34     if (VerboseMode()) fprintf (stderr, "get stdout %f\n", dtime);
    35 
     34    /* if (VerboseMode()) fprintf (stderr, "get stdout %f\n", dtime); */
     35
     36    gettimeofday (&start, (void *) NULL);
    3637    GetJobOutput ("stderr", job[0].pid, &job[0].stderr, job[0].stderr_size);
     38    gettimeofday (&stop, (void *) NULL);
     39    dtime = DTIME (stop, start);
     40    /* if (VerboseMode()) fprintf (stderr, "get stderr %f\n", dtime); */
     41
     42    gettimeofday (&start, (void *) NULL);
    3743    DeleteControllerJob (job);
     44    gettimeofday (&stop, (void *) NULL);
     45    dtime = DTIME (stop, start);
     46    /* if (VerboseMode()) fprintf (stderr, "delete job %f\n", dtime); */
    3847  } 
    3948  return (TRUE);
     
    138147  if (status == -1) return (CONTROLLER_HUNG);
    139148
    140   if (VerboseMode()) fprintf (stderr, "message received (GetJobOutput : %s)\n", cmd); 
     149  /* if (VerboseMode()) fprintf (stderr, "message received (GetJobOutput : %s)\n", cmd);   */
    141150  /* drop extra bytes from pcontrol (not pclient:job) */
    142151  buffer[0].Nbuffer = Nstart + Nbytes;
     
    328337  FlushIOBuffer (buffer);
    329338
    330   if (VerboseMode()) fprintf (stderr, "send: %s\n", cmd);
    331 
    332339  /* send command, is pipe still open? */
    333340  status = write_fmt (stdin_cntl, "%s\n", cmd);
    334341  if ((status == -1) && (errno == EPIPE)) {
    335     ControllerStatus = FALSE;
     342    StopController ();
    336343    if (VerboseMode()) fprintf (stderr, "controller is down\n");
    337344    return (FALSE);
     
    347354  }
    348355  if (status ==  0) {
    349     ControllerStatus = FALSE;
     356    StopController ();
    350357    if (VerboseMode()) fprintf (stderr, "controller is down\n");
    351358    return (FALSE);
    352359  }
    353360  if (status == -1) {
     361    StopController ();
    354362    if (VerboseMode()) fprintf (stderr, "controller is not responding\n");
    355363    return (FALSE);
     
    362370    bzero (buffer[0].buffer + buffer[0].Nbuffer, buffer[0].Nalloc - buffer[0].Nbuffer);
    363371  }
    364   if (VerboseMode()) fprintf (stderr, "message received, %d cycles\n", i);
     372  /* if (VerboseMode()) fprintf (stderr, "message received, %d cycles\n", i); */
    365373  return (TRUE);
    366374}
     
    428436int QuitController () {
    429437
    430   int i, status, waitstatus, result;
     438  int status;
    431439  char cmd[128];
    432440  IOBuffer buffer;
     
    436444  sprintf (cmd, "quit");
    437445  InitIOBuffer (&buffer, 0x100);
    438   status = ControllerCommand (cmd, CONTROLLER_PROMPT, &buffer);
    439   FreeIOBuffer (&buffer);
     446  status = ControllerCommand (cmd, "", &buffer);
     447  FreeIOBuffer (&buffer);
     448
     449  /* the quit command does not return a prompt, so we always
     450     get an error on the controller here */
     451  StopController ();
     452  return (TRUE);
     453}
     454
     455int StopController () {
     456
     457  int i, waitstatus, result;
     458
     459  if (!ControllerStatus) return (TRUE);
    440460
    441461  ControllerStatus = FALSE;
     
    451471  FreeIOBuffer (&stdout_buffer);
    452472  FreeIOBuffer (&stderr_buffer);
    453 
    454   return (TRUE);
    455 }
     473  return (TRUE);
     474}
  • trunk/Ohana/src/opihi/pantasks/Makefile

    r4748 r4763  
    5151$(SDIR)/controller_status.$(ARCH).o \
    5252$(SDIR)/controller_output.$(ARCH).o \
     53$(SDIR)/controller_pulse.$(ARCH).o \
    5354$(SDIR)/task.$(ARCH).o \
    5455$(SDIR)/task_host.$(ARCH).o \
  • trunk/Ohana/src/opihi/pantasks/controller.c

    r4693 r4763  
    66int controller_check   PROTO((int, char **));
    77int controller_output  PROTO((int, char **));
     8int controller_pulse   PROTO((int, char **));
    89
    910static Command controller_cmds[] = {
     
    1314  {"status", controller_status, "check controller status"},
    1415  {"output", controller_output, "print controller output"},
     16  {"pulse",  controller_pulse,  "set controller pulse"},
    1517};
    1618
  • trunk/Ohana/src/opihi/pantasks/controller_host.c

    r4714 r4763  
    2222  return (TRUE);
    2323}
     24
     25/* should I keep an internal host table so I can reload the
     26   hosts if the controller exits?
     27
     28   alternatively, that could be a user-level choice
     29*/
  • trunk/Ohana/src/opihi/pantasks/controller_status.c

    r4706 r4763  
    2222  InitIOBuffer (&buffer, 0x100);
    2323  status = ControllerCommand (command, CONTROLLER_PROMPT, &buffer);
    24   if (status) fwrite (buffer.buffer, 1, buffer.Nbuffer, stderr);
     24  if (status) {
     25    fwrite (buffer.buffer, 1, buffer.Nbuffer, stderr);
     26  } else {
     27    fprintf (stderr, "controller is down\n");
     28  }
    2529  FreeIOBuffer (&buffer);
    2630  return (TRUE);
    27 
    2831}
  • trunk/Ohana/src/opihi/pantasks/psched.c

    r4714 r4763  
    4141
    4242  signal (SIGINT, SIG_IGN);
     43  signal (SIGPIPE, gotsignal);
     44  signal (SIGTSTP, gotsignal);
     45  signal (SIGTTIN, gotsignal);
    4346  return;
    4447}
     
    5760  return;
    5861}
     62
     63void gotsignal (int signum) {
     64  fprintf (stderr, "got signal : %d\n", signum);
     65  return;
     66}
  • trunk/Ohana/src/opihi/pclient/ChildOps.c

    r4762 r4763  
    11# include "pclient.h"
     2#include <sys/ioctl.h>
     3#include <sys/types.h>
     4#include <unistd.h>
     5#include <stropts.h>
    26
    37static int Nbad = 0;
     
    2933  struct timeval now;
    3034
    31   /* this is really lame : check if we are calling too quickly
    32      this is unneeded: pclient.c rl_keyboard_input_timeout limits
    33      the rate
    34   gettimeofday (&now, NULL);
    35   dtime = DTIME (now, last);
     35  /* runaway test - if pcontrol is killed, pclient starts running away.  this test is a bit
     36     dangerous: the choice of dtime probably depends on the processor and the value provided to
     37     pclient.c:rl_set_keyboard_input_timeout (1000); note that we cannot use getppid == 1 as a test
     38     because the parent of pclient is the ssh process on the pclient host, not pcontrol.  in any
     39     case, the opihi shell catches if the ssh dies using getppid
     40   */
     41  gettimeofday (&now, (void *) NULL);
     42  dtime = 1e6*DTIME (now, last);
     43  if (dtime < 100) {
     44    Nbad ++;
     45    if (Nbad > 10) {
     46      fprintf (stderr, "runaway!\n");
     47      exit (2);
     48    }
     49  }
     50  if (dtime > 950) Nbad = 0;
    3651  last = now;
    37   if (dtime < 0.0001) Nbad ++;
    38   if (dtime > 0.01) Nbad = 0;
    39   if (Nbad > 10) exit (2);
    40   */
    41 
    42   /* this is a bit lame : we must exit if calling process exits */
    43   ppid = getppid();
    44   if (ppid == 1) exit (2);
    4552
    4653  CheckChildStatus ();
  • trunk/Ohana/src/opihi/pclient/pclient.c

    r4762 r4763  
    2121  rl_event_hook = CheckChild;
    2222  rl_set_keyboard_input_timeout (1000);
     23  /* 1 ms seems to be the minimum valid number */
    2324
    2425  set_str_variable ("HISTORY", opihi_history);
  • trunk/Ohana/src/opihi/pcontrol/CheckBusyJob.c

    r4689 r4763  
    2020  switch (status) {
    2121    case PCLIENT_DOWN:
     22      HarvestHost (host[0].pid);
    2223      UnlinkJobAndHost (job);
    2324      PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM);
  • trunk/Ohana/src/opihi/pcontrol/CheckDoneHost.c

    r4575 r4763  
    1414  switch (status) {
    1515    case PCLIENT_DOWN:
    16       /** do we need to close the connection? **/
     16      if (VerboseMode()) fprintf (stderr, "host %s is down\n", host[0].hostname);
     17      /* DONE host does not have an incomplete job */
     18      HarvestHost (host[0].pid);
    1719      PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM);
    18       if (VerboseMode()) fprintf (stderr, "host %s is down\n", host[0].hostname);
    1920      FreeIOBuffer (&buffer);
    2021      return (FALSE);
     22      /** do we need to close the connection? **/
    2123
    2224    case PCLIENT_HUNG:
  • trunk/Ohana/src/opihi/pcontrol/CheckHost.c

    r4762 r4763  
    1313    case 0:
    1414      if (VerboseMode()) fprintf (stderr, "host %s is down\n", host[0].hostname);
    15 
    16       /* if host has a job, job is dead, push to Pending */
    17       if (host[0].stack == PCONTROL_HOST_BUSY) {
    18         job = (Job *) host[0].job;
    19         if (job != NULL) {
    20           N = FindJob (job[0].JobID, PCONTROL_JOB_BUSY);
    21           if (N < 0) {
    22             fprintf (stderr, "programming error: job is not found in BUSY list\n");
    23             exit (2);
    24           }
    25           job[0].host = NULL; /* unlink host & job */
    26           PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM);
    27         }
     15      /* if host has a job, job is dead, return to Pending */
     16      job = (Job *) host[0].job;
     17      if (job != NULL) {
     18        UnlinkJobAndHost (job);
     19        PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM);
    2820      }
    29       host[0].job = NULL;
     21      HarvestHost (host[0].pid);
    3022      PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM);
    3123      FreeIOBuffer (&buffer);
  • trunk/Ohana/src/opihi/pcontrol/CheckSystem.c

    r4762 r4763  
    11# include "pcontrol.h"
    22
    3 static Npass = 0;
     3static struct timeval lastlive = {0, 0};
    44
    55int CheckSystem () {
     6
     7  struct timeval now;
     8  float dtime;
    69
    710  /* we want to give each block a maximum allowed time */
     
    1417  CheckDownHosts(0.100); /* launch the host */
    1518
     19  /* always allow at least one test */
    1620  /* most tests require about 2ms per host. 
    1721     CheckDoneJobs must depend on the size of the output buffer */
    1822
    19   /* this is a waste of cycles: no need to do this every loop */
    20   if (Npass > 20) {
     23  gettimeofday (&now, (void *) NULL);
     24  dtime = DTIME (now, lastlive);
     25  if (dtime > 1.0) {
    2126    CheckLiveHosts(0.040);
    22     Npass = 0;
    23   } else {
    24     Npass ++;
    25   }
     27    lastlive = now;
     28  }
    2629
    2730  if (0) {
  • trunk/Ohana/src/opihi/pcontrol/KillJob.c

    r4450 r4763  
    1818  switch (status) {
    1919    case PCLIENT_DOWN:
     20      HarvestHost (host[0].pid);
    2021      UnlinkJobAndHost (job);
     22      PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM);
    2123      PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM);
    22       PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM);
    2324      FreeIOBuffer (&buffer);
    2425      return (FALSE);
  • trunk/Ohana/src/opihi/pcontrol/ResetJob.c

    r4450 r4763  
    2020  switch (status) {
    2121    case PCLIENT_DOWN:
    22       /*** different behavior for ANYHOST, WANTHOST, NEEDHOST ***/
     22      /*** different behavior for ANYHOST, WANTHOST, NEEDHOST? ***/
    2323      fprintf (stderr, "host %s is down\n", host[0].hostname);
     24      HarvestHost (host[0].pid);
     25      PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM);
    2426      FreeIOBuffer (&buffer);
    2527      return (FALSE);
  • trunk/Ohana/src/opihi/pcontrol/StartJob.c

    r4575 r4763  
    8888  job[0].host = NULL;
    8989  host[0].job = NULL;
     90  HarvestHost (host[0].pid);
    9091  PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM);
    9192  PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM);
  • trunk/Ohana/src/opihi/pcontrol/StopHosts.c

    r4762 r4763  
    2929int StopHost (Host *host) {
    3030
    31   int       result;
    32   int       waitstatus;
    3331  int       status;
    3432  IOBuffer  buffer;
     
    5351      exit (1);
    5452  }
     53  HarvestHost (host[0].pid);
     54  return (TRUE);
     55}
    5556
    56   /* check current child status */
     57void DownHost (Host *host) {
     58  CLOSE (host[0].stdin);
     59  CLOSE (host[0].stdout);
     60  CLOSE (host[0].stderr);
     61  host[0].job = NULL;
     62  PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM);
     63}
     64
     65void OffHost (Host *host) {
     66  CLOSE (host[0].stdin);
     67  CLOSE (host[0].stdout);
     68  CLOSE (host[0].stderr);
     69  host[0].job = NULL;
     70  PutHost (host, PCONTROL_HOST_OFF, STACK_BOTTOM);
     71}
     72
     73int HarvestHost (int pid) {
     74 
     75  int       result;
     76  int       waitstatus;
     77
    5778  /* I probably should loop a few time with max timeout larger than 10ms... */
    5879  usleep (10000);
    59   result = waitpid (host[0].pid, &waitstatus, WNOHANG);
     80  result = waitpid (pid, &waitstatus, WNOHANG);
    6081  switch (result) {
    6182    case -1:  /* error with waitpid */
     
    83104
    84105    default:
    85       if (result != host[0].pid) {
    86         fprintf (stderr, "waitpid error: mis-matched PID (%d vs %d).  programming error\n", result, host[0].pid);
     106      if (result != pid) {
     107        fprintf (stderr, "waitpid error: mis-matched PID (%d vs %d).  programming error\n", result, pid);
    87108        exit (1);
    88109      }
     
    101122  return (TRUE);
    102123}
    103 
    104 void DownHost (Host *host) {
    105   CLOSE (host[0].stdin);
    106   CLOSE (host[0].stdout);
    107   CLOSE (host[0].stderr);
    108   host[0].job = NULL;
    109   PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM);
    110 }
    111 
    112 void OffHost (Host *host) {
    113   CLOSE (host[0].stdin);
    114   CLOSE (host[0].stdout);
    115   CLOSE (host[0].stderr);
    116   host[0].job = NULL;
    117   PutHost (host, PCONTROL_HOST_OFF, STACK_BOTTOM);
    118 }
  • trunk/Ohana/src/opihi/pcontrol/pcontrol.c

    r4762 r4763  
    1919  rl_attempted_completion_function = command_completer;
    2020  rl_event_hook = CheckSystem;
    21   rl_set_keyboard_input_timeout (100000);
     21  rl_set_keyboard_input_timeout (1000);
    2222
    2323  set_str_variable ("HISTORY", opihi_history);
     
    3030  /* ignore the history file.  to change this, see, eg, mana.c */
    3131  signal (SIGINT, SIG_IGN);
     32  signal (SIGPIPE, gotsignal);
     33  signal (SIGTSTP, gotsignal);
     34  signal (SIGTTIN, gotsignal);
    3235  return;
    3336}
     
    4447  return;
    4548}
     49
     50void gotsignal (int signum) {
     51  fprintf (stderr, "got signal : %d\n", signum);
     52  return;
     53}
  • trunk/Ohana/src/opihi/scripts/psched.pro

    r4714 r4763  
    11
    22controller exit true
    3 controller host add kiawe
    4 controller host add alala
     3# controller host add kiawe
     4$Ntest = 0
     5# controller host add alala
    56# verbose on
     7pulse 1000
     8controller pulse 1000
     9
     10macro load.machines
     11  if ($0 != 2)
     12    echo "load.machines (nmach)"
     13    break
     14  end
     15
     16  for i 0 $1
     17    $n = $i + 1
     18    sprintf host "po%02d" $n
     19    controller host add $host
     20  end
     21end
    622
    723task test
    824  command partest
    9   periods -poll 0.1
    10   periods -exec 0.1
     25  # polling period is no longer valid: we check for completed controller tasks
     26  # correction: still valid for local tasks
     27  periods -poll 0.20
     28  periods -exec 0.001
    1129  periods -timeout 10.0
    12   nmax 5
     30  nmax 1024
    1331  host anyhost
    1432
     
    1836    queuedelete stdout
    1937    queuedelete stderr
    20     memory leaks
     38    date date
     39    queuepush done "$date"
     40    $Ntest ++
     41#   memory leaks
    2142#   queuesize stdout -var Nstdout
    2243#    for i 0 $Nstdout
     
    4263  end
    4364end
     65
     66# pulse == 100ms
     67# poll/exit = 0.2  : 29 sec / 100 jobs
     68# poll/exit = 0.1  : 20 sec / 100 jobs
     69# poll/exit = 0.05 : 17 sec / 100 jobs
     70# poll/exit = 0.01 : 18 sec / 100 jobs
     71
     72# pulse == 10ms
     73# poll/exit = 0.2  : 20 sec / 100 jobs
     74# poll/exit = 0.10 : 12 sec / 100 jobs
     75# poll/exit = 0.05 : 12 sec / 100 jobs
     76# poll/exit = 0.01 :  9 sec / 100 jobs
     77
     78# we are limited here by how quickly we can send data to the
     79# controller.  this is limited by the occasional 'CheckSystem'
     80# loops, with ~40ms minimum.
     81
     82# seems to be faster on po01 from kiawe (less interference?)
     83
     84# pulse == 1ms, controller pulse == 1ms
     85# poll/exit = 0.01 :  3 sec / 100 jobs
     86# 2 mach, 3 sec
     87# 4 mach, 3 sec
     88# 8 mach, 3 sec
     89
     90# 16 machines, 500 jobs, 13 sec: 26ms / job
     91# 32 machines, 1024 jobs, 26 sec: 26ms / job
     92# job harvesting rate is still the limitation.  Each job harvest requires:
     93#  - jobstack exit
     94#  - stdout
     95#  - stderr
     96#  - delete
     97#  - jobstack crash
     98
Note: See TracChangeset for help on using the changeset viewer.