IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Changeset 4705


Ignore:
Timestamp:
Aug 3, 2005, 5:50:08 PM (21 years ago)
Author:
eugene
Message:

psched / pscontrol / pclient dev work

Location:
trunk/Ohana/src/opihi
Files:
3 added
31 edited

Legend:

Unmodified
Added
Removed
  • trunk/Ohana/src/opihi/include/pcontrol.h

    r4693 r4705  
    8383  int         pid;
    8484  HostStat    stack;
    85   Ptime       start;
     85  struct timeval lasttry;
     86  struct timeval nexttry;
    8687  Ptime       accum;
    8788  Ptime       timer;
     
    101102# define FREE(X) if (X != NULL) { free (X); }
    102103# define CLOSE(FD) { if (FD) close (FD); FD = 0; }
     104# define DTIME(A,B) ((A.tv_sec - B.tv_sec) + 1e-6*(A.tv_usec - B.tv_usec))
     105# define ZTIME(A) ((A.tv_sec == 0) && (A.tv_usec == 0))
    103106
    104107void InitPcontrol ();
     
    113116Host *FindHostPtr (IDtype HostID, int StackID);
    114117Host *FindHostStack (IDtype HostID);
     118int FindNamedHostStack (char *name);
    115119Host *PullHost (IDtype HostID, int StackID);
    116120int FindNamedHost (char *name, int StackID);
  • trunk/Ohana/src/opihi/include/psched.h

    r4697 r4705  
    4949  Macro  *crash;                        /* name is 'crash' */
    5050  Macro  *timeout;
    51   Macro  *def;
     51  Macro  *defexit;
    5252
    5353  int     NEXIT;
     
    115115
    116116void InitPsched ();
    117 
    118117void InitTasks ();
    119118Task *NextTask ();
     
    132131void InitTaskTimers ();
    133132int TaskHash (char *input);
     133int RemoveTask (Task *task);
     134Task *SetNewTask (Task *task);
    134135
    135136int NextJobID ();
  • trunk/Ohana/src/opihi/lib.data/IOBufferOps.c

    r4697 r4705  
    1919  buffer[0].Nalloc = buffer[0].Nreset;
    2020  REALLOCATE (buffer[0].buffer, char, buffer[0].Nalloc);
     21  bzero (buffer[0].buffer, buffer[0].Nalloc);
    2122
    2223  return (TRUE);
     
    3738    REALLOCATE (buffer[0].buffer, char, buffer[0].Nalloc);
    3839    Nfree = buffer[0].Nalloc - buffer[0].Nbuffer;
     40    bzero (buffer[0].buffer + buffer[0].Nbuffer, Nfree);
    3941  }
    4042
  • trunk/Ohana/src/opihi/lib.shell/MacroOps.c

    r3907 r4705  
    5353  }
    5454  for (i = 0; i < macro[0].Nlines; i++) {
    55     fprintf (stderr, "%s\n", macro[0].line[i]);
     55    fprintf (stderr, "  %s\n", macro[0].line[i]);
    5656  }
    5757  return;
  • trunk/Ohana/src/opihi/lib.shell/memstr.c

    r4689 r4705  
    11# include "shell.h"
    22
     3/* memstr returns a view, not an allocated string : don't free */
    34/* returns pointer to start of m2 in m1, or NULL if failure */
    45char *memstr (char *m1, char *m2, int n) {
     
    1213
    1314}
     15
     16/* formatted write statement, with intelligent allocation */
     17int write_fmt (int fd, char *format, ...) {
     18
     19  int Nbyte, status;
     20  char tmp, *line;
     21  va_list argp; 
     22
     23  va_start (argp, format);
     24  Nbyte = vsnprintf (&tmp, 0, format, argp);
     25  va_end (argp);
     26
     27  va_start (argp, format);
     28  ALLOCATE (line, char, Nbyte + 1);
     29  vsnprintf (line, Nbyte + 1, format, argp);
     30  status = write (fd, line, strlen(line));
     31  va_end (argp);
     32
     33  free (line);
     34  return (status);
     35}
  • trunk/Ohana/src/opihi/pantasks/CheckJobs.c

    r4697 r4705  
    2626      case JOB_CRASH:
    2727        /* run task[0].crash macro, if it exists */
    28         /* set the stdout and stderr variables with job.stdout, job.stderr */
    29         /* XXX this will break on 0 values in output streams */
     28        /* push output buffer data to the stdout and stderr queues */
    3029        if (VerboseMode()) fprintf (stderr, "job %s (%d) crash\n", job[0].task[0].name, job[0].JobID);
    3130        PushNamedQueue ("stdout", job[0].stdout.buffer);
    3231        PushNamedQueue ("stderr", job[0].stderr.buffer);
     32        /* XXX this will break on 0 values in output streams */
     33        /* perhaps define PushNamedQueueBuffer */
    3334        if (job[0].task[0].crash != NULL) {
    3435          exec_loop (job[0].task[0].crash);
     
    4344        PushNamedQueue ("stderr", job[0].stderr.buffer);
    4445        /* run corresponding task[0].exit macro, if it exists */
    45         macro = job[0].task[0].def;
     46        macro = job[0].task[0].defexit;
    4647        for (i = 0; i < job[0].task[0].Nexit; i++) {
    4748          if (job[0].exit_status == atoi(job[0].task[0].exit[i][0].name)) {
  • trunk/Ohana/src/opihi/pantasks/ControllerOps.c

    r4697 r4705  
    11# include "psched.h"
    2 # define CONTROLLER_TIMEOUT 20
     2/* adding a new host can delay controller up to a second or so */
     3# define CONTROLLER_TIMEOUT 200
    34# define CONNECT_TIMEOUT 300
    45
    56/* local static variables to hold the connection to the controller */
    6 static int status = FALSE;
     7static int ControllerStatus = FALSE;
    78static int stdin_cntl, stdout_cntl, stderr_cntl;
    89static IOBuffer stdout_buffer;
    910static IOBuffer stderr_buffer;
     11static int ControllerPID = 0;
    1012
    1113/* test if the controller is running */
    1214int CheckControllerStatus () {
    13   return (status);
     15  return (ControllerStatus);
    1416}
    1517
     
    2931int CheckControllerJobStatus (Job *job) {
    3032
    31   int outstate;
     33  int outstate, status;
    3234  char cmd[128], status_string[64];
    3335  char *p;
     
    4143  switch (status) {
    4244    case CONTROLLER_DOWN:
    43       fprintf (stderr, "controller is down\n");
     45      if (VerboseMode()) fprintf (stderr, "controller is down\n");
    4446      FreeIOBuffer (&buffer);
    4547      return (FALSE);
    4648
    4749    case CONTROLLER_HUNG:
    48       fprintf (stderr, "controller is not responding\n");
     50      if (VerboseMode()) fprintf (stderr, "controller is not responding\n");
    4951      FreeIOBuffer (&buffer);
    5052      return (FALSE);
    5153
    5254    case CONTROLLER_GOOD:
    53       fprintf (stderr, "message received (CheckControllerJobStatus)\n");
     55      if (VerboseMode()) fprintf (stderr, "message received (CheckControllerJobStatus)\n");
    5456      break;
    5557
    5658    default:
    57       fprintf (stderr, "unknown status for controller command: programming error\n"); 
     59      if (VerboseMode()) fprintf (stderr, "unknown status for controller command: programming error\n"); 
    5860      exit (1);
    5961  }
     
    105107
    106108  /* send command to get appropriate channel */
    107   ALLOCATE (line, char, MAX (1, strlen(cmd) + 15));
    108   sprintf (line, "%s %d\n", cmd, pid);
    109   status = write (stdin_cntl, line, strlen(line));
    110   free (line);
     109  status = write_fmt (stdin_cntl, "%s %d\n", cmd, pid);
    111110
    112111  /* is pipe still open? */
     
    126125  if (status == -1) return (CONTROLLER_HUNG);
    127126
    128   fprintf (stderr, "message received (GetJobOutput : %s)\n", cmd); 
     127  if (VerboseMode()) fprintf (stderr, "message received (GetJobOutput : %s)\n", cmd); 
    129128  /* drop extra bytes from pcontrol (not pclient:job) */
    130129  buffer[0].Nbuffer = Nstart + Nbytes;
     
    161160
    162161  /* construct the controller command portion */
    163   if (!strcmp (job[0].task[0].host, "NONE")) {
     162  if (!strcasecmp (job[0].task[0].host, "ANYHOST")) {
    164163    sprintf (cmd, "job");
    165164  } else {
     
    177176  }
    178177
    179   fprintf (stderr, "sending command to controller: %s\n", cmd);
    180178  InitIOBuffer (&buffer, 0x100);
    181179  ControllerCommand (cmd, CONTROLLER_PROMPT, &buffer);
    182180  free (cmd);
    183 
    184   fprintf (stderr, "response from controller: %s\n", buffer.buffer);
    185181
    186182  /* extract the job PID from the controller response */
     
    203199  char *p;
    204200  char **argv, cmd[128];
    205   int i, pid;
     201  int i, pid, status;
    206202  int stdin_fd[2], stdout_fd[2], stderr_fd[2];
    207203  IOBuffer buffer;
    208204
    209   if (status) return (TRUE);
     205  if (ControllerStatus) return (TRUE);
    210206
    211207  if (VarConfig ("CONTROLLER", "%s", cmd) == NULL) strcpy (cmd, "pcontrol");
     
    262258
    263259  /* send handshake command */
    264   sprintf (cmd, "echo CONNECTED\n");
    265   status = write (stdin_fd[1], cmd, strlen(cmd));
     260  status = write_fmt (stdin_fd[1], "echo CONNECTED\n");
    266261  if ((status == -1) && (errno == EPIPE)) goto pipe_error;
    267262
     
    286281  InitIOBuffer (&stderr_buffer, 0x100);
    287282
     283  ControllerPID = pid;
     284  ControllerStatus = TRUE;
    288285  fprintf (stderr, "Connected\n");
    289286  return (TRUE);
     
    316313  FlushIOBuffer (buffer);
    317314
    318   fprintf (stderr, "send: %s\n", cmd);
    319 
    320   /* send command to client (adding on \n) */
    321   ALLOCATE (line, char, MAX (1, strlen(cmd)));
    322   sprintf (line, "%s\n", cmd);
    323   status = write (stdin_cntl, line, strlen(line));
    324   free (line);
    325 
    326   /* is pipe still open? */
     315  if (VerboseMode()) fprintf (stderr, "send: %s\n", cmd);
     316
     317  /* send command, is pipe still open? */
     318  status = write_fmt (stdin_cntl, "%s\n", cmd);
    327319  if ((status == -1) && (errno == EPIPE)) return (CONTROLLER_DOWN);
    328320 
     
    335327    if (status == -1) usleep (10000);
    336328  }
    337   if (status ==  0) return (CONTROLLER_DOWN);
     329  if (status ==  0) {
     330    ControllerStatus = FALSE;
     331    return (CONTROLLER_DOWN);
     332  }
    338333  if (status == -1) return (CONTROLLER_HUNG);
    339   /* fprintf (stderr, "buffer.buffer: %s\n", buffer[0].buffer); */
     334
     335  /* need to strip off the prompt */
     336  line = memstr (buffer[0].buffer, response, buffer[0].Nbuffer);
     337  if (line != NULL) {
     338    buffer[0].Nbuffer = line - buffer[0].buffer;
     339    bzero (buffer[0].buffer + buffer[0].Nbuffer, buffer[0].Nalloc - buffer[0].Nbuffer);
     340  }
    340341  return (CONTROLLER_GOOD);
    341342}
     
    345346  int Nread;
    346347
    347   if (!status) return (TRUE);
     348  if (!ControllerStatus) return (TRUE);
    348349
    349350  /* read stdout buffer */
     
    390391  IOBuffer buffer;
    391392
    392   if (!CheckControllerStatus()) return (TRUE);
     393  if (!ControllerStatus) return (TRUE);
    393394
    394395  sprintf (cmd, "kill %d", job[0].pid);
     
    403404int QuitController () {
    404405
    405   int status;
     406  int i, status, waitstatus, result;
    406407  char cmd[128];
    407408  IOBuffer buffer;
    408409
    409   if (!CheckControllerStatus()) return (TRUE);
     410  if (!ControllerStatus) return (TRUE);
    410411
    411412  sprintf (cmd, "quit");
    412413  InitIOBuffer (&buffer, 0x100);
    413414  status = ControllerCommand (cmd, CONTROLLER_PROMPT, &buffer);
    414   return (TRUE);
    415 }
    416 
    417 /* memstr returns a view, not an allocated string : don't free */
     415  ControllerStatus = FALSE;
     416  result = waitpid (ControllerPID, &waitstatus, WNOHANG);
     417  for (i = 0; (i < 10) && (result == 0); i++) {
     418    usleep (10000);  /* 10 ms is min */
     419    result = waitpid (ControllerPID, &waitstatus, WNOHANG);
     420  }
     421  ControllerPID = 0;
     422  close (stdin_cntl);
     423  close (stdout_cntl);
     424  close (stderr_cntl);
     425
     426  return (TRUE);
     427}
  • trunk/Ohana/src/opihi/pantasks/JobOps.c

    r4693 r4705  
    5050  int i;
    5151
     52  fprintf (stderr, "\n");
    5253  if (Njobs == 0) {
    53     fprintf (stderr, "no defined jobs\n");
     54    fprintf (stderr, " no defined jobs\n");
    5455    return;
    5556  }
    5657
     58  fprintf (stderr, " Jobs\n");
    5759  for (i = 0; i < Njobs; i++) {
    58     fprintf (stderr, "%d: %-15s %5d %20s (%lx)\n", Njobs, jobs[i][0].task[0].name, jobs[i][0].JobID, jobs[i][0].argv[0], (long) jobs[i][0].argv);
     60    fprintf (stderr, " %d: %-15s %5d %20s (%lx)\n", Njobs, jobs[i][0].task[0].name, jobs[i][0].JobID, jobs[i][0].argv[0], (long) jobs[i][0].argv);
    5961  }
    6062  return;
     
    7678  }
    7779
    78   /* we need our own copy of task[0].argv
    79    *  argc is the number of valid args, like the usual command line.
    80    *  we allocate one extra element, with value 0 to be passed to execvp
     80  /* we need our own copy of task[0].argv.  argc is the number of valid args, like the usual command line.  we
     81   *  allocate one extra element, with value 0 to be passed to execvp
    8182   */
    8283  job[0].argc = task[0].argc;
     
    8788  job[0].argv[i] = 0;
    8889
    89   /* other data from the task is needed by the job
    90      we carry a pointer back to the task.  this means we
    91      cannot modify the task once a job is created, or the changes will
    92      be applied to the existing jobs */
     90  /* Other data from the task is needed by the job. We carry a pointer back to the task.  Changes to an
     91     executing task are applied to the existing jobs (exit macros, poll_period, timeout) */
    9392
    9493  job[0].task = task;
    9594 
    96   /* if we decide we need to be able to dynamically set task qualities
    97      (like host, timeouts, etc), the we will need to have matched
    98      entries to these quantites in the job structure */
     95  /* if we decide we need to be able to dynamically set task qualities (like host, timeouts, etc), then we will
     96     need to have matched entries to these quantities in the job structure */
    9997
    10098  jobs[Njobs] = job;
     
    122120}
    123121
    124 /** are we deleting the active job?? **/
     122/* delete the job from the job list & adjust ActiveJob counter */
    125123int DeleteJob (Job *job) {
    126124
     
    140138
    141139  FreeJob (jobs[Nm]);
    142   for (i = Nm + 1; i < Njobs; i++)
    143     jobs[i - 1] = jobs[i];
     140  for (i = Nm; i < Njobs - 1; i++) {
     141    jobs[i] = jobs[i + 1];
     142  }
    144143  Njobs --;
    145144
  • trunk/Ohana/src/opihi/pantasks/Makefile

    r4693 r4705  
    4141$(SDIR)/run.$(ARCH).o \
    4242$(SDIR)/stop.$(ARCH).o \
     43$(SDIR)/pulse.$(ARCH).o \
    4344$(SDIR)/status.$(ARCH).o \
    4445$(SDIR)/kill.$(ARCH).o \
  • trunk/Ohana/src/opihi/pantasks/TaskOps.c

    r4697 r4705  
    5454  int i, valid;
    5555
     56  fprintf (stderr, "\n");
    5657  if (Ntasks == 0) {
    57     fprintf (stderr, "no defined tasks\n");
     58    fprintf (stderr, " no defined tasks\n");
    5859    return;
    5960  }
    6061
    61   fprintf (stderr, "Task Status\n");
    62   fprintf (stderr, "  name            Njobs  command\n");
     62  fprintf (stderr, " Task Status\n");
     63  fprintf (stderr, "  * Name            Njobs  Command\n");
    6364  for (i = 0; i < Ntasks; i++) {
    6465    valid = CheckTimeRanges (tasks[i][0].ranges, tasks[i][0].Nranges);
     66    if (verbose) fprintf (stderr, "\n");
    6567    if (valid) {
    66       fprintf (stderr, "+ ");
     68      fprintf (stderr, "  + ");
    6769    } else {
    68       fprintf (stderr, "- ");
     70      fprintf (stderr, "  - ");
    6971    }
    7072    if (tasks[i][0].argv == NULL) {
    71       fprintf (stderr, "%-15s %4d   %-20s\n", tasks[i][0].name, tasks[i][0].Njobs, "dynamic");
     73      fprintf (stderr, "%-15s %5d  %-20s\n", tasks[i][0].name, tasks[i][0].Njobs, "(dynamic)");
    7274    } else {
    73       fprintf (stderr, "%-15s %4d   %-20s\n", tasks[i][0].name, tasks[i][0].Njobs, tasks[i][0].argv[0]);
     75      fprintf (stderr, "%-15s %5d  %-20s\n", tasks[i][0].name, tasks[i][0].Njobs, tasks[i][0].argv[0]);
    7476    }
    7577    if (verbose) {
    7678      fprintf (stderr, "    spawn period: %f, polling period: %f, timeout period: %f\n",
    77                tasks[i][0].poll_period, tasks[i][0].poll_period, tasks[i][0].poll_period);
    78       if (tasks[i][0].host != NULL) {
    79         if (tasks[i][0].host_required) {
    80           fprintf (stderr, "    host %s (required)\n", tasks[i][0].host);
    81         } else {
    82           fprintf (stderr, "    host %s (desired)\n", tasks[i][0].host);
    83         }
     79               tasks[i][0].exec_period, tasks[i][0].poll_period, tasks[i][0].timeout_period);
     80      if (tasks[i][0].host == NULL) {
     81        fprintf (stderr, "    task runs locally\n");
     82        continue;
     83      }
     84      if (!strcasecmp(tasks[i][0].host, "ANYHOST")) {
     85        fprintf (stderr, "    task host selected by controller\n");
     86        continue;
     87      }
     88      if (tasks[i][0].host_required) {
     89        fprintf (stderr, "    host %s (required)\n", tasks[i][0].host);
    8490      } else {
    85         fprintf (stderr, "    task runs locally\n");
    86       }     
     91        fprintf (stderr, "    host %s (desired)\n", tasks[i][0].host);
     92      }
    8793    }
    8894  }
     
    102108  }
    103109
    104   fprintf (stderr, "macro %s\n", task[0].name);
    105 
    106   fprintf (stderr, "command: ");
     110  fprintf (stderr, "\n macro %s\n", task[0].name);
     111
     112  fprintf (stderr, "\n command: ");
    107113  for (i = 0; i < task[0].argc; i++) {
    108114    fprintf (stderr, "%s ", task[0].argv[i]);
    109115  }
    110 
    111   fprintf (stderr, "host: %s\n", task[0].host);
    112   fprintf (stderr, "time periods: exec: %f  poll: %f  timeout: %f\n",
     116  fprintf (stderr, "\n\n");
     117
     118  if (task[0].host == NULL) {
     119    fprintf (stderr, " task runs locally\n");
     120    goto periods;
     121  }
     122  if (!strcasecmp(task[0].host, "ANYHOST")) {
     123    fprintf (stderr, " task host selected by controller\n");
     124    goto periods;
     125  }
     126  if (task[0].host_required) {
     127    fprintf (stderr, " host %s (required)\n", task[0].host);
     128  } else {
     129    fprintf (stderr, " host %s (desired)\n", task[0].host);
     130  }
     131
     132periods:
     133  fprintf (stderr, " time periods: exec: %f  poll: %f  timeout: %f\n",
    113134           task[0].exec_period, task[0].poll_period, task[0].timeout_period);
    114135
    115   fprintf (stderr, "pre-execute macro\n");
     136  fprintf (stderr, "\n pre-execute macro\n");
    116137  ListMacro (task[0].exec);
    117138
    118   fprintf (stderr, "timeout macro\n");
     139  fprintf (stderr, "\n timeout macro\n");
    119140  ListMacro (task[0].timeout);
    120141
    121   fprintf (stderr, "crash macro\n");
     142  fprintf (stderr, "\n crash macro\n");
    122143  ListMacro (task[0].crash);
    123144
    124   fprintf (stderr, "default exit macro\n");
    125   ListMacro (task[0].def);
     145  fprintf (stderr, "\n default exit macro\n");
     146  ListMacro (task[0].defexit);
    126147
    127148  for (i = 0; i < task[0].Nexit; i++) {
    128     fprintf (stderr, "exit macro (status == %d)\n", atoi(task[0].exit[i][0].name));
     149    fprintf (stderr, "\n exit macro (status == %d)\n", atoi(task[0].exit[i][0].name));
    129150    ListMacro (task[0].exit[i]);
    130151  }
     
    192213  NewTask[0].crash = NULL;
    193214  NewTask[0].timeout = NULL;
     215  NewTask[0].defexit = NULL;
    194216
    195217  NewTask[0].Nexit = 0;
     
    211233
    212234  return (NewTask);
     235}
     236
     237/* remove the task from the task list */
     238int RemoveTask (Task *task) {
     239 
     240  int i, Nt;
     241
     242  /* find task in task list */
     243  Nt = -1;
     244  for (i = 0; i < Ntasks; i++) {
     245    if (task == tasks[i]) {
     246      Nt = i;
     247      break;
     248    }
     249  }
     250  if (Nt == -1) {
     251    fprintf (stderr, "programming error: task not found\n");
     252    return (FALSE);
     253  }
     254  for (i = Nt; i < Ntasks - 1; i++) {
     255    tasks[i] = tasks[i+1];
     256  }
     257  Ntasks --;
     258  return (TRUE);
    213259}
    214260
     
    263309  }
    264310  return (TRUE);
     311}
     312
     313Task *SetNewTask (Task *task) {
     314  NewTask = task;
     315  return (task);
    265316}
    266317
  • trunk/Ohana/src/opihi/pantasks/controller_check.c

    r4693 r4705  
    2121  InitIOBuffer (&buffer, 0x100);
    2222  status = ControllerCommand (command, CONTROLLER_PROMPT, &buffer);
     23  if (VerboseMode()) {
     24    fprintf (stderr, "controller command sent\n"); 
     25    fwrite (buffer.buffer, 1, buffer.Nbuffer, stderr);
     26    fprintf (stderr, "\n Nbytes received: %d\n", buffer.Nbuffer); 
     27  }
    2328  FreeIOBuffer (&buffer);
    2429
     
    3439
    3540    case CONTROLLER_GOOD:
    36       fprintf (stderr, "controller command sent\n"); 
    37       fwrite (buffer.buffer, 1, buffer.Nbuffer, stderr);
    3841      return (TRUE);
    3942
  • trunk/Ohana/src/opihi/pantasks/controller_host.c

    r4693 r4705  
    77  IOBuffer buffer;
    88
    9   if (argc != 2) {
    10     fprintf (stderr, "USAGE: controller host (hostname)\n");
     9  if (argc != 3) {
     10    fprintf (stderr, "USAGE: controller host (command) (hostname)\n");
    1111    return (FALSE);
    1212  }
     
    1515  StartController ();
    1616
    17   sprintf (command, "host %s", argv[1]);
     17  sprintf (command, "host %s %s", argv[1], argv[2]);
    1818  InitIOBuffer (&buffer, 0x100);
    1919  status = ControllerCommand (command, CONTROLLER_PROMPT, &buffer);
     20  fwrite (buffer.buffer, 1, buffer.Nbuffer, stderr);
    2021  FreeIOBuffer (&buffer);
    2122
     
    3132
    3233    case CONTROLLER_GOOD:
    33       fprintf (stderr, "controller command sent\n"); 
    34       fwrite (buffer.buffer, 1, buffer.Nbuffer, stderr);
    35       fprintf (stderr, "\n Nbytes received: %d\n", buffer.Nbuffer); 
    3634      return (TRUE);
    3735
  • trunk/Ohana/src/opihi/pantasks/controller_status.c

    r4693 r4705  
    2222  InitIOBuffer (&buffer, 0x100);
    2323  status = ControllerCommand (command, CONTROLLER_PROMPT, &buffer);
    24   FreeIOBuffer (&buffer);
    2524
    2625  /* check on success of controller command */
     
    2827    case CONTROLLER_DOWN:
    2928      fprintf (stderr, "controller is down\n");
     29      FreeIOBuffer (&buffer);
    3030      return (FALSE);
    3131
    3232    case CONTROLLER_HUNG:
    3333      fprintf (stderr, "controller is not responding\n");
     34      FreeIOBuffer (&buffer);
    3435      return (FALSE);
    3536
    3637    case CONTROLLER_GOOD:
    37       fprintf (stderr, "controller command sent\n"); 
    3838      fwrite (buffer.buffer, 1, buffer.Nbuffer, stderr);
    39       fprintf (stderr, "\n Nbytes received: %d\n", buffer.Nbuffer); 
     39      FreeIOBuffer (&buffer);
    4040      return (TRUE);
    4141
  • trunk/Ohana/src/opihi/pantasks/init.c

    r4693 r4705  
    1111int run             PROTO((int, char **));
    1212int stop            PROTO((int, char **));
     13int pulse           PROTO((int, char **));
    1314int status_sys      PROTO((int, char **));
    1415int kill_job        PROTO((int, char **));
     
    2829  {"run",        run,          "run the scheduler"},
    2930  {"stop",       stop,         "stop the scheduler"},
     31  {"pulse",      pulse,        "set the scheduler update period"},
    3032  {"status",     status_sys,   "get system status"},
    3133  {"kill",       kill_job,     "kill job"},
  • trunk/Ohana/src/opihi/pantasks/psched.c

    r4693 r4705  
    3939  rl_attempted_completion_function = command_completer;
    4040  rl_event_hook = NULL;
    41   rl_set_keyboard_input_timeout (1000000);
     41  rl_set_keyboard_input_timeout (100000);
    4242
    4343  set_str_variable ("HISTORY", opihi_history);
  • trunk/Ohana/src/opihi/pantasks/run.c

    r4693 r4705  
    1010  InitTaskTimers ();
    1111  rl_event_hook = CheckSystem;
    12   rl_set_keyboard_input_timeout (1000000);
    1312
    1413  return (TRUE);
  • trunk/Ohana/src/opihi/pantasks/status.c

    r4693 r4705  
    33int status_sys (int argc, char **argv) {
    44
     5  fprintf (stderr, "\n");
    56  if (rl_event_hook == NULL) {
    6     fprintf (stderr, "scheduler is stopped\n");
     7    fprintf (stderr, " Scheduler is stopped\n");
    78  } else {
    8     fprintf (stderr, "scheduler is running\n");
     9    fprintf (stderr, " Scheduler is running\n");
     10  }
     11  if (CheckControllerStatus ()) {
     12    fprintf (stderr, " Controller is running\n");
     13  } else {
     14    fprintf (stderr, " Controller is stopped\n");
    915  }
    1016  ListTasks (FALSE);
  • trunk/Ohana/src/opihi/pantasks/stop.c

    r4693 r4705  
    99
    1010  rl_event_hook = NULL;
    11   rl_set_keyboard_input_timeout (1000000);
    1211
    1312  return (TRUE);
  • trunk/Ohana/src/opihi/pantasks/task.c

    r4697 r4705  
    3636  if (task == NULL) { /**** new task ****/
    3737    task = CreateTask (argv[1]);
     38  } else {
     39    RemoveTask (task);
     40    SetNewTask (task);
    3841  }
    39   /* temporary task is saved statically
    40      add to list after definition is complete */
     42  /* While a task is being defined, it is removed from the task list.  The new task is added to the task list
     43     when the definition process is complete. 
     44     XXX If an outstanding job has a task deleted, it will not be able to complete... */
    4145
    4246  /* read in task from appropriate source (keyboard or list) until end */
    4347
    44   /* allowed tokens: command, host, stderr, periods, end */
     48  /* allowed tokens: command, host, stderr, periods, trange, nmax, task.exit, task.exec, end */
    4549
    4650  ThisList = Nlists;
  • trunk/Ohana/src/opihi/pantasks/task_command.c

    r4697 r4705  
    88  if (argc < 2) {
    99    fprintf (stderr, "USAGE: command <command> <arg>. ..\n");
    10     fprintf (stderr, "  (define command machine for this task (or 'none'))\n");
     10    fprintf (stderr, "  (define command for this task)\n");
    1111    return (FALSE);
    1212  }
  • trunk/Ohana/src/opihi/pantasks/task_host.c

    r4693 r4705  
    1717    fprintf (stderr, "  -required flags indicates controller must use this host\n");
    1818    fprintf (stderr, "  value of 'local' for host indicates process not using controller\n");
    19     fprintf (stderr, "  value of 'none' for host indicates controller may assign at will\n");
     19    fprintf (stderr, "  value of 'anyhost' for host indicates controller may assign at will\n");
    2020    return (FALSE);
    2121  }
     
    2323  task = GetNewTask ();
    2424  if (task == NULL) {
    25     fprintf (stderr, "ERROR: not defining or running a task\n");
    26     return (FALSE);
     25    task = GetActiveTask ();
     26    if (task == NULL) {
     27      fprintf (stderr, "ERROR: not defining or running a task\n");
     28      return (FALSE);
     29    }
    2730  }
    2831  task[0].host_required = RequiredHost;
     
    3639  return (TRUE);
    3740}
     41
     42/* apparently, local is the default! */
  • trunk/Ohana/src/opihi/pantasks/task_macros.c

    r4693 r4705  
    6262  }
    6363  if (!strcmp (argv[0], "task.exit") && !strcmp (argv[1], "default")) {
    64     if (task[0].def != NULL) {
    65       FreeMacro (task[0].def);
    66       free (task[0].def);
     64    if (task[0].defexit != NULL) {
     65      FreeMacro (task[0].defexit);
     66      free (task[0].defexit);
    6767    }
    68     ALLOCATE (task[0].def, Macro, 1);
    69     macro = task[0].def;
     68    ALLOCATE (task[0].defexit, Macro, 1);
     69    macro = task[0].defexit;
    7070    macro[0].name = strcreate ("default");
    7171    goto found;
  • trunk/Ohana/src/opihi/pcontrol/CheckIdleHost.c

    r4689 r4705  
    4545    return (TRUE);
    4646  }
     47  /* no jobs for host, put back on IDLE stack */
     48  PutHost (host, PCONTROL_HOST_IDLE, STACK_BOTTOM);
    4749  return (TRUE);
    4850}
  • trunk/Ohana/src/opihi/pcontrol/CheckSystem.c

    r4689 r4705  
    9393  Stack *stack;
    9494  Host  *host;
     95  struct timeval now;
     96  float delta;
     97
     98  gettimeofday (&now, (void *) NULL);
    9599
    96100  stack = GetHostStack (PCONTROL_HOST_DOWN);
     
    99103  for (i = 0; i < Nobject; i++) {
    100104    host = GetStack (stack, STACK_TOP);
    101     StartHost (host);
     105    delta = DTIME (host[0].nexttry, now);
     106    if (delta > 0) {
     107      PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM);
     108    } else {
     109      StartHost (host);
     110    }
    102111  }
    103112  return (TRUE);
  • trunk/Ohana/src/opihi/pcontrol/GetJobOutput.c

    r4575 r4705  
    1414
    1515  /* send cmd (stdout / stderr) */
    16   ALLOCATE (line, char, MAX (1, strlen(cmd) + 1));
    17   sprintf (line, "%s\n", cmd);
    18   status = write (host[0].stdin, line, strlen(line));
    19   free (line);
     16  status = write_fmt (host[0].stdin, "%s\n", cmd);
    2017
    2118  /* is pipe still open? */
  • trunk/Ohana/src/opihi/pcontrol/HostOps.c

    r4573 r4705  
    5757}
    5858
     59int FindNamedHostStack (char *name) {
     60
     61  int N;
     62
     63  N = FindNamedHost (name, PCONTROL_HOST_IDLE);
     64  if (N > 0) return (PCONTROL_HOST_IDLE);
     65
     66  N = FindNamedHost (name, PCONTROL_HOST_DOWN);
     67  if (N > 0) return (PCONTROL_HOST_DOWN);
     68
     69  N = FindNamedHost (name, PCONTROL_HOST_DONE);
     70  if (N > 0) return (PCONTROL_HOST_DONE);
     71
     72  N = FindNamedHost (name, PCONTROL_HOST_BUSY);
     73  if (N > 0) return (PCONTROL_HOST_BUSY);
     74
     75  N = FindNamedHost (name, PCONTROL_HOST_OFF);
     76  if (N > 0) return (PCONTROL_HOST_OFF);
     77
     78  return (-1);
     79}
     80
    5981int PutHost (Host *host, int StackID, int where) {
    6082
     
    165187  host[0].HostID   = NextHostID();
    166188
     189  host[0].lasttry.tv_sec = 0;
     190  host[0].lasttry.tv_usec = 0;
     191  host[0].nexttry.tv_sec = 0;
     192  host[0].nexttry.tv_usec = 0;
     193
    167194  host[0].markoff  = FALSE;
    168195  host[0].job      = NULL;
  • trunk/Ohana/src/opihi/pcontrol/JobOps.c

    r4689 r4705  
    161161
    162162  PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM);
     163  fprintf (stderr, "added new job\n");
    163164  return (job[0].JobID);
    164165}
  • trunk/Ohana/src/opihi/pcontrol/StartHost.c

    r4689 r4705  
    11# include "pcontrol.h"
     2# define RETRY_BASE 1.0
    23
    34int StartHost (Host *host) {
     
    67  int stdio[3];
    78  char command[64], shell[64];
    8 
    9   /* pid = rconnect (CONNECT, host[0].hostname, PCLIENT, stdio); */
     9  struct timeval now;
     10  float delta;
    1011
    1112  /* perhaps change the name of these config variables... */
     
    1415
    1516  pid = rconnect (command, host[0].hostname, shell, stdio);
    16   if (!pid) {     /** failure to start **/
     17  if (!pid) {     
     18    /** failure to start: extend retry period **/
    1719    if (VerboseMode()) fprintf (stderr, "failure to start %s\n", host[0].hostname);
     20    gettimeofday (&now, (void *) NULL);
     21    if (ZTIME(host[0].nexttry) || ZTIME(host[0].lasttry)) {
     22      /* reset retry period if either is zero */
     23      delta = RETRY_BASE;
     24    } else {
     25      delta = 2*DTIME (host[0].nexttry, host[0].lasttry);
     26    }
     27    host[0].nexttry.tv_sec  = now.tv_sec  + delta;
     28    host[0].nexttry.tv_usec = now.tv_usec;
     29    host[0].lasttry.tv_sec  = now.tv_sec;
     30    host[0].lasttry.tv_usec = now.tv_usec;
    1831    PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM);
    1932    return (FALSE);
    2033  }
     34  host[0].nexttry.tv_sec  = 0;
     35  host[0].nexttry.tv_usec = 0;
     36  host[0].lasttry.tv_sec  = 0;
     37  host[0].lasttry.tv_usec = 0;
     38
    2139  host[0].stdin  = stdio[0];
    2240  host[0].stdout = stdio[1];
  • trunk/Ohana/src/opihi/pcontrol/host.c

    r4689 r4705  
    33int host (int argc, char **argv) {
    44
     5  int N, Ns;
    56  IDtype HostID;
    6   int N, Delete, Off, On, Start, Stop, Check;
    77  Host *host;
    88
    9   /* this section needs some help: find the specified host in the stacks */
    10   Delete = FALSE;
    11   if ((N = get_argument (argc, argv, "-delete"))) {
    12     remove_argument (N, &argc, argv);
    13     Delete = TRUE;
     9  if (argc != 3) {
     10    fprintf (stdout, "USAGE: host (command) (hostname)\n");
     11    return (FALSE);
    1412  }
    15  
    16   /* this section needs some help: find the specified host in the stacks */
    17   Off = FALSE;
    18   if ((N = get_argument (argc, argv, "-off"))) {
    19     if (Delete) {
    20       fprintf (stdout, "-delete and -off incompatible\n");
     13
     14  if (!strcasecmp (argv[1], "ADD")) {
     15    HostID = AddHost (argv[2]);
     16    fprintf (stdout, "HostID: %d\n", (int) HostID);
     17    return (TRUE);
     18  }
     19  if (!strcasecmp (argv[1], "ON")) {
     20    N = FindNamedHost (argv[2], PCONTROL_HOST_OFF);
     21    if (N < 0) {
     22      fprintf (stdout, "host %s is not OFF\n", argv[2]);
    2123      return (FALSE);
    2224    }
    23     remove_argument (N, &argc, argv);
    24     Off = TRUE;
     25    host = GetHost (PCONTROL_HOST_OFF, N);
     26    DownHost (host);
     27    return (TRUE);
    2528  }
    26  
    27   /* this section needs some help: find the specified host in the stacks */
    28   On = FALSE;
    29   if ((N = get_argument (argc, argv, "-on"))) {
    30     if (Delete || Off) {
    31       fprintf (stdout, "only one of -delete, -off, -on\n");
     29  if (!strcasecmp (argv[1], "RETRY")) {
     30    N = FindNamedHost (argv[2], PCONTROL_HOST_DOWN);
     31    if (N < 0) {
     32      fprintf (stdout, "host %s is not DOWN\n", argv[2]);
    3233      return (FALSE);
    3334    }
    34     remove_argument (N, &argc, argv);
    35     On = TRUE;
     35    host = GetHost (PCONTROL_HOST_DOWN, N);
     36    host[0].nexttry.tv_sec  = 0;
     37    host[0].nexttry.tv_usec = 0;
     38    host[0].lasttry.tv_sec  = 0;
     39    host[0].lasttry.tv_usec = 0;
     40    StartHost (host);
     41    return (TRUE);
    3642  }
    37  
    38   /* this section needs some help: find the specified host in the stacks */
    39   Start = FALSE;
    40   if ((N = get_argument (argc, argv, "-start"))) {
    41     remove_argument (N, &argc, argv);
    42     Start = TRUE;
    43   }
    44  
    45   /* this section needs some help: find the specified host in the stacks */
    46   Check = FALSE;
    47   if ((N = get_argument (argc, argv, "-check"))) {
    48     remove_argument (N, &argc, argv);
    49     Check = TRUE;
    50   }
    51  
    52   /* this section needs some help: find the specified host in the stacks */
    53   Stop = FALSE;
    54   if ((N = get_argument (argc, argv, "-stop"))) {
    55     remove_argument (N, &argc, argv);
    56     Stop = TRUE;
    57   }
    58  
    59   if (argc != 2) {
    60     fprintf (stdout, "USAGE: host (hostname) [-delete]\n");
     43  if (!strcasecmp (argv[1], "CHECK")) {
     44    Ns = FindNamedHostStack (argv[2]);
     45    switch (Ns) {
     46      case PCONTROL_HOST_IDLE:
     47      case PCONTROL_HOST_BUSY:
     48      case PCONTROL_HOST_DONE:
     49        N = FindNamedHost (argv[2], Ns);
     50        host = GetHost (Ns, N);
     51        CheckHost (host);
     52        return (TRUE);
     53      case PCONTROL_HOST_DOWN:
     54        fprintf (stdout, "host %s is DOWN\n", argv[2]);
     55        return (TRUE);
     56      case PCONTROL_HOST_OFF:
     57        fprintf (stdout, "host %s is OFF\n", argv[2]);
     58        return (TRUE);
     59      default:
     60        fprintf (stdout, "host %s not found\n", argv[2]);
     61        return (FALSE);
     62    }
    6163    return (FALSE);
    6264  }
    63  
    64   if (Delete) {
    65     N = FindNamedHost (argv[1], PCONTROL_HOST_OFF);
     65  if (!strcasecmp (argv[1], "OFF")) {
     66    N = FindNamedHost (argv[2], PCONTROL_HOST_IDLE);
     67    if (N >= 0) {
     68      host = GetHost (PCONTROL_HOST_IDLE, N);
     69      StopHost (host);
     70      OffHost (host);
     71      return (TRUE);
     72    }
     73    N = FindNamedHost (argv[2], PCONTROL_HOST_DOWN);
     74    if (N >= 0) {
     75      host = GetHost (PCONTROL_HOST_DOWN, N);
     76      OffHost (host);
     77      return (TRUE);
     78    }
     79    N = FindNamedHost (argv[2], PCONTROL_HOST_BUSY);
     80    if (N >= 0) {
     81      host = GetHost (PCONTROL_HOST_BUSY, N);
     82      host[0].markoff  = TRUE;
     83      PutHost (host, PCONTROL_HOST_BUSY, STACK_BOTTOM);
     84      return (TRUE);
     85    }
     86    fprintf (stdout, "host %s is not BUSY, IDLE, or DOWN\n", argv[2]);
     87    return (FALSE);
     88  }
     89  if (!strcasecmp (argv[1], "DELETE")) {
     90    N = FindNamedHost (argv[2], PCONTROL_HOST_OFF);
    6691    if (N < 0) {
    67       fprintf (stdout, "host %s is not OFF\n", argv[1]);
     92      fprintf (stdout, "host %s is not OFF\n", argv[2]);
    6893      return (FALSE);
    6994    }
     
    7297    return (TRUE);
    7398  }
    74 
    75   if (On) {
    76     N = FindNamedHost (argv[1], PCONTROL_HOST_OFF);
    77     if (N < 0) {
    78       fprintf (stdout, "host %s is not OFF\n", argv[1]);
    79       return (FALSE);
    80     }
    81     host = GetHost (PCONTROL_HOST_OFF, N);
    82     DownHost (host);
    83     return (TRUE);
    84   }
    85 
    86   if (Check) {
    87     N = FindNamedHost (argv[1], PCONTROL_HOST_IDLE);
    88     if (N >= 0) {
    89       host = GetHost (PCONTROL_HOST_IDLE, N);
    90       CheckHost (host);
    91       return (TRUE);
    92     }
    93     N = FindNamedHost (argv[1], PCONTROL_HOST_BUSY);
    94     if (N >= 0) {
    95       host = GetHost (PCONTROL_HOST_BUSY, N);
    96       CheckHost (host);
    97       return (TRUE);
    98     }
    99     fprintf (stdout, "host %s is not BUSY or IDLE\n", argv[1]);
    100     return (FALSE);
    101   }
    102 
    103   if (Start) {
    104     N = FindNamedHost (argv[1], PCONTROL_HOST_DOWN);
    105     if (N < 0) {
    106       fprintf (stdout, "host %s is not DOWN\n", argv[1]);
    107       return (FALSE);
    108     }
    109     host = GetHost (PCONTROL_HOST_DOWN, N);
    110     StartHost (host);
    111     return (TRUE);
    112   }
    113   if (Stop) {
    114     N = FindNamedHost (argv[1], PCONTROL_HOST_IDLE);
    115     if (N < 0) {
    116       fprintf (stdout, "host %s is not IDLE\n", argv[1]);
    117       return (FALSE);
    118     }
    119     host = GetHost (PCONTROL_HOST_IDLE, N);
    120     StopHost (host);
    121     DownHost (host);
    122     return (TRUE);
    123   }
    124 
    125   if (Off) {
    126     N = FindNamedHost (argv[1], PCONTROL_HOST_IDLE);
    127     if (N >= 0) {
    128       host = GetHost (PCONTROL_HOST_IDLE, N);
    129       StopHost (host);
    130       OffHost (host);
    131       return (TRUE);
    132     }
    133     N = FindNamedHost (argv[1], PCONTROL_HOST_DOWN);
    134     if (N >= 0) {
    135       host = GetHost (PCONTROL_HOST_DOWN, N);
    136       OffHost (host);
    137       return (TRUE);
    138     }
    139     N = FindNamedHost (argv[1], PCONTROL_HOST_BUSY);
    140     if (N >= 0) {
    141       host = GetHost (PCONTROL_HOST_BUSY, N);
    142       host[0].markoff  = TRUE;
    143       PutHost (host, PCONTROL_HOST_BUSY, STACK_BOTTOM);
    144       return (TRUE);
    145     }
    146     fprintf (stdout, "host %s is not BUSY, IDLE, or DOWN\n", argv[1]);
    147     return (FALSE);
    148   }
    149 
    150   HostID = AddHost (argv[1]);
    151   fprintf (stdout, "HostID: %d\n", (int) HostID);
    152   return (TRUE);
     99 
     100  fprintf (stderr, "unknown host command %s\n", argv[1]);
     101  fprintf (stderr, "valid options: xxx\n");
     102  return (FALSE);
    153103}
  • trunk/Ohana/src/opihi/pcontrol/pclient.c

    r4573 r4705  
    1212
    1313  /* send command to client (adding on \n) */
    14   /* fprintf (stderr, "send: %s (%d)\n", command, buffer[0].Nbuffer); */
    15   ALLOCATE (line, char, MAX (1, strlen(command) + 1));
    16   sprintf (line, "%s\n", command);
    17   status = write (host[0].stdin, line, strlen(line));
    18   free (line);
     14  status = write_fmt (host[0].stdin, "%s\n", command);
    1915
    2016  /* is pipe still open? */
  • trunk/Ohana/src/opihi/pcontrol/rconnect.c

    r4689 r4705  
    1010
    1111  int i, stdin_fd[2], stdout_fd[2], stderr_fd[2], status;
     12  int result, waitstatus;
    1213  pid_t pid;
    1314  char *p;
     
    6970
    7071  /* send handshake command */
    71   sprintf (command, "echo CONNECTED\n");
    72   status = write (stdin_fd[1], command, strlen(command));
    73   if ((status == -1) && (errno == EPIPE)) goto pipe_error;
     72  status = write_fmt (stdin_fd[1], "echo CONNECTED\n");
     73  if ((status == -1) && (errno == EPIPE)) goto connect_error;
    7474
    7575  /* try to get evidence connection is alive - wait upto a few seconds */
     
    8181    usleep (20000);
    8282  }
     83  if (status == 0) goto connect_error;
     84  if (status == -1) goto connect_error;
    8385  if (VerboseMode()) fprintf (stderr, "%d cycles to connect\n", i);
    84   if (status == 0) goto pipe_error;
    85   if (status == -1) goto io_error;
    8686  FreeIOBuffer (&buffer);
    8787
     
    9898  goto close_pipes;
    9999
    100 io_error:
    101   if (VerboseMode()) fprintf (stderr, "timeout while connecting\n");
    102   goto close_pipes;
     100connect_error:
     101  if (VerboseMode()) fprintf (stderr, "error while connecting\n");
     102
     103  /* harvest the child process: kill & wait (< 100 ms) for exit */
     104  kill (pid, SIGKILL);
     105  result = waitpid (pid, &waitstatus, WNOHANG);
     106  for (i = 0; (i < 10) && (result == 0); i++) {
     107    usleep (10000);  /* 10 ms is min */
     108    result = waitpid (pid, &waitstatus, WNOHANG);
     109  }
     110
     111  if ((result == -1) && (errno != ECHILD)) {
     112    fprintf (stderr, "unexpected error from waitpid (%d): programming error\n", errno);
     113    exit (1);
     114  }
     115  if (result == 0) {
     116    if (VerboseMode()) fprintf (stderr, "child did not exit??");
     117  }
     118  if (result > 0) {
     119    if (result != pid) {
     120      fprintf (stderr, "waitpid error: mis-matched PID (%d vs %d).  programming error\n", result, pid);
     121      exit (1);
     122    }
     123    if (WIFSTOPPED(waitstatus)) {
     124      fprintf (stderr, "waitpid returns 'stopped': programming error\n");
     125      exit (1);
     126    }
     127  }
    103128
    104129close_pipes:
Note: See TracChangeset for help on using the changeset viewer.