IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Changeset 29421


Ignore:
Timestamp:
Oct 14, 2010, 3:15:30 PM (16 years ago)
Author:
eugene
Message:

add -xhost option to limit load on hosts other than the target host

Location:
branches/eam_branches/ipp-20100823/Ohana/src/opihi/pcontrol
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • branches/eam_branches/ipp-20100823/Ohana/src/opihi/pcontrol/CheckIdleHost.c

    r28158 r29421  
    6060    if (strcasecmp (job[0].hostname, host[0].hostname)) continue;
    6161
     62    if (!CheckMachineJobs (host, job)) continue;
     63
    6264    /* we have found an appropriate job; link it to the host and send to StartJob */
    6365    job[0].host = (struct Host *) host;
     
    8082    ASSERT (job[0].hostname != NULL, "WANTHOST hostname missing");
    8183    if (strcasecmp (job[0].hostname, host[0].hostname)) continue;
     84
     85    if (!CheckMachineJobs (host, job)) continue;
    8286
    8387    /* we have found an appropriate job; link it to the host and send to StartJob */
  • branches/eam_branches/ipp-20100823/Ohana/src/opihi/pcontrol/JobOps.c

    r28242 r29421  
    195195}
    196196
    197 IDtype AddJob (char *hostname, JobMode mode, int timeout, int argc, char **argv) {
     197IDtype AddJob (char *hostname, JobMode mode, int timeout, int argc, char **argv, int Nxhosts, char **xhosts) {
    198198
    199199  int JobID;
     
    228228  job[0].host     = NULL;
    229229
     230  job[0].xhosts = xhosts;
     231  job[0].Nxhosts = Nxhosts;
     232
    230233  JobID = job[0].JobID;
    231234
     
    257260  FREE (job[0].argv);
    258261
     262  for (i = 0; i < job[0].Nxhosts; i++) {
     263    FREE (job[0].xhosts[i]);
     264  }
     265  FREE (job[0].xhosts);
     266
    259267  FreeIOBuffer (&job[0].stdout_buf.buffer);
    260268  FreeIOBuffer (&job[0].stderr_buf.buffer);
  • branches/eam_branches/ipp-20100823/Ohana/src/opihi/pcontrol/MachineOps.c

    r26521 r29421  
    11# include "pcontrol.h"
     2# define DEBUG 0
    23
    34static int MAX_UNWANTED_HOST_JOBS = 5;
     
    107108int AddMachineJob (Host *host, Job *job) {
    108109
     110  int i;
    109111  Machine *machine;
    110112
     
    114116  machine[0].NjobsRealhost ++;
    115117
    116   // skip jobs that do not have a targeted host
    117   if (job[0].hostname == NULL) {
    118     return (TRUE);
     118  // skip jobs that do not have a targeted host or any xhosts
     119  if (!job[0].hostname && !job[0].Nxhosts) {
     120    return (TRUE);
     121  }
     122
     123  for (i = 0; i < job[0].Nxhosts; i++) {
     124    // find machine matching the xhost name (these count against the unwanted host total)
     125    machine = FindMachineByName (job[0].xhosts[i]); // Can this fail?
     126    if (!machine) continue;
     127    machine[0].NjobsWanthost ++;
    119128  }
    120129
     
    134143int DelMachineJob (Host *host, Job *job) {
    135144
     145  int i;
    136146  Machine *machine;
    137147
     
    141151  machine[0].NjobsRealhost --;
    142152
    143   // skip jobs that do not have a targeted host
    144   if (job[0].hostname == NULL) {
    145     return (TRUE);
     153  // skip jobs that do not have a targeted host or any xhosts
     154  if (!job[0].hostname && !job[0].Nxhosts) {
     155    return (TRUE);
     156  }
     157
     158  for (i = 0; i < job[0].Nxhosts; i++) {
     159    // find machine matching the xhost name (these count against the unwanted host total)
     160    machine = FindMachineByName (job[0].xhosts[i]); // Can this fail?
     161    if (!machine) continue;
     162    machine[0].NjobsWanthost --;
    146163  }
    147164
     
    175192int CheckMachineJobs (Host *host, Job *job) {
    176193
     194  int i;
    177195  Machine *machine;
    178196
    179197  machine = FindMachineByName (job[0].hostname);
    180   if (machine == NULL) return (TRUE);
    181   // fprintf (stderr, "wanthost: %s, Ntotal: %d, Nmax: %d\n", job[0].hostname, machine[0].NjobsWanthost + machine[0].NjobsRealhost, machine[0].Nhosts + MAX_UNWANTED_HOST_JOBS);
    182   // if (machine[0].NjobsWanthost + machine[0].NjobsRealhost >= machine[0].Nhosts + MAX_UNWANTED_HOST_JOBS) {
    183   if (machine[0].NjobsWanthost >= MAX_UNWANTED_HOST_JOBS) {
    184     // fprintf (stderr, "too many outstanding jobs wanting host %s, delay job %s for now\n", job[0].hostname, job[0].argv[0]);
    185     return (FALSE);
    186   }
     198  if (machine) {
     199    if (DEBUG) fprintf (stderr, "wanthost: %s, Ntotal: %d, Nmax: %d\n", machine[0].name, machine[0].NjobsWanthost + machine[0].NjobsRealhost, machine[0].Nhosts + MAX_UNWANTED_HOST_JOBS);
     200    if (machine[0].NjobsWanthost >= MAX_UNWANTED_HOST_JOBS) {
     201      if (DEBUG) fprintf (stderr, "too many outstanding jobs wanting host %s, delay job %s for now\n", machine[0].name, job[0].argv[0]);
     202      return (FALSE);
     203    }
     204  }
     205
     206  for (i = 0; i < job[0].Nxhosts; i++) {
     207    machine = FindMachineByName (job[0].xhosts[i]);
     208    if (machine) {
     209      if (DEBUG) fprintf (stderr, "xhost: %s, Ntotal: %d, Nmax: %d\n", machine[0].name, machine[0].NjobsWanthost + machine[0].NjobsRealhost, machine[0].Nhosts + MAX_UNWANTED_HOST_JOBS);
     210      if (machine[0].NjobsWanthost >= MAX_UNWANTED_HOST_JOBS) {
     211        if (DEBUG) fprintf (stderr, "too many outstanding jobs wanting host %s, delay job %s for now\n", machine[0].name, job[0].argv[0]);
     212        return (FALSE);
     213      }
     214    }
     215  }   
    187216
    188217  machine = FindMachineByName (host[0].hostname);
    189   // fprintf (stderr, "realhost: %s, Ntotal: %d, Nmax: %d\n", host[0].hostname, machine[0].NjobsWanthost + machine[0].NjobsRealhost, machine[0].Nhosts + MAX_UNWANTED_HOST_JOBS);
    190   if (machine[0].NjobsWanthost >= MAX_UNWANTED_HOST_JOBS) {
    191     // fprintf (stderr, "too many outstanding jobs wanting host %s, delay job %s for now\n", job[0].hostname, job[0].argv[0]);
    192     return (FALSE);
    193   }
     218  if (machine) {
     219    if (DEBUG) fprintf (stderr, "realhost: %s, Ntotal: %d, Nmax: %d\n", machine[0].name, machine[0].NjobsWanthost + machine[0].NjobsRealhost, machine[0].Nhosts + MAX_UNWANTED_HOST_JOBS);
     220    if (machine[0].NjobsWanthost >= MAX_UNWANTED_HOST_JOBS) {
     221      if (DEBUG) fprintf (stderr, "too many outstanding jobs wanting host %s, delay job %s for now\n", machine[0].name, job[0].argv[0]);
     222      return (FALSE);
     223    }
     224  }
     225
    194226  return (TRUE);
    195227}
  • branches/eam_branches/ipp-20100823/Ohana/src/opihi/pcontrol/job.c

    r23586 r29421  
    66  int i, N, Mode, targc, Timeout;
    77  IDtype JobID;
     8  char **xhosts;
     9  int Nxhosts, NXHOSTS;
    810
    911  if (get_argument (argc, argv, "-host") && get_argument (argc, argv, "+host")) {
     
    3941  }
    4042
     43  xhosts = NULL;
     44  Nxhosts = 0;
     45  NXHOSTS = 10;
     46  while ((N = get_argument (argc, argv, "-xhost"))) {
     47    if (xhosts == NULL) {
     48      ALLOCATE (xhosts, char *, NXHOSTS);
     49    }
     50    remove_argument (N, &argc, argv);
     51    xhosts[Nxhosts] = strcreate (argv[N]);
     52    remove_argument (N, &argc, argv);
     53    Nxhosts ++;
     54    if (Nxhosts == NXHOSTS) {
     55      NXHOSTS += 10;
     56      REALLOCATE (xhosts, char *, NXHOSTS);
     57    }
     58  }
     59
    4160  if (argc < 2) {
    4261    FREE (Host);
     
    5170
    5271  // a JobID < 0 means the job was not accepted
    53   JobID = AddJob (Host, Mode, Timeout, targc, targv);
     72  JobID = AddJob (Host, Mode, Timeout, targc, targv, Nxhosts, xhosts);
    5473  gprint (GP_LOG, "JobID: %d\n", (int) JobID);
    5574  return (TRUE);
     
    5776 usage:
    5877    gprint (GP_ERR, "USAGE: job [options] (arg0) (arg1) ... (argN)\n");
    59     gprint (GP_ERR, "  options: -host, +host, -timeout\n");
     78    gprint (GP_ERR, "  options: -host, +host, -timeout, -xhost (host)\n");
    6079    gprint (GP_ERR, "  arguments of the form @MAX_THREADS@ will be replaced when the job is launched\n");
     80
     81    FREE (Host);
     82    for (i = 0; i < Nxhosts; i++) {
     83      FREE (xhosts[i]);
     84    }
     85    FREE (xhosts);
    6186    return (FALSE);
    6287}
  • branches/eam_branches/ipp-20100823/Ohana/src/opihi/pcontrol/test/machines.sh

    r28158 r29421  
    1010  host add ipp022
    1111  host add ipp022
     12  host add ipp002
     13  host off ipp002
    1214
    1315  machines
     
    2325  job -host pikake sleep 10
    2426  job -host pikake sleep 10
     27end
     28
     29macro load.xhost
     30  job -host pikake -xhost ipp002 sleep 10
     31  job -host pikake -xhost ipp002 sleep 10
     32  job -host ipp022 -xhost ipp002 sleep 10
     33  job -host ipp022 -xhost ipp002 sleep 10
     34  job -host pikake -xhost ipp002 sleep 10
     35  job -host pikake -xhost ipp002 sleep 10
     36  job -host ipp022 -xhost ipp002 sleep 10
     37  job -host ipp022 -xhost ipp002 sleep 10
     38end
     39
     40macro load.2xhost
     41  job -host pikake -xhost ipp002 -xhost ipp022 sleep 10
     42  job -host pikake -xhost ipp002 -xhost ipp022 sleep 10
     43  job -host ipp022 -xhost ipp002 -xhost ipp022 sleep 10
     44  job -host ipp022 -xhost ipp002 -xhost ipp022 sleep 10
     45  job -host pikake -xhost ipp002 -xhost ipp022 sleep 10
     46  job -host pikake -xhost ipp002 -xhost ipp022 sleep 10
     47  job -host ipp022 -xhost ipp002 -xhost ipp022 sleep 10
     48  job -host ipp022 -xhost ipp002 -xhost ipp022 sleep 10
    2549end
    2650
Note: See TracChangeset for help on using the changeset viewer.