示例#1
0
int MTraceLoadWorkload(

  char     *Buffer,   /* I */
  int      *TraceLen, /* I (optional,modified) */
  mjob_t   *J,        /* O */
  int       Mode,     /* I */
  int      *Version)  /* O */

  {
  char *tail;
  char *head;

  char *ptr;
  char *ptr2;

  char *tok;
  char *TokPtr;

  char Line[MAX_MLINE << 4];
  char tmpLine[MAX_MLINE];

  char MHostName[MAX_MNAME];
  char MReqHList[MAX_MLINE << 2];
  char Reservation[MAX_MNAME];

  char tmpTaskRequest[MAX_MLINE];
  char tmpSpecWCLimit[MAX_MLINE];
  char tmpState[MAX_WORD];
  char tmpOpsys[MAX_WORD];
  char tmpArch[MAX_WORD];
  char tmpNetwork[MAX_WORD];
  char tmpMemCmp[MAX_WORD];
  char tmpDiskCmp[MAX_WORD];
  char tmpReqNFList[MAX_MLINE];
  char tmpClass[MAX_MLINE];
  char tmpRMName[MAX_MLINE];
  char tmpCmd[MAX_MLINE];
  char tmpRMXString[MAX_MLINE >> 2];
  char tmpParName[MAX_MNAME];
  char tmpJFlags[MAX_MLINE];
  char tmpQReq[MAX_WORD];
  char tmpASString[MAX_MLINE];
  char tmpRMemBuf[MAX_MNAME];
  char tmpRDiskBuf[MAX_MNAME];
  char tmpDSwapBuf[MAX_MNAME];
  char tmpDMemBuf[MAX_MNAME];
  char tmpDDiskBuf[MAX_MNAME];
  char UName[MAX_MNAME];
  char GName[MAX_MNAME];
  char AName[MAX_MNAME];

  char SetString[MAX_MLINE];

  unsigned long tmpQTime;
  unsigned long tmpDTime;
  unsigned long tmpSTime;
  unsigned long tmpCTime;

  unsigned long tmpSQTime;


  unsigned long tmpSDate;
  unsigned long tmpEDate;

  int      TPN;
  int      TasksAllocated;
  int      index;
  int      rc;

  int      tindex;
  int      nindex;

  mqos_t  *QDef;

  mreq_t  *RQ[MAX_MREQ_PER_JOB];

  mnode_t *N;

  long     ReqProcSpeed;

  const char *FName = "MTraceLoadWorkload";

  DBG(5,fSIM) DPrint("%s(Buffer,TraceLen,J,%d,%d)\n",
    FName,
    Mode,
    (Version != NULL) ? *Version : -1);

  if (J == NULL)
    {
    DBG(5,fSIM) DPrint("ALERT:    NULL job pointer passed in %s()\n",
      FName);

    return(FAILURE);
    }

  if (Buffer == NULL)
    {
    return(FAILURE);
    }

  if ((tail = strchr(Buffer,'\n')) == NULL)
    {
    /* assume line is '\0' terminated */

    if (TraceLen != NULL)
      tail = Buffer + *TraceLen;
    else
      tail = Buffer + strlen(Buffer);
    }

  head = Buffer;

  MUStrCpy(Line,head,MIN(tail - head + 1,sizeof(Line)));

  DBG(6,fSIM) DPrint("INFO:     parsing trace line '%s'\n",
    Line);

  if (TraceLen != NULL)
    *TraceLen = (tail - head) + 1;

  /* eliminate comments */

  if ((ptr = strchr(Line,'#')) != NULL)
    {
    DBG(5,fSIM) DPrint("INFO:     detected trace comment line, '%s'\n",
      Line);

    *ptr = '\0';
    }

  if (Line[0] == '\0')
    {
    return(FAILURE);
    }

  /* look for VERSION settings */

  if (!strncmp(Line,TRACE_WORKLOAD_VERSION_MARKER,strlen(TRACE_WORKLOAD_VERSION_MARKER)))
    {
    if (MTraceGetWorkloadVersion(Line,Version) == FAILURE)
      {
      DBG(0,fSIM) DPrint("ALERT:    cannot determine workload trace version\n");

      /* assume default version */

      if (Version != NULL)
        *Version = DEFAULT_WORKLOAD_TRACE_VERSION;
      }
    else
      {
      DBG(3,fSIM) DPrint("INFO:     workload trace version %d detected\n",
        (Version != NULL) ? *Version : -1);
      }

    return(FAILURE);
    }  /* END if (!strncmp(Line,TRACE_WORKLOAD_VERSION_MARKER)) */

  /* set default workload attributes */

  memset(J,0,sizeof(mjob_t));

  if (MReqCreate(J,NULL,&J->Req[0],FALSE) == FAILURE)
    {
    DBG(0,fSIM) DPrint("ALERT:    cannot create job req\n");

    return(FAILURE);
    }

  MRMJobPreLoad(J,NULL,0);

  RQ[0]         = J->Req[0];

  RQ[0]->Index  = 0;

  J->StartCount = 0;
  J->R          = NULL;

  switch(*Version)
    {
    case 230:

      /*                Name NR TReq User Grop WClk Stat Clss QUT DST STT CMT netw Arch Opsy MemC RMem DskC RDsk Feat SQT TA TPN QO MO AC CM CC BP PU Part DPrc DMem DDisk DSwap STARTDATE ENDDATE MNODE RM HL RES RES1 RES2 RES3 OVERFLOW */

      rc = sscanf(Line,"%64s %d %64s %64s %64s %64s %64s %64s %lu %lu %lu %lu %64s %64s %64s %64s %64s %64s %64s %64s %lu %d %d %64s %1024s %64s %1024s %1024s %d %lf %64s %d %64s %64s %64s %lu %lu %1024s %64s %1024s %64s %64s %1024s %s %s",
           J->Name,
           &J->Request.NC,
           tmpTaskRequest,
           UName,
           GName,
           tmpSpecWCLimit,
           tmpState,
           tmpClass,
           &tmpQTime,
           &tmpDTime,
           &tmpSTime,
           &tmpCTime,
           tmpNetwork,
           tmpArch,
           tmpOpsys,
           tmpMemCmp,
           tmpRMemBuf,
           tmpDiskCmp,
           tmpRDiskBuf,
           tmpReqNFList,
           &tmpSQTime,             /* SystemQueueTime */
           &TasksAllocated,        /* allocated task count */
           &TPN,
           tmpQReq,
           tmpJFlags,
           AName,
           tmpCmd,
           tmpRMXString,           /* RM extension string */
           &J->Bypass,             /* Bypass          */
           &J->PSUtilized,         /* PSUtilized      */
           tmpParName,             /* partition used  */
           &RQ[0]->DRes.Procs,
           tmpDMemBuf,
           tmpDDiskBuf,
           tmpDSwapBuf,
           &tmpSDate,
           &tmpEDate,
           MHostName,
           tmpRMName,
           MReqHList,
           Reservation,
           SetString,              /* resource sets required by job */
           tmpASString,            /* application simulator name */
           tmpLine,                /* RES1 */
           tmpLine                 /* check for too many fields */
           );

      /* fail if all fields not read in */

      switch (rc)
        {
        case 43:

          strcpy(MHostName,NONE);

          J->RM = &MRM[0];

          break;

        case 44:

          /* canonical 230 trace */

          if (isdigit(tmpRMName[0]))
            J->RM = &MRM[(int)strtol(tmpRMName,NULL,0)];
          else
            J->RM = &MRM[0];

          break;

        default:

          DBG(1,fSIM) DPrint("ALERT:    version %d workload trace is corrupt '%s' (%d of %d fields read) ignoring line\n",
            *Version,
            Line,
            rc,
            44);

          MJobDestroy(&J);

          return(FAILURE);

          /*NOTREACHED*/

          break;
        }  /* END switch(rc) */

      RQ[0]->RequiredMemory = MURSpecToL(tmpRMemBuf,mvmMega,mvmMega);
      RQ[0]->RequiredDisk   = MURSpecToL(tmpRDiskBuf,mvmMega,mvmMega);

      RQ[0]->DRes.Mem       = MURSpecToL(tmpDMemBuf,mvmMega,mvmMega);
      RQ[0]->DRes.Swap      = MURSpecToL(tmpDSwapBuf,mvmMega,mvmMega);
      RQ[0]->DRes.Disk      = MURSpecToL(tmpDDiskBuf,mvmMega,mvmMega);

      if (Mode != msmProfile)
        {
        ptr  = NULL;
        ptr2 = NULL;

        if (strcmp(tmpASString,NONE))
          {
          if ((ptr = MUStrTok(tmpASString,":",&TokPtr)) != NULL)
            {
            ptr2 = MUStrTok(NULL,"|",&TokPtr);
            }
          }

        if (MASGetDriver((void **)&J->ASFunc,ptr,msdApplication) == SUCCESS)
          {
          (*J->ASFunc)(J,mascCreate,ptr2,NULL);
          }
        }    /* END if (Mode != msmProfile) */

      /* obtain job step number from LL based job name */

      /* FORMAT:  <FROM_HOST>.<CLUSTER>.<PROC> */

      MUStrCpy(tmpLine,J->Name,sizeof(tmpLine));

      if ((ptr = MUStrTok(tmpLine,".",&TokPtr)) != NULL)
        {
        MUStrCpy(J->SubmitHost,ptr,sizeof(J->SubmitHost));

        if ((ptr = MUStrTok(NULL,".",&TokPtr)) != NULL)
          {
          J->Cluster = (int)strtol(ptr,NULL,0);

          if ((ptr = MUStrTok(NULL,".",&TokPtr)) != NULL)
            {
            J->Proc = (int)strtol(ptr,NULL,0);
            }
          }
        }

      /* adjust job flags */

      if (Mode == msmProfile)
        {
        MUStrDup(&J->MasterHostName,MHostName);

        /* NOTE:  flags may be specified as string or number */

        if ((J->SpecFlags = strtol(tmpJFlags,NULL,0)) == 0)
          {
          MUBMFromString(tmpJFlags,MJobFlags,&J->SpecFlags);
          }
        }
      else
        {
        /* simulation mode */

        if ((MSim.Flags & (1 << msimfIgnMode)) ||
            (MSim.Flags & (1 << msimfIgnAll)))
          {
          J->SpecFlags = 0;
          }
        else
          {
          /* NOTE:  flags may be specified as string or number */

          if ((J->SpecFlags = strtol(tmpJFlags,NULL,0)) == 0)
            {
            MUBMFromString(tmpJFlags,MJobFlags,&J->SpecFlags);
            }

          if (J->SysFlags & (1 << mjfBackfill))
            J->SysFlags ^= (1 << mjfBackfill);

          if ((J->SpecFlags & (1 << mjfHostList)) &&
              (MSim.Flags & (1 << msimfIgnHostList)))
            {
            J->SpecFlags ^= (1 << mjfHostList);
            }
          }
        }    /* END else (Mode == msmProfile) */

      /* extract arbitrary job attributes */

      /* FORMAT:  JATTR:<ATTR>[=<VALUE>] */

      {
      char *ptr;
      char *TokPtr;

      ptr = MUStrTok(tmpJFlags,"[]",&TokPtr);

      while (ptr != NULL)
        {
        if (!strncmp(ptr,"JATTR:",strlen("JATTR:")))
          {
          MJobSetAttr(J,mjaGAttr,(void **)(ptr + strlen("JATTR:")),0,mSet);
          }

        ptr = MUStrTok(NULL,"[]",&TokPtr);
        }  /* END while (ptr != NULL) */
      }    /* END BLOCK */

      /* set default flags, remove set 'ignore' flags */

      J->SpecFlags |= MSim.TraceDefaultJobFlags;

      for (index = 0;index < M64.INTBITS;index++)
        {
        if (!(MSim.TraceIgnoreJobFlags & (1 << index)))
          continue;

        if (!(J->SpecFlags & (1 << index)))
          continue;

        J->SpecFlags ^= (1 << index);
        }  /* END for (index) */

      /* load task info */

      if (!strncmp(tmpTaskRequest,"PBS=",strlen("PBS=")))
        {
        char tmpLine[MAX_MLINE];

        tpbsa_t tmpA;
        short   TaskList[MAX_MTASK];

        memset(&tmpA,0,sizeof(tmpA));

        sprintf(tmpLine,"Resource_List,nodes,%s",
          &tmpTaskRequest[strlen("PBS=")]);

        MPBSJobSetAttr(J,NULL,tmpLine,&tmpA,TaskList,0);

        MPBSJobAdjustResources(J,&tmpA,J->RM);
        }
      else
        {
        ptr = MUStrTok(tmpTaskRequest,",",&TokPtr);
        tindex = 1;

        while (ptr != NULL)
          {
          if (strchr(ptr,'-') != NULL)
            {
            /* NOTE:  not handled */
            }
          else
            {
            RQ[0]->TaskRequestList[tindex] = atoi(ptr);
            tindex++;
            }

          ptr = MUStrTok(NULL,",",&TokPtr);
          }

        RQ[0]->TaskRequestList[tindex] = 0;
        RQ[0]->TaskRequestList[0]      = RQ[0]->TaskRequestList[1];

        RQ[0]->TaskCount    = RQ[0]->TaskRequestList[0];

        J->Request.TC   = RQ[0]->TaskRequestList[0];

        RQ[0]->TasksPerNode = MAX(0,TPN);

        if (MPar[0].JobNodeMatch & (1 << nmExactNodeMatch))
          {
          RQ[0]->NodeCount  = RQ[0]->TaskCount;
          J->Request.NC = RQ[0]->TaskCount;
          }

        if (MPar[0].JobNodeMatch & (1 << nmExactProcMatch))
          {
          if (RQ[0]->TasksPerNode >= 1)
            {
            RQ[0]->RequiredProcs = RQ[0]->TasksPerNode;
            RQ[0]->ProcCmp       = mcmpEQ;
            }

          if (RQ[0]->TasksPerNode > J->Request.TC)
            {
            RQ[0]->TasksPerNode = 0;
            }
          }
        }    /* END BLOCK */

      /* process WCLimit constraints */

      ptr = MUStrTok(tmpSpecWCLimit,",:",&TokPtr);
      tindex = 1;

      while (ptr != NULL)
        {
        if (strchr(ptr,'-') != NULL)
          {
          /* NOTE:  not handled */
          }
        else
          {
          J->SpecWCLimit[tindex] = strtol(ptr,NULL,0);

          tindex++;
          }

        ptr = MUStrTok(NULL,",:",&TokPtr);
        }  /* END while (ptr != NULL) */

      J->SpecWCLimit[tindex] = 0;
      J->SpecWCLimit[0]      = J->SpecWCLimit[1];

      J->SubmitTime     = tmpQTime;
      J->DispatchTime   = MAX(tmpDTime,tmpSTime);
      J->StartTime      = MAX(tmpDTime,tmpSTime);
      J->CompletionTime = tmpCTime;

      J->SystemQueueTime = tmpSQTime;

      J->SpecSMinTime   = tmpSDate;
      J->CMaxTime       = tmpEDate;

      /* determine required reservation */

      if (strcmp(Reservation,NONE) &&
          strcmp(Reservation,J->Name))
        {
        J->SpecFlags |= (1 << mjfAdvReservation);

        if (strcmp(Reservation,ALL) != 0)
          {
          strcpy(J->ResName,Reservation);
          }
        }

      /* determine job class */

      if ((MSim.Flags & (1 << msimfIgnClass)) ||
          (MSim.Flags & (1 << msimfIgnAll)))
        {
        mclass_t *C;

        MClassAdd("DEFAULT",&C);

        /* ignore specified classes */

        RQ[0]->DRes.PSlot[0].count = 1;
        RQ[0]->DRes.PSlot[C->Index].count = 1;
        }
      else
        {
        int cindex;

        MUNumListFromString(RQ[0]->DRes.PSlot,tmpClass,eClass);

        for (cindex = 1;cindex < MAX_MCLASS;cindex++)
          {
          if (RQ[0]->DRes.PSlot[cindex].count > 0)
            {
            MClassAdd(MAList[eClass][cindex],NULL);
            }
          }  /* END for (cindex) */
        }

      if (Mode != msmProfile)
        {
        if ((strcmp(MReqHList,NONE) != 0) &&
           !(MSim.Flags & (1 << msimfIgnHostList)) &&
           !(MSim.Flags & (1 << msimfIgnAll)))
          {
          J->SpecFlags |= (1 << mjfHostList);

          ptr = MUStrTok(MReqHList,":",&TokPtr);

          nindex = 0;

          if (J->ReqHList == NULL)
            J->ReqHList = (mnalloc_t *)calloc(1,sizeof(mnalloc_t) * (MAX_MNODE_PER_JOB + 1));

          J->ReqHList[0].N = NULL;

          while (ptr != NULL)
            {
            if (MNodeFind(ptr,&N) == FAILURE)
              {
              if (MSched.DefaultDomain[0] != '\0')
                {
                if ((tail = strchr(ptr,'.')) != NULL)
                  {
                  /* try short name */

                  MUStrCpy(MReqHList,ptr,MIN(sizeof(MReqHList),(tail - ptr + 1)));
                  }
                else
                  {
                  /* append default domain */

                  if (MSched.DefaultDomain[0] == '.')
                    {
                    sprintf(MReqHList,"%s%s",
                      ptr,
                      MSched.DefaultDomain);
                    }
                  else
                    {
                    sprintf(MReqHList,"%s.%s",
                      ptr,
                      MSched.DefaultDomain);
                    }
                  }

                if (MNodeFind(MReqHList,&N) != SUCCESS)
                  {
                  DBG(1,fUI) DPrint("ALERT:    cannot locate node '%s' for job '%s' hostlist\n",
                    ptr,
                    J->Name);

                  N = NULL;
                  }
                }
              else
                {
                DBG(1,fUI) DPrint("ALERT:    cannot locate node '%s' for job '%s' hostlist\n",
                  ptr,
                  J->Name);

                N = NULL;
                }
              }  /* END if (MNodeFind() == FAILURE) */

            if (N != NULL)
              {
              if (J->ReqHList[nindex].N == N)
                {
                J->ReqHList[nindex].TC++;
                }
              else
                {
                if (J->ReqHList[nindex].N != NULL)
                  nindex++;

                J->ReqHList[nindex].N = N;

                J->ReqHList[nindex].TC = 1;
                }
              }

            ptr = MUStrTok(NULL,":",&TokPtr);
            }    /* END while (ptr != NULL) */

          J->ReqHList[nindex + 1].N = NULL;
          }      /* END if (strcmp(MReqHList,NONE)) */
        else if ((J->SpecFlags & (1 << mjfHostList)) &&
                 (J->ReqHList == NULL))
          {
          DBG(1,fSIM) DPrint("ALERT:    job %s requests hostlist but has no hostlist specified\n",
            J->Name);

          J->SpecFlags ^= (1 << mjfHostList);
          }
        }    /* END if (MODE != msmProfile) */

      break;

    default:

      DBG(4,fSIM) DPrint("ALERT:    cannot load version %d workload trace record\n",
        *Version);

      MJobDestroy(&J);

      return(FAILURE);

      /*NOTREACHED*/

      break;
    }    /* END switch(*Version) */

  if (!strcmp(AName,NONE) ||
      !strcmp(AName,"0"))
    {
    AName[0] = '\0';
    }

  if ((strcmp(tmpRMXString,NONE) != 0) && (strcmp(tmpRMXString,"0") != 0))
    {
    MJobSetAttr(J,mjaRMXString,(void **)tmpRMXString,mdfString,0);
    }

  if (RQ[0]->DRes.Procs == 0)
    RQ[0]->DRes.Procs = 1;

  if (J->SpecWCLimit[0] == (unsigned long)-1)
    J->SpecWCLimit[0] = MAX_MTIME;

  MUStrDup(&J->E.Cmd,tmpCmd);

  if (Mode == msmSim)
    {
    J->State           = mjsIdle;
    J->EState          = mjsIdle;
    J->Bypass          = 0;
    J->PSUtilized      = 0.0;

    J->SystemQueueTime = J->SubmitTime;
    J->SimWCTime       = MAX(1,J->CompletionTime - J->StartTime);

    J->StartTime       = 0;
    J->DispatchTime    = 0;
    J->CompletionTime  = 0;

    J->WCLimit         = J->SpecWCLimit[0];
    }
  else
    {
    /* profile mode */

    int tmpI;

    tmpI = (int)strtol(tmpParName,NULL,0);

    if (tmpI > 0) 
      {
      RQ[0]->PtIndex = tmpI;
      }
    else
      {
      mpar_t *P;

      MParFind(tmpParName,&P);

      RQ[0]->PtIndex = P->Index;
      }

    J->TaskCount = TasksAllocated;

    J->PSDedicated = MJobGetProcCount(J) * (J->CompletionTime - J->StartTime);

    J->WCLimit = J->SpecWCLimit[0];

    for (index = 0;MJobState[index] != NULL;index++)
      {
      if (!strcmp(tmpState,MJobState[index]))
        {
        J->State  = index;
        J->EState = index;

        break;
        }
      }    /* END for (index) */
    }

  if (J->State == mjsNotRun)
    {
    DBG(3,fSIM) DPrint("ALERT:    ignoring job '%s' (job never ran, state '%s')\n",
      J->Name,
      MJobState[J->State]);

    MJobDestroy(&J);

    return(FAILURE);
    }

  /* check for timestamp corruption */

  if (Mode == msmProfile)
    {
    /* profiler mode */

    if (MSched.TraceFlags & (1 << tfFixCorruption))
      {
      if (J->StartTime < J->DispatchTime)
        {
        DBG(2,fSIM) DPrint("WARNING:  fixing job '%s' with corrupt dispatch/start times (%ld < %ld)\n",
          J->Name,
          J->DispatchTime,
          J->StartTime);

        J->DispatchTime = J->StartTime;
        }

      if (J->SubmitTime > J->StartTime)
        {
        DBG(2,fSIM) DPrint("WARNING:  fixing job '%s' with corrupt queue/start times (%ld > %ld)\n",
          J->Name,
          J->SubmitTime,
          J->StartTime);

        if (J->SystemQueueTime > 0)
          J->SubmitTime = MIN(J->StartTime,J->SystemQueueTime);
        else
          J->SubmitTime = J->StartTime;
        }

      if (J->SystemQueueTime > J->DispatchTime)
        {
        DBG(2,fSIM) DPrint("WARNING:  fixing job '%s' with corrupt system queue/dispatch times (%ld > %ld)\n",
          J->Name,
          J->SystemQueueTime,
          J->DispatchTime);

        J->SystemQueueTime = J->DispatchTime;
        }

      if (J->SystemQueueTime < J->SubmitTime)
        {
        DBG(2,fSIM) DPrint("WARNING:  fixing job '%s' with corrupt system queue/queue time (%ld < %ld)\n",
          J->Name,
          J->SystemQueueTime,
          J->SubmitTime);

        J->SystemQueueTime = J->SubmitTime;
        }
      }    /* END if (MSched.TraceFlags & (1 << tfFixCorruption)) */

    if ((J->SubmitTime > J->DispatchTime) ||
        (J->DispatchTime > J->CompletionTime))
      {
      DBG(1,fSIM) DPrint("ALERT:    ignoring job '%s' with corrupt queue/start/completion times (%ld > %ld > %ld)\n",
        J->Name,
        J->SubmitTime,
        J->DispatchTime,
        J->CompletionTime);

      MJobDestroy(&J);

      return(FAILURE);
      }
    }    /* END if (Mode != msmSim) */

  if (MJobSetCreds(J,UName,GName,AName) == FAILURE)
    {
    DBG(1,fSTRUCT) DPrint("ALERT:    ignoring job '%s' with invalid authentication info (%s:%s:%s)\n",
      J->Name,
      UName,
      GName,
      AName);

    MJobDestroy(&J);

    return(FAILURE);
    }

  /* determine network */

  if (strstr(tmpNetwork,MAList[eNetwork][0]) == NULL)
    {
    RQ[0]->Network = MUMAGetIndex(eNetwork,tmpNetwork,mAdd);
    }

  if (strstr(tmpOpsys,MAList[eOpsys][0]) == NULL)
    {
    if ((RQ[0]->Opsys = MUMAGetIndex(eOpsys,tmpOpsys,mAdd)) == FAILURE)
      {
      DBG(0,fSIM) DPrint("WARNING:  cannot add opsys '%s' for job '%s'\n",
        tmpOpsys,
        J->Name);
      }
    }

  if (strstr(tmpArch,MAList[eArch][0]) == NULL)
    {
    if ((RQ[0]->Arch = MUMAGetIndex(eArch,tmpArch,mAdd)) == FAILURE)
      {
      DBG(0,fSIM) DPrint("WARNING:  cannot add arch '%s' for job '%s'\n",
        tmpArch,
        J->Name);
      }
    }

  /* load feature values */

  if ((MSched.ProcSpeedFeatureHeader[0] != '\0') &&
      (MSched.ReferenceProcSpeed > 0))
    {
    sprintf(tmpLine,"[%s",
      MSched.ProcSpeedFeatureHeader);

    if ((ptr = strstr(tmpReqNFList,tmpLine)) != NULL)
      {
      ReqProcSpeed = strtol(ptr + strlen(tmpLine),NULL,0);

      J->SimWCTime *= (long)((double)ReqProcSpeed / MSched.ReferenceProcSpeed);
      }
    }

  if ((MSched.Mode != msmSim) || !(MSim.Flags & (1 << msimfIgnFeatures)))
    {
    if (!strstr(tmpReqNFList,MAList[eFeature][0]))
      {
      tok = MUStrTok(tmpReqNFList,"[]",&TokPtr);

      do
        {
        MUGetMAttr(eFeature,tok,mAdd,RQ[0]->ReqFBM,sizeof(RQ[0]->ReqFBM));
        }
      while ((tok = MUStrTok(NULL,"[]",&TokPtr)) != NULL);
      }
    }

  RQ[0]->MemCmp    = MUCmpFromString(tmpMemCmp,NULL);
  RQ[0]->DiskCmp   = MUCmpFromString(tmpDiskCmp,NULL);

  RQ[0]->NAccessPolicy = MSched.DefaultNAccessPolicy;

  RQ[0]->TaskCount = J->Request.TC;
  RQ[0]->NodeCount = J->Request.NC;

  MJobProcessExtensionString(J,J->RMXString);

  if ((MSim.Flags & (1 << msimfIgnQOS)))
    {
    tmpQReq[0] = '\0';
    }

  {
  char *ptr;
  char *TokPtr;

  ptr = MUStrTok(tmpQReq,":",&TokPtr);

  if ((ptr != NULL) &&
      (ptr[0] != '\0') &&
       strcmp(ptr,"-1") &&
       strcmp(ptr,"0") &&
       strcmp(ptr,NONE) &&
       strcmp(ptr,DEFAULT))
    {
    if (MQOSAdd(ptr,&J->QReq) == FAILURE)
      {
      /* cannot locate requested QOS */

      MQOSFind(DEFAULT,&J->QReq);
      }

    if (Mode == msmProfile)
      {
      if ((ptr = MUStrTok(NULL,":",&TokPtr)) != NULL)
        {
        MQOSAdd(ptr,&J->Cred.Q);
        }
      }
    }    /* END if ((ptr != NULL) && ...) */
  }      /* END BLOCK */

  if (Mode == msmSim)
    {
    if ((MQOSGetAccess(J,J->QReq,NULL,&QDef) == FAILURE) ||
        (J->QReq == NULL) ||
        (J->QReq == &MQOS[0]))
      {
      MJobSetQOS(J,QDef,0);
      }
    else
      {
      MJobSetQOS(J,J->QReq,0);
      }
    }    /* END if (Mode == msmSim) */

  if ((SetString[0] != '0') && (strcmp(SetString,NONE)))
    {
    /* FORMAT:  ONEOF,FEATURE[,X:Y:Z] */

    if ((ptr = MUStrTok(SetString,",: \t",&TokPtr)) != NULL)
      {
      /* determine selection type */

      RQ[0]->SetSelection = MUGetIndex(ptr,(const char **)MResSetSelectionType,0,mrssNONE);

      if ((ptr = MUStrTok(NULL,",: \t",&TokPtr)) != NULL)
        {
        /* determine set attribute */

        RQ[0]->SetType = MUGetIndex(ptr,(const char **)MResSetAttrType,0,mrstNONE);

        index = 0;

        if ((ptr = MUStrTok(NULL,", \t",&TokPtr)) != NULL)
          {
          /* determine set list */

          while (ptr != NULL)
            {
            MUStrDup(&RQ[0]->SetList[index],ptr);

            index++;

            ptr = MUStrTok(NULL,", \t",&TokPtr);
            }
          }

        RQ[0]->SetList[index] = NULL;
        }  /* END if ((ptr = MUStrTok(NULL)) != NULL) */
      }    /* END if ((ptr = MUStrTok(SetString)) != NULL) */
    }      /* END if ((SetString[0] != '0') && (strcmp(SetString,NONE))) */

  if (MJobEval(J) == FAILURE)
    {
    /* job not properly formed */

    DBG(1,fSTRUCT) DPrint("ALERT:    ignoring job '%s' with corrupt configuration\n",
      J->Name);

    MJobDestroy(&J);

    return(FAILURE);
    }

  DBG(6,fSIM) DPrint("INFO:     job '%s' loaded.  class: %s  opsys: %s  arch: %s\n",
    J->Name,
    MUCAListToString(RQ[0]->DRes.PSlot,NULL,NULL),
    MAList[eOpsys][RQ[0]->Opsys],
    MAList[eArch][RQ[0]->Arch]);

  return(SUCCESS);
  }  /* END MTraceLoadWorkload() */
示例#2
0
文件: MRM_CP.c 项目: dhh1128/cbase
int MRMLoadJobCP(

  mrm_t   *R,       /* I */
  mjob_t  *J,       /* I (modified) */
  char    *EMsg)    /* O (optional,minsize=MMAX_LINE) */

  {
  mbool_t IsFound = FALSE;

  mdb_t *MDBInfo;

  MSchedGetAttr(msaDBHandle,mdfNONE,(void **)&MDBInfo,-1);

  if ((R == NULL) ||
      (J == NULL))
    {
    return(FAILURE);
    }

  if (EMsg != NULL)
    {
    EMsg[0] = '\0';
    }

  if (bmisset(&R->Flags,mrmfFullCP) || 
      bmisset(&R->IFlags,mrmifLocalQueue) || 
     (R->Type == mrmtMoab))
    {
    IsFound = TRUE;

    if ((MCP.UseDatabase == FALSE) || (MDBInfo->DBType == mdbNONE))
      {
      char *CPFile=NULL;  /* NULL it: returned pointer to read buffer */

      /* Load full job record from spool file */
   
      char FileName[MMAX_PATH_LEN];

      MCPGetCPFileName(J,FileName,sizeof(FileName));

      /* note - MFULoadAllocReadBuffer does the MUCalloc on CPFile.
                This routine must do the MUFree on CPFile 
                ON SUCCESS ONLY. */

      if (MFULoadAllocReadBuffer(FileName,&CPFile,EMsg,NULL) == FAILURE)
        {
        IsFound = FALSE;
   
        if (J->Req[0] == NULL)
          {
          /* job is invalid */
   
          MDB(1,fRM) MLog("ERROR:    cannot load job checkpoint file for job %s - %s\n",
            J->Name,
            EMsg);
   
          return(FAILURE);
          }
        }
      else if (MJobLoadCPNew(J,CPFile) == FAILURE)
        {
        IsFound = FALSE;
   
        if (bmisset(&J->SpecFlags,mjfNoRMStart))
          {
          if (EMsg != NULL)
            strcpy(EMsg,"cannot load job checkpoint info");
   
          MDB(1,fRM) MLog("ERROR:    cannot load job checkpoint info for job %s\n",
            J->Name);
   
          /* MFULoadAllocReadBuffer was successful, so free its read buffer it passed back */
          MUFree(&CPFile);

          return(FAILURE);
          }    /* END if (bmisset(&J->SpecFlags,mjfNoRMStart)) */
        }      /* END else if (MJobLoadCPNew(J,CPFile) == FAILURE) */

      /* Free the read buffer which was allocated */
      MUFree(&CPFile);
      }        /* END if ((MCP.UseDatabase == FALSE) || (MDBInfo->DBType == mdbNONE)) */
    else
      {
      mstring_t MString(MMAX_BUFFER);
      int queryRC;
      int loadRC = FAILURE;

      /* check database for job, create new entry if job does not exist in database */

      queryRC = MDBQueryCP(MDBInfo,(char *)MCPType[mcpJob],J->Name,NULL,1,&MString,NULL);

      if (queryRC == SUCCESS)
        loadRC = MJobLoadCPNew(J,MString.c_str());

      if ((queryRC == SUCCESS) && (loadRC == FAILURE))
        {
        IsFound = FALSE;
   
        if (bmisset(&J->SpecFlags,mjfNoRMStart))
          {
          if (EMsg != NULL)
            strcpy(EMsg,"cannot load job checkpoint info");
   
          MDB(1,fRM) MLog("ERROR:    cannot load job checkpoint info for job %s\n",
            J->Name);
   
          return(FAILURE);
          }
        }
      else
        {
        IsFound = FALSE;

        /* MOCheckpoint(mxoJob,(void *)J,TRUE,NULL); -- why are we doing this?
         * this function is called in MRMJobPostLoad and we checkpoint in there as well */
        }
      }
    }        /* END if (bmisset(&R->Flags,mrmfFullCP) || ... */
  else if ((MSched.Iteration == 0) ||
           (MSched.EnableHighThroughput == FALSE) ||
           bmisset(&R->IFlags,mrmifLocalQueue))
    {
    /* in high throughput mode, only look in checkpoint record on iteration 0 */

    char tmpName[MMAX_NAME];

    if (J->SRMJID != NULL)
      {
      /* use source RM job id over job name */

      MJobGetName(NULL,J->SRMJID,R,tmpName,sizeof(tmpName),mjnShortName);
      }
    else
      {
      MUStrCpy(tmpName,J->Name,sizeof(tmpName));
      }

    MCPRestore(mcpJob,tmpName,(void *)J,&IsFound);
     
    /* Looking up the job name by SRMJID in the checkpoint file may not be correct.
     * The job may have been stored with J->Name instead of SRMJID.
     * If so, the check above could fail to load the job from the checkpoint, so we
     * would lose the information that was checkpointed.
     * Thus we try again if MCPRestore fails.
     */

    if ((IsFound == FALSE) && 
        (J->SRMJID != NULL) && 
        (strcmp(J->SRMJID,tmpName)))
      {
      MUStrCpy(tmpName,J->Name,sizeof(tmpName));

      MCPRestore(mcpJob,tmpName,(void *)J,&IsFound);
      }

    if (IsFound == FALSE)
      {
      MDB(4,fRM) MLog("INFO:     job checkpoint info for job %s (%s) not found\n",
        J->Name,
        (J->SRMJID == NULL) ? "" : J->SRMJID);
      } 
    }    /* END else (bmisset(&R->Flags,mrmfFullCP) || ...) */

  /* need to process RM ext. string from newly checkpointed jobs */

  if (IsFound == TRUE)
    {
#if 0
    char *FirstPartition;
    char *LastPartition;
    char *SearchString = "partition:";
#endif

    /* mark this job as being loaded from the checkpoint file so
     * routines know to handle some data differently */
    
    bmset(&J->IFlags,mjifWasLoadedFromCP);

#if 0
    /* WARNING - this code is a temporary bug fix for the llnl 5.2 branch only
       Do not port this code to other branches
       The fix is for ticket 5097 - if we discover how the ALL partition list (including internal)
       is being written to the checkpoint file we can remove this code. */

    /* Due to a bug in the client software we could have the default submit partition and a partition specified in the command
       script show up in the RMXString as follows...
       
       "x=partition:g02;partition:g04"
       
       Since llnl does not want to update all of their client systems we are putting this temporary fix in the moab code
       that reads in the checkpoint records to remove the first partition entry if multiple partition entries are found.
       This is a legal syntax, but we are assuming that we have encoutnered it due to the client bug.

       Note that if a user specifies multiple partitions that the RMXString has the format "x=partition:g02:g04" */
 
    if ((J->RMXString != NULL) &&
        ((FirstPartition = MUStrStr(J->RMXString,SearchString,0,TRUE,FALSE)) != NULL))
      {
      /* See if there is another partition:xyz in the RMXString */

      if ((LastPartition = MUStrStr(FirstPartition + strlen(SearchString),SearchString,0,TRUE,TRUE)) != NULL)
        {
        /* we assume that all the partitions are grouped together - x=partition:g02;partition:g03;partition:g04; */

        while (*LastPartition != '\0')
          {
          *FirstPartition++ = *LastPartition++;
          }
        *FirstPartition = '\0';
        }
      }
    /* END WARNING */
#endif

    MJobProcessExtensionString(J,J->RMXString,mxaNONE,NULL,NULL);
    }

  return(SUCCESS);
  }  /* END MRMLoadJobCP() */