/* Look up a finished probe job for `record` in the scheduler's history list.
 *
 * Records are compared with avahi_record_equal_no_ttl(). Only the first
 * matching entry decides the result: if avahi_age() of its delivery time
 * exceeds AVAHI_PROBE_HISTORY_MSEC*1000 the entry is released with
 * job_free() and NULL is returned; otherwise the entry itself is returned.
 * NULL is also returned when no entry matches. */
static AvahiProbeJob* find_history_job(AvahiProbeScheduler *s, AvahiRecord *record) {
    AvahiProbeJob *entry;

    assert(s);
    assert(record);

    entry = s->history;
    while (entry) {
        /* Everything on the history list must already be marked done. */
        assert(entry->done);

        if (!avahi_record_equal_no_ttl(entry->record, record)) {
            entry = entry->jobs_next;
            continue;
        }

        /* Found the record: still fresh enough to be useful? */
        if (avahi_age(&entry->delivery) <= AVAHI_PROBE_HISTORY_MSEC*1000)
            return entry;

        /* Outdated, so drop it from the history. */
        job_free(s, entry);
        return NULL;
    }

    return NULL;
}
int job_reap( bool interactive ) { ASSERT_IS_MAIN_THREAD(); job_t *jnext; int found=0; static int locked = 0; locked++; /* job_read may fire an event handler, we do not want to call ourselves recursively (to avoid infinite recursion). */ if( locked>1 ) return 0; job_iterator_t jobs; jnext = jobs.next(); while (jnext) { job_t *j = jnext; jnext = jobs.next(); process_t *p; /* If we are reaping only jobs who do not need status messages sent to the console, do not consider reaping jobs that need status messages */ if( (!job_get_flag( j, JOB_SKIP_NOTIFICATION ) ) && (!interactive) && (!job_get_flag( j, JOB_FOREGROUND ))) { continue; } for( p=j->first_process; p; p=p->next ) { int s; if( !p->completed ) continue; if( !p->pid ) continue; s = p->status; proc_fire_event( L"PROCESS_EXIT", EVENT_EXIT, p->pid, ( WIFSIGNALED(s)?-1:WEXITSTATUS( s )) ); if( WIFSIGNALED(s) ) { /* Ignore signal SIGPIPE.We issue it ourselves to the pipe writer when the pipe reader dies. */ if( WTERMSIG(s) != SIGPIPE ) { int proc_is_job = ((p==j->first_process) && (p->next == 0)); if( proc_is_job ) job_set_flag( j, JOB_NOTIFIED, 1 ); if( !job_get_flag( j, JOB_SKIP_NOTIFICATION ) ) { if( proc_is_job ) fwprintf( stdout, _( L"%ls: Job %d, \'%ls\' terminated by signal %ls (%ls)" ), program_name, j->job_id, j->command_wcstr(), sig2wcs(WTERMSIG(p->status)), signal_get_desc( WTERMSIG(p->status) ) ); else fwprintf( stdout, _( L"%ls: Process %d, \'%ls\' from job %d, \'%ls\' terminated by signal %ls (%ls)" ), program_name, p->pid, p->argv0(), j->job_id, j->command_wcstr(), sig2wcs(WTERMSIG(p->status)), signal_get_desc( WTERMSIG(p->status) ) ); tputs(clr_eol,1,&writeb); fwprintf (stdout, L"\n" ); found=1; } /* Clear status so it is not reported more than once */ p->status = 0; } } } /* If all processes have completed, tell the user the job has completed and delete it from the active job list. 
*/ if( job_is_completed( j ) ) { if( !job_get_flag( j, JOB_FOREGROUND) && !job_get_flag( j, JOB_NOTIFIED ) && !job_get_flag( j, JOB_SKIP_NOTIFICATION ) ) { format_job_info( j, _( L"ended" ) ); found=1; } proc_fire_event( L"JOB_EXIT", EVENT_EXIT, -j->pgid, 0 ); proc_fire_event( L"JOB_EXIT", EVENT_JOB_ID, j->job_id, 0 ); job_free(j); } else if( job_is_stopped( j ) && !job_get_flag( j, JOB_NOTIFIED ) ) { /* Notify the user about newly stopped jobs. */ if( !job_get_flag( j, JOB_SKIP_NOTIFICATION ) ) { format_job_info( j, _( L"stopped" ) ); found=1; } job_set_flag( j, JOB_NOTIFIED, 1 ); } } if( found ) fflush( stdout ); locked = 0; return found; }
int main(int argc, char** args) { char* default_configfn = "astrometry.cfg"; char* default_config_path = "../etc"; int c; char* configfn = NULL; int i; engine_t* engine; char* mydir = NULL; char* basedir = NULL; char* me; anbool help = FALSE; sl* strings = sl_new(4); char* cancelfn = NULL; char* solvedfn = NULL; int loglvl = LOG_MSG; anbool tostderr = FALSE; char* infn = NULL; FILE* fin = NULL; anbool fromstdin = FALSE; bl* opts = opts_from_array(myopts, sizeof(myopts)/sizeof(an_option_t), NULL); sl* inds = sl_new(4); char* datalog = NULL; engine = engine_new(); while (1) { c = opts_getopt(opts, argc, args); if (c == -1) break; switch (c) { case 'D': datalog = optarg; break; case 'p': engine->inparallel = TRUE; break; case 'i': sl_append(inds, optarg); break; case 'd': basedir = optarg; break; case 'f': infn = optarg; fromstdin = streq(infn, "-"); break; case 'E': tostderr = TRUE; break; case 'h': help = TRUE; break; case 'v': loglvl++; break; case 's': solvedfn = optarg; case 'C': cancelfn = optarg; break; case 'c': configfn = strdup(optarg); break; case '?': break; default: printf("Unknown flag %c\n", c); exit( -1); } } if (optind == argc && !infn) { // Need extra args: filename printf("You must specify at least one input file!\n\n"); help = TRUE; } if (help) { print_help(args[0], opts); exit(0); } bl_free(opts); gslutils_use_error_system(); log_init(loglvl); if (tostderr) log_to(stderr); if (datalog) { datalogfid = fopen(datalog, "wb"); if (!datalogfid) { SYSERROR("Failed to open data log file \"%s\" for writing", datalog); return -1; } atexit(close_datalogfid); data_log_init(100); data_log_enable_all(); data_log_to(datalogfid); data_log_start(); } if (infn) { logverb("Reading input filenames from %s\n", (fromstdin ? 
"stdin" : infn)); if (!fromstdin) { fin = fopen(infn, "rb"); if (!fin) { ERROR("Failed to open file %s for reading input filenames", infn); exit(-1); } } else fin = stdin; } // directory containing the 'engine' executable: me = find_executable(args[0], NULL); if (!me) me = strdup(args[0]); mydir = sl_append(strings, dirname(me)); free(me); // Read config file if (!configfn) { int i; sl* trycf = sl_new(4); sl_appendf(trycf, "%s/%s/%s", mydir, default_config_path, default_configfn); // if I'm in /usr/bin, look for config file in /etc if (streq(mydir, "/usr/bin")) { sl_appendf(trycf, "/etc/%s", default_configfn); } sl_appendf(trycf, "%s/%s", mydir, default_configfn); sl_appendf(trycf, "./%s", default_configfn); sl_appendf(trycf, "./%s/%s", default_config_path, default_configfn); for (i=0; i<sl_size(trycf); i++) { char* cf = sl_get(trycf, i); if (file_exists(cf)) { configfn = strdup(cf); logverb("Using config file \"%s\"\n", cf); break; } else { logverb("Config file \"%s\" doesn't exist.\n", cf); } } if (!configfn) { char* cflist = sl_join(trycf, "\n "); logerr("Couldn't find config file: tried:\n %s\n", cflist); free(cflist); } sl_free2(trycf); } if (!streq(configfn, "none")) { if (engine_parse_config_file(engine, configfn)) { logerr("Failed to parse (or encountered an error while interpreting) config file \"%s\"\n", configfn); exit( -1); } } if (sl_size(inds)) { // Expand globs. 
for (i=0; i<sl_size(inds); i++) { char* s = sl_get(inds, i); glob_t myglob; int flags = GLOB_TILDE | GLOB_BRACE; if (glob(s, flags, NULL, &myglob)) { SYSERROR("Failed to expand wildcards in index-file path \"%s\"", s); exit(-1); } for (c=0; c<myglob.gl_pathc; c++) { if (engine_add_index(engine, myglob.gl_pathv[c])) { ERROR("Failed to add index \"%s\"", myglob.gl_pathv[c]); exit(-1); } } globfree(&myglob); } } if (!pl_size(engine->indexes)) { logerr("\n\n" "---------------------------------------------------------------------\n" "You must list at least one index in the config file (%s)\n\n" "See http://astrometry.net/use.html about how to get some index files.\n" "---------------------------------------------------------------------\n" "\n", configfn); exit(-1); } if (engine->minwidth <= 0.0 || engine->maxwidth <= 0.0) { logerr("\"minwidth\" and \"maxwidth\" in the config file %s must be positive!\n", configfn); exit(-1); } free(configfn); if (!il_size(engine->default_depths)) { parse_depth_string(engine->default_depths, "10 20 30 40 50 60 70 80 90 100 " "110 120 130 140 150 160 170 180 190 200"); } engine->cancelfn = cancelfn; engine->solvedfn = solvedfn; i = optind; while (1) { char* jobfn; job_t* job; struct timeval tv1, tv2; if (infn) { // Read name of next input file to be read. 
logverb("\nWaiting for next input filename...\n"); jobfn = read_string_terminated(fin, "\n\r\0", 3, FALSE); if (strlen(jobfn) == 0) break; } else { if (i == argc) break; jobfn = args[i]; i++; } gettimeofday(&tv1, NULL); logmsg("Reading file \"%s\"...\n", jobfn); job = engine_read_job_file(engine, jobfn); if (!job) { ERROR("Failed to read job file \"%s\"", jobfn); exit(-1); } if (basedir) { logverb("Setting job's output base directory to %s\n", basedir); job_set_output_base_dir(job, basedir); } if (engine_run_job(engine, job)) logerr("Failed to run_job()\n"); job_free(job); gettimeofday(&tv2, NULL); logverb("Spent %g seconds on this field.\n", millis_between(&tv1, &tv2)/1000.0); } engine_free(engine); sl_free2(strings); sl_free2(inds); if (fin && !fromstdin) fclose(fin); return 0; }
/*
 * job_purge - remove a job from this daemon.
 *
 * Captures, in a freshly allocated job_file_delete_info, what
 * delete_job_files() needs (temp-dir / node-file flags, job id, file
 * prefix, optional checkpoint directory, execution uid/gid) and hands it to
 * delete_job_files(), either directly or through the thread pool when
 * thread_unlink_calls is TRUE.  The job is then unlinked from the job
 * lists and released with job_free().  If no job remains on svr_alljobs,
 * MOMCheckRestart() is called.
 *
 * The info structure is not freed in this function.  If it cannot be
 * allocated the error is logged and the job is left untouched.
 */
void job_purge(

  job *pjob)  /* M */

  {
  static char           id[] = "job_purge";
  job_file_delete_info *info = malloc(sizeof(job_file_delete_info));

  if (info == NULL)
    {
    log_err(ENOMEM,id,"No space to allocate info for job file deletion");
    return;
    }

  /* record the per-job flags in the info struct and clear them on the job */
  info->has_temp_dir = (pjob->ji_flags & MOM_HAS_TMPDIR) ? TRUE : FALSE;
  pjob->ji_flags &= ~MOM_HAS_TMPDIR;

  info->has_node_file = (pjob->ji_flags & MOM_HAS_NODEFILE) ? TRUE : FALSE;
  pjob->ji_flags &= ~MOM_HAS_NODEFILE;

  strcpy(info->jobid,pjob->ji_qs.ji_jobid);
  strcpy(info->prefix,pjob->ji_qs.ji_fileprefix);

  /* the checkpoint directory is only copied when both the directory and
   * the checkpoint name attributes are set */
  info->checkpoint_dir = NULL;

  if ((pjob->ji_wattr[JOB_ATR_checkpoint_dir].at_flags & ATR_VFLAG_SET) &&
      (pjob->ji_wattr[JOB_ATR_checkpoint_name].at_flags & ATR_VFLAG_SET))
    {
    info->checkpoint_dir = strdup(pjob->ji_wattr[JOB_ATR_checkpoint_dir].at_val.at_str);
    }

  info->gid = pjob->ji_qs.ji_un.ji_momt.ji_exgid;
  info->uid = pjob->ji_qs.ji_un.ji_momt.ji_exuid;

  /* delete the files now, or queue the deletion for a worker thread */
  if (thread_unlink_calls != TRUE)
    delete_job_files(info);
  else
    enqueue_threadpool_request(delete_job_files,info);

  /* remove this job from the global queue */
  delete_link(&pjob->ji_jobque);
  delete_link(&pjob->ji_alljobs);

  if (LOGLEVEL >= 6)
    {
    sprintf(log_buffer,"removing job");

    log_record(PBSEVENT_DEBUG,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buffer);
    }

#if IBM_SP2==2        /* IBM SP PSSP 3.1 */
  unload_sp_switch(pjob);
#endif   /* IBM SP */

  job_free(pjob);

  /* if no jobs are left, check if MOM should be restarted */
  if (((job *)GET_NEXT(svr_alljobs)) == NULL)
    MOMCheckRestart();
  }  /* END job_purge() */
/* Timer callback of a probe job; `data` is the AvahiProbeJob concerned.
 *
 * - A job that is already done is simply freed.
 * - Otherwise a query packet sized to the interface MTU is created and the
 *   job's probe is added to it.  If that fails, a dedicated packet sized
 *   from the record estimate (capped at AVAHI_DNS_PACKET_SIZE_MAX) is built
 *   for this one probe, sent if the data fits, and the job is marked done.
 * - If the first probe fits, further not-yet-chosen jobs are added while
 *   they fit, then the records of all chosen jobs are appended and counted
 *   in NSCOUNT, and the packet is sent.
 *
 * Allocation failures make the callback return silently. */
static void elapse_callback(AVAHI_GCC_UNUSED AvahiTimeEvent *e, void* data) {
    AvahiProbeJob *job = data;
    AvahiProbeJob *cur;
    AvahiProbeScheduler *s;
    AvahiDnsPacket *pkt;
    unsigned n_questions, n_authority;

    assert(job);
    s = job->scheduler;

    if (job->done) {
        /* Lets remove it from the history */
        job_free(s, job);
        return;
    }

    pkt = avahi_dns_packet_new_query(s->interface->hardware->mtu);
    if (!pkt)
        return; /* OOM */

    /* Add the probe this timer was armed for */
    if (!packet_add_probe_query(s, pkt, job)) {
        size_t need;
        AvahiKey *any_key;
        int fits;

        avahi_dns_packet_free(pkt);

        /* The probe didn't fit in the packet, so let's allocate a larger one */
        need = avahi_key_get_estimate_size(job->record->key) +
               avahi_record_get_estimate_size(job->record) +
               AVAHI_DNS_PACKET_HEADER_SIZE;

        if (need > AVAHI_DNS_PACKET_SIZE_MAX)
            need = AVAHI_DNS_PACKET_SIZE_MAX;

        pkt = avahi_dns_packet_new_query(need);
        if (!pkt)
            return; /* OOM */

        any_key = avahi_key_new(job->record->key->name, job->record->key->clazz, AVAHI_DNS_TYPE_ANY);
        if (!any_key) {
            avahi_dns_packet_free(pkt);
            return; /* OOM */
        }

        fits = avahi_dns_packet_append_key(pkt, any_key, 0) &&
               avahi_dns_packet_append_record(pkt, job->record, 0, 0);
        avahi_key_unref(any_key);

        if (!fits)
            avahi_log_warn("Probe record too large, cannot send");
        else {
            avahi_dns_packet_set_field(pkt, AVAHI_DNS_FIELD_NSCOUNT, 1);
            avahi_dns_packet_set_field(pkt, AVAHI_DNS_FIELD_QDCOUNT, 1);
            avahi_interface_send_packet(s->interface, pkt);
        }

        /* The job is finished whether or not anything went out. */
        avahi_dns_packet_free(pkt);
        job_mark_done(s, job);
        return;
    }

    /* The first probe is in; it counts as one question. */
    n_questions = 1;

    /* Try to fill up packet with more probes, if available */
    cur = s->jobs;
    while (cur) {
        if (!cur->chosen) {
            if (!packet_add_probe_query(s, pkt, cur))
                break;
            n_questions++;
        }
        cur = cur->jobs_next;
    }

    avahi_dns_packet_set_field(pkt, AVAHI_DNS_FIELD_QDCOUNT, n_questions);

    /* Now add the chosen records to the authority section */
    n_authority = 0;
    cur = s->jobs;
    while (cur) {
        /* Remember the successor before the job is marked done. */
        AvahiProbeJob *following = cur->jobs_next;

        if (cur->chosen) {
            if (!avahi_dns_packet_append_record(pkt, cur->record, 0, 0)) {
                /* Unmark this and all following jobs */
                while (cur) {
                    cur->chosen = 0;
                    cur = cur->jobs_next;
                }
                break;
            }

            job_mark_done(s, cur);
            n_authority++;
        }

        cur = following;
    }

    avahi_dns_packet_set_field(pkt, AVAHI_DNS_FIELD_NSCOUNT, n_authority);

    /* Send it now */
    avahi_interface_send_packet(s->interface, pkt);
    avahi_dns_packet_free(pkt);
}
void* job_or_resv_recov_fs(char *filename, int objtype) { int fds; job *pj; void *pobj = NULL; #ifndef PBS_MOM resc_resv *presv; #endif void *p_fixed = NULL; int fixed_size; char *prefix = NULL; char *path = NULL; char *err_msg; char *ptcs; /*text control string for err msg*/ char *pobjID = NULL; char *pn; /*name of the file "root" (prefix)*/ attribute *wattr = NULL; attribute_def *p_attr_def = NULL; int final_attr; int attr_unkn; char namebuf[MAXPATHLEN]; char err_buf[80]; if (objtype == RESC_RESV_OBJECT) { #ifndef PBS_MOM /*MOM doesn't know about resource reservations*/ presv = resc_resv_alloc(); /* allocate & init resc_rescv struct */ if (presv == (resc_resv *)0) { return ((void *)0); } pobj = (void *)presv; path = path_resvs; err_msg = "error opening reservation file"; ptcs = "reservation Id %s does not match file name for %s"; pobjID = presv->ri_qs.ri_resvID; p_fixed = (void *)&presv->ri_qs; fixed_size = sizeof(struct resvfix); prefix = presv->ri_qs.ri_fileprefix; p_attr_def = resv_attr_def; wattr = presv->ri_wattr; attr_unkn = RESV_ATR_UNKN; final_attr = RESV_ATR_LAST; #else /* PBS_MOM only: This will never come here for MOM!!! 
*/ return ((void *)0); #endif } else { pj = job_alloc(); /* allocate & initialize job struct */ if (pj == (job *)0) { return ((void *)0); } pobj = (void *)pj; path = path_jobs; err_msg = "error opening job file"; ptcs = "Job Id %s does not match file name for %s"; pobjID = pj->ji_qs.ji_jobid; p_fixed = (void *)&pj->ji_qs; fixed_size = sizeof(struct jobfix); if (*pj->ji_qs.ji_fileprefix != '\0') prefix = pj->ji_qs.ji_fileprefix; else prefix = pj->ji_qs.ji_jobid; p_attr_def = job_attr_def; wattr = pj->ji_wattr; attr_unkn = JOB_ATR_UNKN; final_attr = JOB_ATR_LAST; } (void)strcpy(namebuf, path); /* job (reservation) directory path */ (void)strcat(namebuf, filename); #ifdef WIN32 fix_perms(namebuf); #endif fds = open(namebuf, O_RDONLY, 0); if (fds < 0) { sprintf(log_buffer, "%s on %s", err_msg, namebuf); log_err(errno, "job_or_resv_recov", log_buffer); free((char *)pobj); return ((void *)0); } #ifdef WIN32 setmode(fds, O_BINARY); #endif /* read in job or resc_resv quick save sub-structure */ if (read(fds, (char *)p_fixed, fixed_size) != fixed_size) { (void)sprintf(err_buf, "problem reading %s", namebuf); log_err(errno, "job_or_resv_recov", err_buf); free((char *)pobj); (void)close(fds); return ((void *)0); } /* Does file name match the internal name? 
*/ /* This detects ghost files */ #ifdef WIN32 pn = strrchr(namebuf, (int)'/'); if (pn == NULL) pn = strrchr(namebuf, (int)'\\'); if (pn == NULL) { sprintf(log_buffer, "bad path %s", namebuf); log_err(errno, "job_or_resv_recov", log_buffer); free((char *)pj); (void)close(fds); return ((job *)0); } pn++; #else pn = strrchr(namebuf, (int)'/') + 1; #endif if (strncmp(pn, prefix, strlen(prefix)) != 0) { /* mismatch, discard job (reservation) */ (void)sprintf(log_buffer, ptcs, pobjID, namebuf); log_err(-1, "job_or_resv_recov", log_buffer); free((char *)pobj); (void)close(fds); return ((void *)0); } /* read in working attributes */ if (recov_attr_fs(fds, pobj, p_attr_def, wattr, final_attr, attr_unkn) != 0) { log_err(errno, "job_or_resv_recov", "error from recov_attr"); if (objtype == RESC_RESV_OBJECT) { #ifndef PBS_MOM /*MOM doesn't know about resource reservations*/ resv_free((resc_resv *)pobj); #endif } else { job_free((job *)pobj); } (void)close(fds); return ((void *)0); } (void)close(fds); #if defined(PBS_MOM) && defined(WIN32) /* get a handle to the job (may not exist) */ pj->ji_hJob = OpenJobObject(JOB_OBJECT_ALL_ACCESS, FALSE, pj->ji_qs.ji_jobid); #endif /* all done recovering the job (reservation) */ return (pobj); }
/* Schedule `record` to be sent as a response.
 *
 * The response is dropped (return 0) when
 *   - a suppressed job for this querier exists whose record has the same
 *     goodbye state and at least half the TTL, or
 *   - a history job exists with the same goodbye state, at least half the
 *     TTL, and a flush_cache bit that covers the requested one.
 * A history job that does not suppress the response is freed.
 *
 * If a job for the record is already scheduled it is updated (earlier
 * delivery time, flush_cache bit, querier validity, record reference);
 * otherwise a new AVAHI_SCHEDULED job is created.  Both cases return 1.
 * Returns 0 if the new job cannot be allocated.
 *
 * With `immediately` set the delivery time is computed with zero delay and
 * zero jitter instead of AVAHI_RESPONSE_DEFER_MSEC / _JITTER_MSEC. */
int avahi_response_scheduler_post(AvahiResponseScheduler *s, AvahiRecord *record, int flush_cache, const AvahiAddress *querier, int immediately) {
    AvahiResponseJob *job;
    struct timeval when;

    assert(s);
    assert(record);
    assert(!avahi_key_is_pattern(record->key));

    /* Known answer suppression: only applies when we know the querier */
    if (querier) {
        job = find_suppressed_job(s, record, querier);

        if (job &&
            avahi_record_is_goodbye(record) == avahi_record_is_goodbye(job->record) &&
            job->record->ttl >= record->ttl/2)
            return 0;
    }

    /* Local duplicate suppression: did we send this response recently? */
    job = find_history_job(s, record);
    if (job) {
        if (avahi_record_is_goodbye(record) == avahi_record_is_goodbye(job->record) &&
            job->record->ttl >= record->ttl/2 &&
            (job->flush_cache || !flush_cache))
            return 0;

        /* The history entry is of no use any more */
        job_free(s, job);
    }

    /* Work out when the response should go out */
    if (immediately)
        avahi_elapse_time(&when, 0, 0);
    else
        avahi_elapse_time(&when, AVAHI_RESPONSE_DEFER_MSEC, AVAHI_RESPONSE_JITTER_MSEC);

    job = find_scheduled_job(s, record);
    if (!job) {
        /* Nothing pending for this record: create a new job and schedule it */
        job = job_new(s, record, AVAHI_SCHEDULED);
        if (!job)
            return 0; /* OOM */

        job->delivery = when;
        job->time_event = avahi_time_event_new(s->time_event_queue, &job->delivery, elapse_callback, job);
        job->flush_cache = flush_cache;

        job->querier_valid = !!querier;
        if (job->querier_valid)
            job->querier = *querier;

        return 1;
    }

    /* A job is already pending: merge the new request into it */

    /* Move the delivery forward if the new time is earlier */
    if (avahi_timeval_compare(&when, &job->delivery) < 0) {
        job->delivery = when;
        avahi_time_event_update(job->time_event, &job->delivery);
    }

    /* The flush cache bit is only ever switched on here */
    if (flush_cache)
        job->flush_cache = 1;

    /* The stored querier stays valid only if the new one is the same */
    if (!querier || (job->querier_valid && avahi_address_cmp(querier, &job->querier) != 0))
        job->querier_valid = 0;

    /* Swap in the new record (just for the TTL) */
    avahi_record_unref(job->record);
    job->record = avahi_record_ref(record);

    return 1;
}
job *job_recov( char *filename) /* I */ /* pathname to job save file */ { int fds; job *pj; char *pn; char namebuf[MAXPATHLEN]; int qs_upgrade; #ifndef PBS_MOM char parent_id[PBS_MAXSVRJOBID + 1]; job_array *pa; #endif qs_upgrade = FALSE; pj = job_alloc(); /* allocate & initialize job structure space */ if (pj == NULL) { /* FAILURE - cannot alloc memory */ return(NULL); } strcpy(namebuf, path_jobs); /* job directory path */ strcat(namebuf, filename); fds = open(namebuf, O_RDONLY, 0); if (fds < 0) { sprintf(log_buffer, "unable to open %s", namebuf); log_err(errno, "job_recov", log_buffer); free((char *)pj); /* FAILURE - cannot open job file */ return(NULL); } /* read in job quick save sub-structure */ if (read(fds, (char *)&pj->ji_qs, quicksize) != (ssize_t)quicksize && pj->ji_qs.qs_version == PBS_QS_VERSION) { sprintf(log_buffer, "Unable to read %s", namebuf); log_err(errno, "job_recov", log_buffer); free((char *)pj); close(fds); return(NULL); } /* is ji_qs the version we expect? */ if (pj->ji_qs.qs_version != PBS_QS_VERSION) { /* ji_qs is older version */ sprintf(log_buffer, "%s appears to be from an old version. Attempting to convert.\n", namebuf); log_err(-1, "job_recov", log_buffer); if (job_qs_upgrade(pj, fds, namebuf, pj->ji_qs.qs_version) != 0) { sprintf(log_buffer, "unable to upgrade %s\n", namebuf); log_err(-1, "job_recov", log_buffer); free((char *)pj); close(fds); return(NULL); } qs_upgrade = TRUE; } /* END if (pj->ji_qs.qs_version != PBS_QS_VERSION) */ /* Does file name match the internal name? 
*/ /* This detects ghost files */ pn = strrchr(namebuf, (int)'/') + 1; if (strncmp(pn, pj->ji_qs.ji_fileprefix, strlen(pj->ji_qs.ji_fileprefix)) != 0) { /* mismatch, discard job */ sprintf(log_buffer, "Job Id %s does not match file name for %s", pj->ji_qs.ji_jobid, namebuf); log_err(-1, "job_recov", log_buffer); free((char *)pj); close(fds); return(NULL); } /* read in working attributes */ if (recov_attr( fds, pj, job_attr_def, pj->ji_wattr, (int)JOB_ATR_LAST, (int)JOB_ATR_UNKN, TRUE) != 0) { sprintf(log_buffer, "unable to recover %s (file is likely corrupted)", namebuf); log_err(-1, "job_recov", log_buffer); job_free(pj); close(fds); return(NULL); } #ifdef PBS_MOM /* read in tm sockets and ips */ if (recov_tmsock(fds, pj) != 0) { sprintf(log_buffer, "warning: tmsockets not recovered from %s (written by an older pbs_mom?)", namebuf); log_err(-1, "job_recov", log_buffer); } if (recov_roottask(fds, pj) != 0) { sprintf(log_buffer, "warning: root task not recovered from %s (written by an older pbs_mom?)", namebuf); log_err(-1, "job_recov", log_buffer); } if (recov_jobflags(fds, pj) != 0) { sprintf(log_buffer, "warning: job flags not recovered from %s (written by an older pbs_mom?)", namebuf); log_err(-1, "job_recov", log_buffer); } #else /* PBS_MOM */ if (pj->ji_wattr[(int)JOB_ATR_job_array_request].at_flags & ATR_VFLAG_SET) { /* job is part of an array. We need to put a link back to the server job array struct for this array. We also have to link this job into the linked list of jobs belonging to the array. */ array_get_parent_id(pj->ji_qs.ji_jobid, parent_id); pa = get_array(parent_id); if (strcmp(parent_id, pj->ji_qs.ji_jobid) == 0) { pj->ji_isparent = TRUE; } else { if (pa == NULL) { /* couldn't find array struct, it must not have been recovered, treat job as indepentent job? 
perhaps we should delete the job XXX_JOB_ARRAY: should I unset this?*/ pj->ji_wattr[(int)JOB_ATR_job_array_request].at_flags &= ~ATR_VFLAG_SET; } else { CLEAR_LINK(pj->ji_arrayjobs); append_link(&pa->array_alljobs, &pj->ji_arrayjobs, (void*)pj); pj->ji_arraystruct = pa; pa->jobs_recovered++; } } } #endif close(fds); /* all done recovering the job */ if (qs_upgrade == TRUE) { job_save(pj, SAVEJOB_FULL); } return(pj); } /* END job_recov() */
job * job_recov_fs(char *filename, int recov_subjob) { int fds; char basen[MAXPATHLEN+1]; job *pj; char *pn; char *psuffix; pj = job_alloc(); /* allocate & initialize job structure space */ if (pj == (job *)0) { return ((job *)0); } (void)strcpy(pbs_recov_filename, path_jobs); /* job directory path */ (void)strcat(pbs_recov_filename, filename); #ifdef WIN32 fix_perms(pbs_recov_filename); #endif /* change file name in case recovery fails so we don't try same file */ (void)strcpy(basen, pbs_recov_filename); psuffix = basen + strlen(basen) - strlen(JOB_BAD_SUFFIX); (void)strcpy(psuffix, JOB_BAD_SUFFIX); #ifdef WIN32 if (MoveFileEx(pbs_recov_filename, basen, MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH) == 0) { errno = GetLastError(); sprintf(log_buffer, "MoveFileEx(%s, %s) failed!", pbs_recov_filename, basen); log_err(errno, "nodes", log_buffer); } secure_file(basen, "Administrators", READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED); #else if (rename(pbs_recov_filename, basen) == -1) { sprintf(log_buffer, "error renaming job file %s", pbs_recov_filename); log_err(errno, "job_recov", log_buffer); free((char *)pj); return ((job *)0); } #endif fds = open(basen, O_RDONLY, 0); if (fds < 0) { sprintf(log_buffer, "error opening of job file %s", pbs_recov_filename); log_err(errno, "job_recov", log_buffer); free((char *)pj); return ((job *)0); } #ifdef WIN32 setmode(fds, O_BINARY); #endif /* read in job fixed sub-structure */ errno = -1; if (read(fds, (char *)&pj->ji_qs, fixedsize) != (int)fixedsize) { sprintf(log_buffer, "error reading fixed portion of %s", pbs_recov_filename); log_err(errno, "job_recov", log_buffer); free((char *)pj); (void)close(fds); return ((job *)0); } /* Does file name match the internal name? 
*/ /* This detects ghost files */ #ifdef WIN32 pn = strrchr(pbs_recov_filename, (int)'/'); if (pn == NULL) pn = strrchr(pbs_recov_filename, (int)'\\'); if (pn == NULL) { sprintf(log_buffer, "bad path %s", pbs_recov_filename); log_err(errno, "job_recov", log_buffer); free((char *)pj); (void)close(fds); return ((job *)0); } pn++; #else pn = strrchr(pbs_recov_filename, (int)'/') + 1; #endif if (strncmp(pn, pj->ji_qs.ji_jobid, strlen(pn)-3) != 0) { /* mismatch, discard job */ (void)sprintf(log_buffer, "Job Id %s does not match file name for %s", pj->ji_qs.ji_jobid, pbs_recov_filename); log_err(-1, "job_recov", log_buffer); free((char *)pj); (void)close(fds); return ((job *)0); } /* unless directed, don't recover Array Sub jobs */ if ((pj->ji_qs.ji_svrflags & JOB_SVFLG_SubJob) && (recov_subjob == NO_RECOV_SUBJOB)) { free((char *)pj); (void)close(fds); return ((job *)0); } /* read in extended save area depending on VERSION */ errno = -1; DBPRT(("Job save version %d\n", pj->ji_qs.ji_jsversion)) if (pj->ji_qs.ji_jsversion < JSVERSION_514) { /* If really old version, it wasn't there, abort out */ sprintf(log_buffer, "Job structure version cannot be recovered for job %s", pbs_recov_filename); log_err(errno, "job_recov", log_buffer); free((char *)pj); (void)close(fds); return ((job *)0); } else if (pj->ji_qs.ji_jsversion < JSVERSION_80) { /* If older version, read and copy extended area */ if (recov_514_extend(fds, pj) != 0) { sprintf(log_buffer, "error reading extended portion" " of %s for prior version", pbs_recov_filename); log_err(errno, "job_recov", log_buffer); free((char *)pj); (void)close(fds); return ((job *)0); } } else { /* If current version, JSVERSION_80, read into place */ if (read(fds, (char *)&pj->ji_extended, sizeof(union jobextend)) != sizeof(union jobextend)) { sprintf(log_buffer, "error reading extended portion of %s", pbs_recov_filename); log_err(errno, "job_recov", log_buffer); free((char *)pj); (void)close(fds); return ((job *)0); } } #ifndef PBS_MOM if 
(pj->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob) { size_t xs; if (read(fds, (char *)&xs, sizeof(xs)) != sizeof(xs)) { sprintf(log_buffer, "error reading array section of %s", pbs_recov_filename); log_err(errno, "job_recov", log_buffer); free((char *)pj); (void)close(fds); return ((job *)0); } if ((pj->ji_ajtrk = (struct ajtrkhd *)malloc(xs)) == NULL) { free((char *)pj); (void)close(fds); return ((job *)0); } read(fds, (char *)pj->ji_ajtrk + sizeof(xs), xs - sizeof(xs)); pj->ji_ajtrk->tkm_size = xs; } #endif /* not PBS_MOM */ /* read in working attributes */ if (recov_attr_fs(fds, pj, job_attr_def, pj->ji_wattr, (int)JOB_ATR_LAST, (int)JOB_ATR_UNKN) != 0) { sprintf(log_buffer, "error reading attributes portion of %s", pbs_recov_filename); log_err(errno, "job_recov", log_buffer); job_free(pj); (void)close(fds); return ((job *)0); } (void)close(fds); #if defined(PBS_MOM) && defined(WIN32) /* get a handle to the job (may not exist) */ pj->ji_hJob = OpenJobObject(JOB_OBJECT_ALL_ACCESS, FALSE, pj->ji_qs.ji_jobid); #endif /* all done recovering the job, change file name back to .JB */ #ifdef WIN32 if (MoveFileEx(basen, pbs_recov_filename, MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH) == 0) { errno = GetLastError(); sprintf(log_buffer, "MoveFileEx(%s, %s) failed!", basen, pbs_recov_filename); log_err(errno, "nodes", log_buffer); } secure_file(pbs_recov_filename, "Administrators", READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED); #else (void)rename(basen, pbs_recov_filename); #endif return (pj); }
int job_reap(bool interactive) { ASSERT_IS_MAIN_THREAD(); job_t *jnext; int found=0; /* job_reap may fire an event handler, we do not want to call ourselves recursively (to avoid infinite recursion). */ static bool locked = false; if (locked) { return 0; } locked = true; process_mark_finished_children(false); /* Preserve the exit status */ const int saved_status = proc_get_last_status(); job_iterator_t jobs; const size_t job_count = jobs.count(); jnext = jobs.next(); while (jnext) { job_t *j = jnext; jnext = jobs.next(); /* If we are reaping only jobs who do not need status messages sent to the console, do not consider reaping jobs that need status messages */ if ((!job_get_flag(j, JOB_SKIP_NOTIFICATION)) && (!interactive) && (!job_get_flag(j, JOB_FOREGROUND))) { continue; } for (process_t *p = j->first_process; p; p=p->next) { int s; if (!p->completed) continue; if (!p->pid) continue; s = p->status; proc_fire_event(L"PROCESS_EXIT", EVENT_EXIT, p->pid, (WIFSIGNALED(s)?-1:WEXITSTATUS(s))); if (WIFSIGNALED(s)) { /* Ignore signal SIGPIPE.We issue it ourselves to the pipe writer when the pipe reader dies. */ if (WTERMSIG(s) != SIGPIPE) { int proc_is_job = ((p==j->first_process) && (p->next == 0)); if (proc_is_job) job_set_flag(j, JOB_NOTIFIED, 1); if (!job_get_flag(j, JOB_SKIP_NOTIFICATION)) { /* Print nothing if we get SIGINT in the foreground process group, to avoid spamming obvious stuff on the console (#1119). If we get SIGINT for the foreground process, assume the user typed ^C and can see it working. It's possible they didn't, and the signal was delivered via pkill, etc., but the SIGINT/SIGTERM distinction is precisely to allow INT to be from a UI and TERM to be programmatic, so this assumption is keeping with the design of signals. If echoctl is on, then the terminal will have written ^C to the console. If off, it won't have. We don't echo ^C either way, so as to respect the user's preference. */ if (WTERMSIG(p->status) != SIGINT || ! 
job_get_flag(j, JOB_FOREGROUND)) { if (proc_is_job) { // We want to report the job number, unless it's the only job, in which case we don't need to const wcstring job_number_desc = (job_count == 1) ? wcstring() : format_string(L"Job %d, ", j->job_id); fwprintf(stdout, _(L"%ls: %ls\'%ls\' terminated by signal %ls (%ls)"), program_name, job_number_desc.c_str(), truncate_command(j->command()).c_str(), sig2wcs(WTERMSIG(p->status)), signal_get_desc(WTERMSIG(p->status))); } else { const wcstring job_number_desc = (job_count == 1) ? wcstring() : format_string(L"from job %d, ", j->job_id); fwprintf(stdout, _(L"%ls: Process %d, \'%ls\' %ls\'%ls\' terminated by signal %ls (%ls)"), program_name, p->pid, p->argv0(), job_number_desc.c_str(), truncate_command(j->command()).c_str(), sig2wcs(WTERMSIG(p->status)), signal_get_desc(WTERMSIG(p->status))); } tputs(clr_eol,1,&writeb); fwprintf(stdout, L"\n"); } found=1; } /* Clear status so it is not reported more than once */ p->status = 0; } } } /* If all processes have completed, tell the user the job has completed and delete it from the active job list. */ if (job_is_completed(j)) { if (!job_get_flag(j, JOB_FOREGROUND) && !job_get_flag(j, JOB_NOTIFIED) && !job_get_flag(j, JOB_SKIP_NOTIFICATION)) { format_job_info(j, _(L"ended"), job_count); found=1; } proc_fire_event(L"JOB_EXIT", EVENT_EXIT, -j->pgid, 0); proc_fire_event(L"JOB_EXIT", EVENT_JOB_ID, j->job_id, 0); job_free(j); } else if (job_is_stopped(j) && !job_get_flag(j, JOB_NOTIFIED)) { /* Notify the user about newly stopped jobs. */ if (!job_get_flag(j, JOB_SKIP_NOTIFICATION)) { format_job_info(j, _(L"stopped"), job_count); found=1; } job_set_flag(j, JOB_NOTIFIED, 1); } } if (found) fflush(stdout); /* Restore the exit status. */ proc_set_last_status(saved_status); locked = false; return found; }
job *job_recov( char *filename) /* I */ /* pathname to job save file */ { int fds; job *pj; char *pn; char namebuf[MAXPATHLEN]; char log_buf[LOCAL_LOG_BUF_SIZE]; #ifndef PBS_MOM char parent_id[PBS_MAXSVRJOBID + 1]; job_array *pa; #endif pj = job_alloc(); /* allocate & initialize job structure space */ if (pj == NULL) { /* FAILURE - cannot alloc memory */ return(NULL); } snprintf(namebuf, MAXPATHLEN, "%s%s", path_jobs, filename); /* job directory path, filename */ fds = open(namebuf, O_RDONLY, 0); if (fds < 0) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "unable to open %s", namebuf); log_err(errno, __func__, log_buf); #ifndef PBS_MOM unlock_ji_mutex(pj, __func__, "1", LOGLEVEL); free(pj->ji_mutex); #endif free((char *)pj); /* FAILURE - cannot open job file */ return(NULL); } /* read in job quick save sub-structure */ if (read_ac_socket(fds, (char *)&pj->ji_qs, sizeof(pj->ji_qs)) != sizeof(pj->ji_qs) && pj->ji_qs.qs_version == PBS_QS_VERSION) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "Unable to read %s", namebuf); log_err(errno, __func__, log_buf); #ifndef PBS_MOM unlock_ji_mutex(pj, __func__, "2", LOGLEVEL); free(pj->ji_mutex); #endif free((char *)pj); close(fds); return(NULL); } /* is ji_qs the version we expect? */ if (pj->ji_qs.qs_version != PBS_QS_VERSION) { /* ji_qs is older version */ snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "%s appears to be from an old version. Attempting to convert.\n", namebuf); log_err(-1, __func__, log_buf); if (job_qs_upgrade(pj, fds, namebuf, pj->ji_qs.qs_version) != 0) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "unable to upgrade %s\n", namebuf); log_err(-1, __func__, log_buf); #ifndef PBS_MOM unlock_ji_mutex(pj, __func__, "3", LOGLEVEL); free(pj->ji_mutex); #endif free((char *)pj); close(fds); return(NULL); } } /* END if (pj->ji_qs.qs_version != PBS_QS_VERSION) */ /* Does file name match the internal name? 
*/ /* This detects ghost files */ pn = strrchr(namebuf, (int)'/') + 1; if (strncmp(pn, pj->ji_qs.ji_fileprefix, strlen(pj->ji_qs.ji_fileprefix)) != 0) { /* mismatch, discard job */ snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "Job Id %s does not match file name for %s", pj->ji_qs.ji_jobid, namebuf); log_err(-1, __func__, log_buf); #ifndef PBS_MOM unlock_ji_mutex(pj, __func__, "4", LOGLEVEL); free(pj->ji_mutex); #endif free((char *)pj); close(fds); return(NULL); } /* read in working attributes */ if (recov_attr( fds, pj, job_attr_def, pj->ji_wattr, JOB_ATR_LAST, JOB_ATR_UNKN, TRUE) != 0) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "unable to recover %s (file is likely corrupted)", namebuf); log_err(-1, __func__, log_buf); #ifndef PBS_MOM unlock_ji_mutex(pj, __func__, "5", LOGLEVEL); job_free(pj, FALSE); #else mom_job_free(pj); #endif close(fds); return(NULL); } #ifndef PBS_MOM /* Comment out the mother superior tracking. Will be debugged later if (pj->ji_wattr[JOB_ATR_exec_host].at_val.at_str != NULL) {*/ /* add job to the mother superior list for it's node */ /* char *ms = strdup(pj->ji_wattr[JOB_ATR_exec_host].at_val.at_str); char *end = strchr(ms, '/'); if (end != NULL) *end = '\0'; if ((end = strchr(ms, '+')) != NULL) *end = '\0'; add_to_ms_list(ms, pj); free(ms); }*/ #endif #ifdef PBS_MOM /* read in tm sockets and ips */ if (recov_tmsock(fds, pj) != 0) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "warning: tmsockets not recovered from %s (written by an older pbs_mom?)", namebuf); log_err(-1, __func__, log_buf); } #else /* not PBS_MOM */ if (strchr(pj->ji_qs.ji_jobid, '[') != NULL) { /* job is part of an array. We need to put a link back to the server job array struct for this array. We also have to link this job into the linked list of jobs belonging to the array. 
*/ array_get_parent_id(pj->ji_qs.ji_jobid, parent_id); pa = get_array(parent_id); if (pa == NULL) { job_abt(&pj, (char *)"Array job missing array struct, aborting job"); close(fds); return NULL; } strcpy(pj->ji_arraystructid, parent_id); if (strcmp(parent_id, pj->ji_qs.ji_jobid) == 0) { pj->ji_is_array_template = TRUE; } else { pa->job_ids[(int)pj->ji_wattr[JOB_ATR_job_array_id].at_val.at_long] = strdup(pj->ji_qs.ji_jobid); pa->jobs_recovered++; /* This is a bit of a kluge, but for some reason if an array job was on hold when the server went down the ji_wattr[JOB_ATR_hold].at_val.at_long value is 0 on recovery even though pj->ji_qs.ji_state is JOB_STATE_HELD and the substate is JOB_SUBSTATE_HELD */ if ((pj->ji_qs.ji_state == JOB_STATE_HELD) && (pj->ji_qs.ji_substate == JOB_SUBSTATE_HELD)) { pj->ji_wattr[JOB_ATR_hold].at_val.at_long = HOLD_l; pj->ji_wattr[JOB_ATR_hold].at_flags = ATR_VFLAG_SET; } } if (pa != NULL) { unlock_ai_mutex(pa, __func__, "1", LOGLEVEL); } } #endif close(fds); pj->ji_commit_done = 1; /* all done recovering the job */ job_save(pj, SAVEJOB_FULL, 0); return(pj); } /* END job_recov() */
/* command_exec(cmd, pass_pipefd) * * Execute the single command specified in the 'cmd' command structure. * * The 'pass_pipefd' argument is used for pipes. * On input, '*pass_pipefd' is the file descriptor used to read the * previous command's output. That is, it's the read end of the previous * pipe. It equals STDIN_FILENO if there was no previous pipe. * On output, command_exec should set '*pass_pipefd' to the file descriptor * used for reading from THIS command's pipe. * If this command didn't have a pipe -- that is, if cmd->commandop != PIPE * -- then it should set '*pass_pipefd = STDIN_FILENO'. * * Returns the process ID of the forked child, or < 0 if some system call * fails. * * You must also handle the internal commands "cd" and "exit". * These are special because they must execute in the shell process, rather * than a child. (Why?) * * However, these special commands still have a status! * For example, "cd DIR" should return status 0 if we successfully change * to the DIR directory, and status 1 otherwise. * Thus, "cd /tmp && echo /tmp exists" should print "/tmp exists" to stdout * iff the /tmp directory exists. * Not only this, but redirections should work too! * For example, "cd /tmp > foo" should create an empty file named 'foo'; * and "cd /tmp 2> foo" should print any error messages to 'foo'. * * How can you return a status, and do redirections, for a command executed * in the parent shell? * Hint: It is easiest if you fork a child ANYWAY! * You should divide functionality between the parent and the child. * Some functions will be executed in each process. */ static pid_t command_exec(command_t *cmd, int *pass_pipefd) { pid_t pid = -1; // process ID for child int pipefd[2]; // file descriptors for this process's pipe int status; /* EXERCISE: Complete this function! * We've written some of the skeleton for you, but feel free to * change it. */ // Create a pipe, if this command is the left-hand side of a pipe. // Return -1 if the pipe fails. 
/* Your code here. */ if(cmd->subshell && cmd->argv[0] && !(strcmp(cmd->argv[0],"") == 0)){ goto error; } if (cmd->controlop == CMD_PIPE) { if(pipe(pipefd)){ printf("Error creating pipe\n"); return -1; } } // Fork the child and execute the command in that child. // You will handle all redirections by manipulating file descriptors. // // This section is fairly long. It is probably best to implement this // part in stages, checking it after each step. For instance, first // implement just the fork and the execute in the child. This should // allow you to execute simple commands like 'ls'. Then add support // for redirections: commands like 'ls > foo' and 'cat < foo'. Then // add parentheses, then pipes, and finally the internal commands // 'cd' and 'exit'. // // In the child, you should: // 1. Set up stdout to point to this command's pipe, if necessary. // 2. Set up stdin to point to the PREVIOUS command's pipe (that // is, *pass_pipefd), if appropriate. // 3. Close some file descriptors. Hint: Consider the read end // of this process's pipe. // 4. Set up redirections. // Hint: For output redirections (stdout and stderr), the 'mode' // argument of open() should be set to 0666. // 5. Execute the command. // There are some special cases: // a. Parentheses. Execute cmd->subshell. (How?) // b. A null command (no subshell, no arguments). // Exit with status 0. // c. "exit". // d. "cd". // // In the parent, you should: // 1. Close some file descriptors. Hint: Consider the write end // of this command's pipe, and one other fd as well. // 2. Handle the special "exit" and "cd" commands. // 3. Set *pass_pipefd as appropriate. // // "cd" error note: // - Upon syntax errors: Display the message // "cd: Syntax error on bad number of arguments" // - Upon system call errors: Call perror("cd") // // "cd" Hints: // For the "cd" command, you should change directories AFTER // the fork(), not before it. Why? // Design some tests with 'bash' that will tell you the answer. 
// For example, try "cd /tmp ; cd $HOME > foo". In which directory // does foo appear, /tmp or $HOME? If you chdir() BEFORE the fork, // in which directory would foo appear, /tmp or $HOME? // if(!cmd->subshell && strcmp(cmd->argv[0],"exit") == 0){ // execute exit exit(0); } if(!cmd->subshell && (strcmp(cmd->argv[0],"") == 0)) return -1; //printf("Spawning new process\n"); // Handle reaping of zombie processes automatically // struct sigaction sigact; // sigact.sa_handler = SIG_IGN; // sigemptyset(&sigact.sa_mask); // sigact.sa_flags = 0; // if (sigaction(SIGCHLD, &sigact, 0) == -1) { // printf("Triggered\n"); // perror(0); // exit(1); // } // if (signal(SIGCHLD, SIG_IGN) == SIG_ERR) { // perror(0); // exit(1); // } /* Your code here. */ pid = fork(); // parent process if (pid > 0){ if(cmd->controlop == CMD_PIPE) { *pass_pipefd = pipefd[0]; close(pipefd[1]); } if(cmd->argv[0]){ if(strcmp(cmd->argv[0],"cd") == 0){ if(!cmd->argv[1]){ if(chdir(getenv("HOME")) != 0){ int lsts; waitpid(pid, &lsts, WNOHANG); perror("cd"); return -1; } return 0; } else if ((cmd->argv[1])[0] == '~'){ cmd->argv[1] = strcat(getenv("HOME"),(cmd->argv[1])+1); } if(chdir(cmd->argv[1]) != 0){ int lsts; waitpid(pid, &lsts, WNOHANG); perror("cd"); return -1; } } else if(strcmp(cmd->argv[0],"jobs") == 0){ if(jobindex) { printf("PID PPID STATUS\n"); int removelist[100]; int j, k = 0; for(j = 0 ; j<jobindex;j++){ char *sts; int stspid; // printf("Current Status: %d\n",waitpid(joblist[j]->pid,&stspid,WNOHANG)); joblist[j]->status = waitpid(joblist[j]->pid,&stspid,WNOHANG) ? 
DONE : RUNNING ; switch(joblist[j]->status){ default: printf("Error Setting value\n"); break; case RUNNING: sts = "RUNNING"; break; case DONE: sts = "DONE"; joblist[j]->printstatus = 1; removelist[k++] = j; break; case KILLED: sts = "KILLED"; break; } // print the status of background jobs printf("%d %d %s\n",joblist[j]->pid,joblist[j]->ppid,sts); } int l, m = 0; for(l = 0 ;l<k;l++){ job_free(removelist[l]-m++); } } } } return pid; } // child process else if (pid == 0){ FILE *fp; dup2(*pass_pipefd, STDIN_FILENO); if(cmd->redirect_filename[0]){ fp = fopen(cmd->redirect_filename[0], "r"); dup2(fileno(fp),0); } if(cmd->redirect_filename[1]){ fp =fopen(cmd->redirect_filename[1], "w"); dup2(fileno(fp), 1); } if(cmd->redirect_filename[2]){ //close(2); fp = fopen(cmd->redirect_filename[2], "w"); dup2(fileno(fp), 2); } if(cmd->controlop == CMD_PIPE) { dup2(pipefd[1],STDOUT_FILENO); close(pipefd[1]); } if(cmd->subshell){ //printf("Entered inside: %s\n",(cmd->subshell)->argv[0]); status = command_line_exec(cmd->subshell); //printf("Status: %d\n",status); // Return teh status of the last executed command if(status) kill(getpid(),SIGKILL); //printf("Reached exit error\n"); exit(status); } // printf("In child process [0]:%s, %s\n",cmd->argv[0], cmd->redirect_filename[1]); if(strcmp(cmd->argv[0],"cd") == 0){ // Exit exit(0); } if(strcmp(cmd->argv[0],"jobs") == 0){ // Exit exit(0); } //handle wildcard entries int z,m=0; char *local_cmd[512]; local_cmd[m++] = cmd->argv[0]; for(z = 1; z < 512 ;z++){ //printf("Reached %d\n",z ); local_cmd[m++] = cmd->argv[z]?cmd->argv[z]:0; //printf("Reached %d\n",z ); if(!cmd->argv[z]) break; // if((strcmp(local_cmd[0],"ls") == 0) && (strstr(cmd->argv[z],"*") != NULL)) { if(strstr(cmd->argv[z],"*") != NULL) { m--; const char *pattern = cmd->argv[z]; glob_t results; // glob function : glob(const char *pattern, int flags, int (*errfunc) (const char *epath, int eerrno), glob_t *pglob); if(glob(pattern,0,glob_error, &results) != 0) exit(1); unsigned 
int k; for (k = 0; k < results.gl_pathc; k++){ //printf("%s\n", results.gl_pathv[k]); local_cmd[m++] = results.gl_pathv[k]; } break; globfree(& results); } } local_cmd[m] = 0; execvp(local_cmd[0],local_cmd); // execvp(cmd->argv[0],cmd->argv); return -1; } error: printf("Error\n"); return -1; }
// Readrec5 is like readrec, but it reads a record in "version 5" // of the log format. static int readrec5(File *f, job l, int *err) { int r, sz = 0; size_t namelen; Jobrec5 jr; job j; tube t; char tubename[MAX_TUBE_NAME_LEN]; r = read(f->fd, &namelen, sizeof(namelen)); if (r == -1) { twarn("read"); warnpos(f, 0, "error"); *err = 1; return 0; } if (r != sizeof(namelen)) { return 0; } sz += r; if (namelen >= MAX_TUBE_NAME_LEN) { warnpos(f, -r, "namelen %zu exceeds maximum of %d", namelen, MAX_TUBE_NAME_LEN - 1); *err = 1; return 0; } if (namelen) { r = readfull(f, tubename, namelen, err, "v5 tube name"); if (!r) { return 0; } sz += r; } tubename[namelen] = '\0'; r = readfull(f, &jr, Jobrec5size, err, "v5 job struct"); if (!r) { return 0; } sz += r; // are we reading trailing zeroes? if (!jr.id) return 0; j = job_find(jr.id); if (!(j || namelen)) { // We read a short record without having seen a // full record for this job, so the full record // was in an eariler file that has been deleted. // Therefore the job itself has either been // deleted or migrated; either way, this record // should be ignored. 
return 1; } switch (jr.state) { case Reserved: jr.state = Ready; case Ready: case Buried: case Delayed: if (!j) { if (jr.body_size > job_data_size_limit) { warnpos(f, -r, "job %"PRIu64" is too big (%"PRId32" > %zu)", jr.id, jr.body_size, job_data_size_limit); goto Error; } t = tube_find_or_make(tubename); j = make_job_with_id(jr.pri, jr.delay, jr.ttr, jr.body_size, t, jr.id); j->next = j->prev = j; j->r.created_at = jr.created_at; } j->r.id = jr.id; j->r.pri = jr.pri; j->r.delay = jr.delay * 1000; // us => ns j->r.ttr = jr.ttr * 1000; // us => ns j->r.body_size = jr.body_size; j->r.created_at = jr.created_at * 1000; // us => ns j->r.deadline_at = jr.deadline_at * 1000; // us => ns j->r.reserve_ct = jr.reserve_ct; j->r.timeout_ct = jr.timeout_ct; j->r.release_ct = jr.release_ct; j->r.bury_ct = jr.bury_ct; j->r.kick_ct = jr.kick_ct; j->r.state = jr.state; job_insert(l, j); // full record; read the job body if (namelen) { if (jr.body_size != j->r.body_size) { warnpos(f, -r, "job %"PRIu64" size changed", j->r.id); warnpos(f, -r, "was %"PRId32", now %"PRId32, j->r.body_size, jr.body_size); goto Error; } r = readfull(f, j->body, j->r.body_size, err, "v5 job body"); if (!r) { goto Error; } sz += r; // since this is a full record, we can move // the file pointer and decref the old // file, if any filermjob(j->file, j); fileaddjob(f, j); } j->walused += sz; f->w->alive += sz; return 1; case Invalid: if (j) { job_remove(j); filermjob(j->file, j); job_free(j); } return 1; } Error: *err = 1; if (j) { job_remove(j); filermjob(j->file, j); job_free(j); } return 0; }
job *job_recov( char *filename) /* I */ /* pathname to job save file */ { int fds; job *pj; char *pn; char namebuf[MAXPATHLEN]; int qs_upgrade; #ifndef PBS_MOM char parent_id[PBS_MAXSVRJOBID + 1]; job_array *pa; #endif qs_upgrade = FALSE; pj = job_alloc(); /* allocate & initialize job structure space */ if (pj == NULL) { /* FAILURE - cannot alloc memory */ return(NULL); } strcpy(namebuf, path_jobs); /* job directory path */ strcat(namebuf, filename); fds = open(namebuf, O_RDONLY, 0); if (fds < 0) { sprintf(log_buffer, "unable to open %s", namebuf); log_err(errno, "job_recov", log_buffer); free((char *)pj); /* FAILURE - cannot open job file */ return(NULL); } /* read in job quick save sub-structure */ if (read(fds, (char *)&pj->ji_qs, quicksize) != (ssize_t)quicksize && pj->ji_qs.qs_version == PBS_QS_VERSION) { sprintf(log_buffer, "Unable to read %s", namebuf); log_err(errno, "job_recov", log_buffer); free((char *)pj); close(fds); return(NULL); } /* is ji_qs the version we expect? */ if (pj->ji_qs.qs_version != PBS_QS_VERSION) { /* ji_qs is older version */ sprintf(log_buffer, "%s appears to be from an old version. Attempting to convert.\n", namebuf); log_err(-1, "job_recov", log_buffer); if (job_qs_upgrade(pj, fds, namebuf, pj->ji_qs.qs_version) != 0) { sprintf(log_buffer, "unable to upgrade %s\n", namebuf); log_err(-1, "job_recov", log_buffer); free((char *)pj); close(fds); return(NULL); } qs_upgrade = TRUE; } /* END if (pj->ji_qs.qs_version != PBS_QS_VERSION) */ /* Does file name match the internal name? 
*/ /* This detects ghost files */ pn = strrchr(namebuf, (int)'/') + 1; if (strncmp(pn, pj->ji_qs.ji_fileprefix, strlen(pj->ji_qs.ji_fileprefix)) != 0) { /* mismatch, discard job */ sprintf(log_buffer, "Job Id %s does not match file name for %s", pj->ji_qs.ji_jobid, namebuf); log_err(-1, "job_recov", log_buffer); free((char *)pj); close(fds); return(NULL); } /* read in working attributes */ if (recov_attr( fds, pj, job_attr_def, pj->ji_wattr, (int)JOB_ATR_LAST, (int)JOB_ATR_UNKN, TRUE) != 0) { sprintf(log_buffer, "unable to recover %s (file is likely corrupted)", namebuf); log_err(-1, "job_recov", log_buffer); job_free(pj); close(fds); return(NULL); } #ifdef PBS_MOM /* read in tm sockets and ips */ if (recov_tmsock(fds, pj) != 0) { sprintf(log_buffer, "warning: tmsockets not recovered from %s (written by an older pbs_mom?)", namebuf); log_err(-1, "job_recov", log_buffer); } #else /* PBS_MOM */ if (pj->ji_wattr[(int)JOB_ATR_job_array_request].at_flags & ATR_VFLAG_SET) { /* job is part of an array. We need to put a link back to the server job array struct for this array. We also have to link this job into the linked list of jobs belonging to the array. 
*/ array_get_parent_id(pj->ji_qs.ji_jobid, parent_id); pa = get_array(parent_id); if (pa == NULL) { job_abt(&pj, "Array job missing array struct, aborting job"); close(fds); return NULL; } if (strcmp(parent_id, pj->ji_qs.ji_jobid) == 0) { pj->ji_is_array_template = TRUE; pj->ji_arraystruct = pa; } else { pa->jobs[(int)pj->ji_wattr[JOB_ATR_job_array_id].at_val.at_long] = (void *)pj; pj->ji_arraystruct = pa; pa->jobs_recovered++; /* This is a bit of a kluge, but for some reason if an array job was on hold when the server went down the ji_wattr[JOB_ATR_hold].at_val.at_long value is 0 on recovery even though pj->ji_qs.ji_state is JOB_STATE_HELD and the substate is JOB_SUBSTATE_HELD */ if ((pj->ji_qs.ji_state == JOB_STATE_HELD) && (pj->ji_qs.ji_substate == JOB_SUBSTATE_HELD)) { pj->ji_wattr[JOB_ATR_hold].at_val.at_long = HOLD_l; pj->ji_wattr[JOB_ATR_hold].at_flags = ATR_VFLAG_SET; } } } #endif close(fds); /* all done recovering the job */ job_save(pj, SAVEJOB_FULL); return(pj); } /* END job_recov() */