INT64_T chirp_alloc_pwrite(int fd, const void *data, INT64_T length, INT64_T offset)
{
	struct alloc_state *a;
	int result;

	if(!alloc_enabled)
		return cfs->pwrite(fd, data, length, offset);

	if(!fd_table)
		fd_table = itable_create(0);

	a = alloc_state_cache(itable_lookup(fd_table, fd));
	if(a) {
		INT64_T filesize = cfs_fd_size(fd);
		if(filesize >= 0) {
			INT64_T newfilesize = MAX(length + offset, filesize);
			INT64_T alloc_change = space_consumed(newfilesize) - space_consumed(filesize);
			if(a->avail >= alloc_change) {
				result = cfs->pwrite(fd, data, length, offset);
				if(result > 0)
					alloc_state_update(a, alloc_change);
			} else {
				errno = ENOSPC;
				result = -1;
			}
		} else {
			result = -1;
		}
	} else {
		result = -1;
	}
	return result;
}
int histogram_insert(struct histogram *h, double value)
{
	uint64_t bucket = bucket_of(h, value);
	struct box_count *box = itable_lookup(h->buckets, bucket);

	if(!box) {
		box = calloc(1, sizeof(*box));
		itable_insert(h->buckets, bucket, box);
	}

	h->total_count++;
	box->count++;

	int mode_count = histogram_count(h, histogram_mode(h));

	/* The first inserted value initializes both extremes. */
	if(value > h->max_value || h->total_count == 1) {
		h->max_value = value;
	}

	if(value < h->min_value || h->total_count == 1) {
		h->min_value = value;
	}

	if(box->count > mode_count) {
		h->mode = end_of(h, bucket);
	}

	return box->count;
}
INT64_T chirp_alloc_fstatfs(int fd, struct chirp_statfs *info)
{
	struct alloc_state *a;
	int result;

	if(!alloc_enabled)
		return cfs->fstatfs(fd, info);

	if(!fd_table)
		fd_table = itable_create(0);

	a = alloc_state_cache(itable_lookup(fd_table, fd));
	if(a) {
		result = cfs->fstatfs(fd, info);
		if(result == 0) {
			/* Report the allocation, not the underlying filesystem, as the available space. */
			info->f_blocks = a->size / info->f_bsize;
			info->f_bfree = a->avail / info->f_bsize;
			info->f_bavail = a->avail / info->f_bsize;
		}
	} else {
		result = -1;
	}
	return result;
}
INT64_T chirp_alloc_ftruncate(int fd, INT64_T length)
{
	struct alloc_state *a;
	int result;

	if(!alloc_enabled)
		return cfs->ftruncate(fd, length);

	if(!fd_table)
		fd_table = itable_create(0);

	a = alloc_state_cache(itable_lookup(fd_table, fd));
	if(a) {
		INT64_T filesize = cfs_fd_size(fd);
		if(filesize >= 0) {
			/* alloc_change may be negative when the file shrinks. */
			INT64_T alloc_change = space_consumed(length) - space_consumed(filesize);
			if(a->avail >= alloc_change) {
				result = cfs->ftruncate(fd, length);
				if(result == 0)
					alloc_state_update(a, alloc_change);
			} else {
				errno = ENOSPC;
				result = -1;
			}
		} else {
			result = -1;
		}
	} else {
		result = -1;
	}
	return result;
}
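/*
 * chirp_alloc_pwrite and chirp_alloc_ftruncate above share one check-then-commit
 * discipline: compute the change in consumed space, refuse with ENOSPC if the
 * allocation cannot absorb it, and debit the allocation only after the
 * underlying operation succeeds.  What follows is a minimal, self-contained
 * sketch of that discipline.  toy_alloc_state, BLOCK_SIZE, and toy_grow_file
 * are illustrative stand-ins, not the Chirp internals; space_consumed here
 * simply rounds up to a fixed block size, which is one plausible policy.
 */
#include <stdio.h>
#include <errno.h>
#include <stdint.h>

#define BLOCK_SIZE 4096

/* Toy stand-in for Chirp's alloc_state: just the fields the pattern needs. */
struct toy_alloc_state {
	int64_t size;  /* total bytes in the allocation */
	int64_t avail; /* bytes not yet consumed */
};

/* Space is accounted in whole blocks, so growth is measured block-to-block. */
static int64_t space_consumed(int64_t filesize)
{
	return ((filesize + BLOCK_SIZE - 1) / BLOCK_SIZE) * BLOCK_SIZE;
}

/* Check-then-commit: refuse if the change would not fit, and debit the
 * allocation only on success.  Returns 0 on success, -1 with ENOSPC set. */
static int toy_grow_file(struct toy_alloc_state *a, int64_t oldsize, int64_t newsize)
{
	int64_t change = space_consumed(newsize) - space_consumed(oldsize);
	if(a->avail < change) {
		errno = ENOSPC;
		return -1;
	}
	/* ... perform the real write or truncate here ... */
	a->avail -= change; /* commit the debit only after the operation succeeds */
	return 0;
}

int main(void)
{
	struct toy_alloc_state a = { .size = 2 * BLOCK_SIZE, .avail = 2 * BLOCK_SIZE };

	int rc = toy_grow_file(&a, 0, 100);	/* fits: one block is debited */
	printf("grow to 100:  rc=%d avail=%lld\n", rc, (long long) a.avail);

	rc = toy_grow_file(&a, 100, 9000);	/* needs two more blocks: refused */
	printf("grow to 9000: rc=%d avail=%lld\n", rc, (long long) a.avail);
	return 0;
}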
static int batch_job_amazon_batch_remove(struct batch_queue *q, batch_job_id_t jobid)
{
	struct internal_amazon_batch_amazon_ids amazon_ids = initialize(q);
	char *env_var = amazon_ids.master_env_prefix;

	/* done_jobs is keyed at jobid+1, matching the convention used in batch_job_amazon_batch_wait. */
	if(itable_lookup(done_jobs, jobid + 1) == NULL) {
		char *name = string_format("%s_%i", queue_name, (int) jobid);
		itable_insert(done_jobs, jobid + 1, name);
	}

	char *amazon_id;
	if((amazon_id = itable_lookup(amazon_job_ids, jobid)) == NULL) {
		return -1;
	}

	char *cmd = string_format("%s aws batch terminate-job --job-id %s --reason \"Makeflow Killed\"", env_var, amazon_id);
	debug(D_BATCH, "Terminating the job: %s\n", cmd);
	sh_system(cmd);
	free(cmd);
	return 0;
}
void histogram_set_bucket(struct histogram *h, double value, int count)
{
	uint64_t bucket = bucket_of(h, value);
	struct box_count *box = itable_lookup(h->buckets, bucket);

	if(!box) {
		box = calloc(1, sizeof(*box));
		itable_insert(h->buckets, bucket, box);
	}

	box->count = count;
}
/* Returns the remote name used in the wrapper for the local name filename. */
const char *makeflow_wrapper_get_remote_name(struct makeflow_wrapper *w, struct dag *d, const char *filename)
{
	struct dag_file *f;
	char *name;

	f = dag_file_from_name(d, filename);
	name = (char *) itable_lookup(w->remote_names, (uintptr_t) f);

	return name;
}
void *histogram_get_data(struct histogram *h, double value)
{
	uint64_t bucket = bucket_of(h, value);
	struct box_count *box = itable_lookup(h->buckets, bucket);

	if(!box) {
		return NULL;
	}

	return box->data;
}
/* Returns the remote name used in rule n for the local name filename. */
char *dag_file_remote_name(struct dag_node *n, const char *filename)
{
	struct dag_file *f;
	char *name;

	f = dag_file_from_name(n->d, filename);
	name = (char *) itable_lookup(n->remote_names, (uintptr_t) f);

	return name;
}
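/*
 * Both remote-name lookups above rely on the same trick used throughout this
 * listing: the dag_file pointer itself, cast to uintptr_t, is the itable key,
 * so a NULL result means "no remote name registered" and callers fall back to
 * the local name.  Below is a hedged, self-contained sketch of that
 * pointer-as-key pattern using the itable API already used above; fake_file
 * and the names mapped here are illustrative only.
 */
#include <stdio.h>
#include <stdint.h>
#include "itable.h"

struct fake_file { const char *local; };

int main(void)
{
	struct itable *remote_names = itable_create(0);
	struct fake_file in = { "input.txt" };
	struct fake_file out = { "output.txt" };

	/* The pointer value is the key, so each file maps to at most one remote name. */
	itable_insert(remote_names, (uintptr_t) &in, "task_input");

	const char *r;
	r = itable_lookup(remote_names, (uintptr_t) &in);
	printf("%s -> %s\n", in.local, r);			/* input.txt -> task_input */

	r = itable_lookup(remote_names, (uintptr_t) &out);	/* never registered */
	printf("%s -> %s\n", out.local, r ? r : out.local);	/* fall back to local name */

	itable_delete(remote_names);
	return 0;
}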
void histogram_attach_data(struct histogram *h, double value, void *data)
{
	uint64_t bucket = bucket_of(h, value);
	struct box_count *box = itable_lookup(h->buckets, bucket);

	if(!box) {
		box = calloc(1, sizeof(*box));
		itable_insert(h->buckets, bucket, box);
	}

	box->data = data;
}
int histogram_count(struct histogram *h, double value)
{
	uint64_t bucket = bucket_of(h, value);
	struct box_count *box = itable_lookup(h->buckets, bucket);

	if(!box) {
		return 0;
	}

	return box->count;
}
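/*
 * Taken together, histogram_insert, histogram_count, histogram_attach_data,
 * and histogram_get_data give a bucketed counter with one optional opaque
 * pointer per bucket.  A hedged usage sketch follows; it assumes the usual
 * cctools entry points histogram_create(bucket_size), histogram_mode, and
 * histogram_delete from histogram.h, which are not shown in this listing.
 */
#include <stdio.h>
#include "histogram.h"

int main(void)
{
	/* Assumed constructor: buckets of width 10.0. */
	struct histogram *h = histogram_create(10.0);

	histogram_insert(h, 3.0);
	histogram_insert(h, 7.0);	/* falls in the same bucket as 3.0 */
	histogram_insert(h, 42.0);

	/* Counts are per bucket, so any value inside the bucket works as a query. */
	printf("count near 5:  %d\n", histogram_count(h, 5.0));		/* 2 */
	printf("count near 40: %d\n", histogram_count(h, 40.0));	/* 1 */
	printf("mode bucket:   %g\n", histogram_mode(h));

	/* Each bucket can carry one opaque pointer. */
	histogram_attach_data(h, 42.0, "hot bucket");
	printf("data: %s\n", (char *) histogram_get_data(h, 42.0));

	histogram_delete(h);
	return 0;
}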
int batch_job_remove_local(struct batch_queue *q, batch_job_id_t jobid)
{
	if(itable_lookup(q->job_table, jobid)) {
		if(kill(jobid, SIGTERM) == 0) {
			debug(D_BATCH, "signalled process %" PRIbjid, jobid);
			return 1;
		} else {
			debug(D_BATCH, "could not signal process %" PRIbjid ": %s\n", jobid, strerror(errno));
			return 0;
		}
	} else {
		debug(D_BATCH, "process %" PRIbjid " is not under my control.\n", jobid);
		return 0;
	}
}
/*
 * To remove a batch job, we mark the task state as "aborting" and return.
 * The Mesos scheduler will then try to terminate the corresponding executors.
 * This method does not guarantee the termination of executors, but all
 * executors will be terminated before the Mesos scheduler stops.
 */
static int batch_job_mesos_remove(struct batch_queue *q, batch_job_id_t jobid)
{
	struct batch_job_info *info = itable_lookup(q->job_table, jobid);
	if(!info)
		return 0;

	info->finished = time(0);
	info->exited_normally = 0;
	info->exit_signal = 0;

	// Append the new task state to the "mesos_task_info" file.
	FILE *task_info_fp = fopen(FILE_TASK_INFO, "a+");
	if(task_info_fp == NULL) {
		fatal("can not open \"mesos_task_info\"");
	}
	fprintf(task_info_fp, "%" PRIbjid ",,,,,,,aborting\n", jobid);
	fclose(task_info_fp);

	return 0;
}
static int chirp_fuse_release(const char *path, struct fuse_file_info *fi)
{
	struct chirp_file *file;
	int result;

	pthread_mutex_lock(&mutex);

	file = itable_lookup(file_table, fi->fh);
	if(file) {
		chirp_global_close(file, time(0) + chirp_fuse_timeout);
		itable_remove(file_table, fi->fh);
		fi->fh = 0;
		result = 0;
	} else {
		result = -EBADF;
	}

	pthread_mutex_unlock(&mutex);

	return result;
}
static int batch_job_cluster_remove(struct batch_queue *q, batch_job_id_t jobid)
{
	struct batch_job_info *info;

	info = itable_lookup(q->job_table, jobid);
	if(!info)
		return 0;

	if(!info->started)
		info->started = time(0);
	info->finished = time(0);
	info->exited_normally = 0;
	info->exit_signal = 1;

	char *command = string_format("%s %" PRIbjid, cluster_remove_cmd, jobid);
	system(command);
	free(command);

	return 1;
}
static int chirp_fuse_write(const char *path, const char *buf, size_t size, off_t offset, struct fuse_file_info *fi)
{
	struct chirp_file *file;
	INT64_T result;

	pthread_mutex_lock(&mutex);

	file = itable_lookup(file_table, fi->fh);
	if(file) {
		result = chirp_global_pwrite(file, buf, size, offset, time(0) + chirp_fuse_timeout);
	} else {
		result = -1;
		errno = EBADF;
	}

	pthread_mutex_unlock(&mutex);

	if(result < 0)
		return -errno;

	return result;
}
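/*
 * Both FUSE handlers above share one pattern: open handles live in a global
 * itable keyed by fi->fh, and every access happens under a single global
 * mutex.  Below is a minimal, self-contained sketch of that handle-table
 * discipline.  toy_handle, handle_open, and handle_close are illustrative
 * stand-ins; only the itable calls mirror the API used in this listing.
 */
#include <stdio.h>
#include <stdint.h>
#include <pthread.h>
#include "itable.h"

struct toy_handle { const char *path; };

static struct itable *file_table;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static uint64_t next_fh = 1;	/* 0 is reserved to mean "no handle" */

/* Register a handle and hand back the key the caller would store in fi->fh. */
static uint64_t handle_open(struct toy_handle *h)
{
	pthread_mutex_lock(&mutex);
	uint64_t fh = next_fh++;
	itable_insert(file_table, fh, h);
	pthread_mutex_unlock(&mutex);
	return fh;
}

/* Look up and remove in one critical section, as chirp_fuse_release does. */
static struct toy_handle *handle_close(uint64_t fh)
{
	pthread_mutex_lock(&mutex);
	struct toy_handle *h = itable_remove(file_table, fh);
	pthread_mutex_unlock(&mutex);
	return h;	/* NULL translates to -EBADF in the handler */
}

int main(void)
{
	file_table = itable_create(0);
	struct toy_handle h = { "/tmp/example" };

	uint64_t fh = handle_open(&h);
	printf("opened fh %llu\n", (unsigned long long) fh);

	struct toy_handle *closed = handle_close(fh);
	printf("closed %s\n", closed ? closed->path : "(bad handle)");
	printf("double close -> %s\n", handle_close(fh) ? "ok" : "EBADF");

	itable_delete(file_table);
	return 0;
}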
struct list *makeflow_wrapper_generate_files(struct list *result, struct list *input, struct dag_node *n, struct makeflow_wrapper *w)
{
	char *f;
	char *nodeid = string_format("%d", n->nodeid);

	struct list *files = list_create();

	list_first_item(input);
	while((f = list_next_item(input))) {
		/* Expand %-patterns with the node id, then work on a private copy. */
		char *filename = string_replace_percents(f, nodeid);
		char *fcopy = xxstrdup(filename);
		free(filename);

		char *remote, *p;
		struct dag_file *file;

		p = strchr(fcopy, '=');
		if(p) {
			/* "local=remote": split the pair and register the mapping once per file. */
			*p = 0;
			file = dag_file_lookup_or_create(n->d, fcopy);
			if(!n->local_job && !itable_lookup(w->remote_names, (uintptr_t) file)) {
				remote = xxstrdup(p + 1);
				itable_insert(w->remote_names, (uintptr_t) file, (void *) remote);
				hash_table_insert(w->remote_names_inv, remote, (void *) file);
			}
			*p = '=';
		} else {
			file = dag_file_lookup_or_create(n->d, fcopy);
		}
		free(fcopy);

		list_push_tail(files, file);
	}
	free(nodeid);

	result = list_splice(result, files);

	return result;
}
/** The clean_mode variable was added so that we can print error messages
 * appropriate to the situation. Currently it is only used to silence node
 * rerun checking. */
void makeflow_log_recover(struct dag *d, const char *filename, int verbose_mode, struct batch_queue *queue, makeflow_clean_depth clean_mode)
{
	char *line, *name, file[MAX_BUFFER_SIZE];
	int nodeid, state, jobid, file_state;
	int first_run = 1;
	struct dag_node *n;
	struct dag_file *f;
	struct stat buf;
	timestamp_t previous_completion_time;

	d->logfile = fopen(filename, "r");
	if(d->logfile) {
		int linenum = 0;
		first_run = 0;

		printf("recovering from log file %s...\n", filename);

		while((line = get_line(d->logfile))) {
			linenum++;

			if(sscanf(line, "# %d %s %" SCNu64 "", &file_state, file, &previous_completion_time) == 3) {
				f = dag_file_lookup_or_create(d, file);
				f->state = file_state;
				if(file_state == DAG_FILE_STATE_EXISTS) {
					d->completed_files += 1;
					f->creation_logged = (time_t) (previous_completion_time / 1000000);
				} else if(file_state == DAG_FILE_STATE_DELETE) {
					d->deleted_files += 1;
				}
				continue;
			}
			if(line[0] == '#')
				continue;
			if(sscanf(line, "%" SCNu64 " %d %d %d", &previous_completion_time, &nodeid, &state, &jobid) == 4) {
				n = itable_lookup(d->node_table, nodeid);
				if(n) {
					n->state = state;
					n->jobid = jobid;
					/* Log timestamp is in microseconds, we need seconds for diff. */
					n->previous_completion = (time_t) (previous_completion_time / 1000000);
					continue;
				}
			}

			fprintf(stderr, "makeflow: %s appears to be corrupted on line %d\n", filename, linenum);
			exit(1);
		}
		fclose(d->logfile);
	}

	d->logfile = fopen(filename, "a");
	if(!d->logfile) {
		fprintf(stderr, "makeflow: couldn't open logfile %s: %s\n", filename, strerror(errno));
		exit(1);
	}
	if(setvbuf(d->logfile, NULL, _IOLBF, BUFSIZ) != 0) {
		fprintf(stderr, "makeflow: couldn't set line buffer on logfile %s: %s\n", filename, strerror(errno));
		exit(1);
	}

	if(first_run && verbose_mode) {
		struct dag_file *f;
		struct dag_node *p;

		for(n = d->nodes; n; n = n->next) {
			/* Record node information to log */
			fprintf(d->logfile, "# NODE\t%d\t%s\n", n->nodeid, n->command);

			/* Record the node category to the log */
			fprintf(d->logfile, "# CATEGORY\t%d\t%s\n", n->nodeid, n->category->label);
			/* Also write the SYMBOL as an alias of CATEGORY, deprecated. */
			fprintf(d->logfile, "# SYMBOL\t%d\t%s\n", n->nodeid, n->category->label);

			/* Record node parents to log */
			fprintf(d->logfile, "# PARENTS\t%d", n->nodeid);
			list_first_item(n->source_files);
			while((f = list_next_item(n->source_files))) {
				p = f->created_by;
				if(p)
					fprintf(d->logfile, "\t%d", p->nodeid);
			}
			fputc('\n', d->logfile);

			/* Record node inputs to log */
			fprintf(d->logfile, "# SOURCES\t%d", n->nodeid);
			list_first_item(n->source_files);
			while((f = list_next_item(n->source_files))) {
				fprintf(d->logfile, "\t%s", f->filename);
			}
			fputc('\n', d->logfile);

			/* Record node outputs to log */
			fprintf(d->logfile, "# TARGETS\t%d", n->nodeid);
			list_first_item(n->target_files);
			while((f = list_next_item(n->target_files))) {
				fprintf(d->logfile, "\t%s", f->filename);
			}
			fputc('\n', d->logfile);

			/* Record translated command to log */
			fprintf(d->logfile, "# COMMAND\t%d\t%s\n", n->nodeid, n->command);
		}
	}

	dag_count_states(d);

	// Check for log consistency
	if(!first_run) {
		hash_table_firstkey(d->files);
		while(hash_table_nextkey(d->files, &name, (void **) &f)) {
			/* Only stat files the log claims exist; buf is valid only on this path. */
			if(dag_file_should_exist(f) && !dag_file_is_source(f)) {
				if(batch_fs_stat(queue, f->filename, &buf) < 0) {
					fprintf(stderr, "makeflow: %s is reported as existing, but does not exist.\n", f->filename);
					makeflow_log_file_state_change(d, f, DAG_FILE_STATE_UNKNOWN);
					continue;
				}
				if(S_ISDIR(buf.st_mode))
					continue;
				if(difftime(buf.st_mtime, f->creation_logged) > 0) {
					fprintf(stderr, "makeflow: %s is reported as existing, but has been modified (%" SCNu64 " ,%" SCNu64 ").\n", f->filename, (uint64_t) buf.st_mtime, (uint64_t) f->creation_logged);
					makeflow_clean_file(d, queue, f, 0);
					makeflow_log_file_state_change(d, f, DAG_FILE_STATE_UNKNOWN);
				}
			}
		}
	}

	int silent = 0;
	if(clean_mode != MAKEFLOW_CLEAN_NONE)
		silent = 1;

	// Decide rerun tasks
	if(!first_run) {
		struct itable *rerun_table = itable_create(0);
		for(n = d->nodes; n; n = n->next) {
			makeflow_node_decide_rerun(rerun_table, d, n, silent);
		}
		itable_delete(rerun_table);
	}

	// Update file reference counts from nodes in log
	for(n = d->nodes; n; n = n->next) {
		if(n->state == DAG_NODE_STATE_COMPLETE) {
			struct dag_file *f;
			list_first_item(n->source_files);
			while((f = list_next_item(n->source_files)))
				f->ref_count += -1;
		}
	}
}
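/*
 * The recovery loop above accepts two line shapes: "# <file_state> <filename>
 * <microsecond timestamp>" for file events and "<microsecond timestamp>
 * <nodeid> <state> <jobid>" for node transitions.  A hedged, self-contained
 * parse of both shapes follows; the two sample lines are illustrative, not
 * taken from a real log.
 */
#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	const char *file_event = "# 1 out.dat 1500000000000000";
	const char *node_event = "1500000000000000 3 2 12345";

	int file_state, nodeid, state, jobid;
	char file[256];
	uint64_t t;

	if(sscanf(file_event, "# %d %255s %" SCNu64, &file_state, file, &t) == 3)
		printf("file %s -> state %d at %" PRIu64 " us\n", file, file_state, t);

	if(sscanf(node_event, "%" SCNu64 " %d %d %d", &t, &nodeid, &state, &jobid) == 4)
		printf("node %d -> state %d (job %d) at %" PRIu64 " us\n", nodeid, state, jobid, t);

	return 0;
}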
static batch_job_id_t batch_job_condor_wait(struct batch_queue *q, struct batch_job_info *info_out, time_t stoptime)
{
	static FILE *logfile = 0;

	if(!logfile) {
		logfile = fopen(q->logfile, "r");
		if(!logfile) {
			debug(D_NOTICE, "couldn't open logfile %s: %s\n", q->logfile, strerror(errno));
			return -1;
		}
	}

	while(1) {
		/*
		 * Note: clearerr is necessary to clear any cached end-of-file condition,
		 * otherwise some implementations of fgets (i.e. darwin) will read to end
		 * of file once and then never look for any more data.
		 */
		clearerr(logfile);

		char line[BATCH_JOB_LINE_MAX];
		while(fgets(line, sizeof(line), logfile)) {
			int type, proc, subproc;
			batch_job_id_t jobid;
			time_t current;
			struct tm tm;
			struct batch_job_info *info;
			int logcode, exitcode;

			if(sscanf(line, "%d (%" SCNbjid ".%d.%d) %d/%d %d:%d:%d", &type, &jobid, &proc, &subproc, &tm.tm_mon, &tm.tm_mday, &tm.tm_hour, &tm.tm_min, &tm.tm_sec) == 9) {
				/* The log timestamp carries no year, so one is assumed. */
				tm.tm_year = 2008 - 1900;
				tm.tm_isdst = 0;
				current = mktime(&tm);

				info = itable_lookup(q->job_table, jobid);
				if(!info) {
					info = malloc(sizeof(*info));
					memset(info, 0, sizeof(*info));
					itable_insert(q->job_table, jobid, info);
				}

				debug(D_BATCH, "line: %s", line);

				if(type == 0) {
					info->submitted = current;
				} else if(type == 1) {
					info->started = current;
					debug(D_BATCH, "job %" PRIbjid " running now", jobid);
				} else if(type == 9) {
					itable_remove(q->job_table, jobid);

					info->finished = current;
					info->exited_normally = 0;
					info->exit_signal = SIGKILL;

					debug(D_BATCH, "job %" PRIbjid " was removed", jobid);

					memcpy(info_out, info, sizeof(*info));
					free(info);
					return jobid;
				} else if(type == 5) {
					itable_remove(q->job_table, jobid);

					info->finished = current;

					fgets(line, sizeof(line), logfile);
					if(sscanf(line, " (%d) Normal termination (return value %d)", &logcode, &exitcode) == 2) {
						debug(D_BATCH, "job %" PRIbjid " completed normally with status %d.", jobid, exitcode);
						info->exited_normally = 1;
						info->exit_code = exitcode;
					} else if(sscanf(line, " (%d) Abnormal termination (signal %d)", &logcode, &exitcode) == 2) {
						debug(D_BATCH, "job %" PRIbjid " completed abnormally with signal %d.", jobid, exitcode);
						info->exited_normally = 0;
						info->exit_signal = exitcode;
					} else {
						debug(D_BATCH, "job %" PRIbjid " completed with unknown status.", jobid);
						info->exited_normally = 0;
						info->exit_signal = 0;
					}

					memcpy(info_out, info, sizeof(*info));
					free(info);
					return jobid;
				}
			}
		}

		if(itable_size(q->job_table) <= 0)
			return 0;

		if(stoptime != 0 && time(0) >= stoptime)
			return -1;

		if(process_pending())
			return -1;

		sleep(1);
	}
	return -1;
}
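/*
 * The sscanf pattern above matches the fixed prefix of a Condor user-log
 * event: an event number, the (cluster.proc.subproc) job id, and a month/day
 * timestamp with no year, which is why tm_year is hard-coded.  Below is a
 * hedged, self-contained check of just that parse.  The sample line is
 * illustrative, and int64_t stands in for the cctools batch_job_id_t.
 */
#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	const char *line = "001 (1234.000.000) 07/18 15:42:18 Job executing on host ...";

	int type, proc, subproc, mon, mday, hour, min, sec;
	int64_t jobid;

	if(sscanf(line, "%d (%" SCNd64 ".%d.%d) %d/%d %d:%d:%d",
	          &type, &jobid, &proc, &subproc, &mon, &mday, &hour, &min, &sec) == 9) {
		printf("event %03d for job %" PRId64 " at %02d/%02d %02d:%02d:%02d\n",
		       type, jobid, mon, mday, hour, min, sec);
	} else {
		printf("line did not match\n");
	}
	return 0;
}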
int master_main(const char *host, int port, const char *addr)
{
	time_t idle_stoptime;
	struct link *master = NULL;
	int num_workers, i;
	struct mpi_queue_job **workers;

	struct itable *active_jobs = itable_create(0);
	struct itable *waiting_jobs = itable_create(0);
	struct list *complete_jobs = list_create();

	MPI_Comm_size(MPI_COMM_WORLD, &num_workers);

	workers = malloc(num_workers * sizeof(*workers));
	memset(workers, 0, num_workers * sizeof(*workers));

	idle_stoptime = time(0) + idle_timeout;

	while(!abort_flag) {
		char line[MPI_QUEUE_LINE_MAX];

		if(time(0) > idle_stoptime) {
			if(master) {
				printf("mpi master: gave up after waiting %ds to receive a task.\n", idle_timeout);
			} else {
				printf("mpi master: gave up after waiting %ds to connect to %s port %d.\n", idle_timeout, host, port);
			}
			break;
		}

		if(!master) {
			char working_dir[MPI_QUEUE_LINE_MAX];
			master = link_connect(addr, port, idle_stoptime);
			if(!master) {
				sleep(5);
				continue;
			}
			link_tune(master, LINK_TUNE_INTERACTIVE);

			link_readline(master, line, sizeof(line), time(0) + active_timeout);
			memset(working_dir, 0, MPI_QUEUE_LINE_MAX);
			if(sscanf(line, "workdir %s", working_dir) == 1) {
				MPI_Bcast(working_dir, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD);
			} else {
				link_close(master);
				master = NULL;
				continue;
			}
		}

		if(link_readline(master, line, sizeof(line), time(0) + short_timeout)) {
			struct mpi_queue_operation *op;
			int jobid, mode;
			INT64_T length;
			char path[MPI_QUEUE_LINE_MAX];
			op = NULL;

			debug(D_MPI, "received: %s\n", line);

			if(!strcmp(line, "get results")) {
				struct mpi_queue_job *job;
				debug(D_MPI, "results requested: %d available\n", list_size(complete_jobs));
				link_putfstring(master, "num results %d\n", time(0) + active_timeout, list_size(complete_jobs));
				while(list_size(complete_jobs)) {
					job = list_pop_head(complete_jobs);
					link_putfstring(master, "result %d %d %d %lld\n", time(0) + active_timeout, job->jobid, job->status, job->result, job->output_length);
					if(job->output_length) {
						link_write(master, job->output, job->output_length, time(0) + active_timeout);
					}
					mpi_queue_job_delete(job);
				}
			} else if(sscanf(line, "work %d %lld", &jobid, &length) == 2) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_WORK;
				op->buffer_length = length + 1;
				op->buffer = malloc(length + 1);
				op->buffer[length] = 0;	/* terminate the last byte, not one past the buffer */
				link_read(master, op->buffer, length, time(0) + active_timeout);
				op->result = -1;
			} else if(sscanf(line, "stat %d %s", &jobid, path) == 2) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_STAT;
				sprintf(op->args, "%s", path);
				op->result = -1;
			} else if(sscanf(line, "unlink %d %s", &jobid, path) == 2) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_UNLINK;
				sprintf(op->args, "%s", path);
				op->result = -1;
			} else if(sscanf(line, "mkdir %d %s %o", &jobid, path, &mode) == 3) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_MKDIR;
				sprintf(op->args, "%s %o", path, mode);
				op->result = -1;
			} else if(sscanf(line, "close %d", &jobid) == 1) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_CLOSE;
				op->result = -1;
//			} else if(sscanf(line, "symlink %d %s %s", &jobid, path, filename) == 3) {
//			} else if(sscanf(line, "put %d %s %lld %o", &jobid, filename, &length, &mode) == 4) {
//			} else if(sscanf(line, "rget %d %s", &jobid, filename) == 2) {
//			} else if(sscanf(line, "get %d %s", &jobid, filename) == 2) {
//			} else if(sscanf(line, "thirdget %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) {
//			} else if(sscanf(line, "thirdput %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) {
			} else if(!strcmp(line, "exit")) {
				break;
			} else {
				abort_flag = 1;
				continue;
			}
			if(op) {
				struct mpi_queue_job *job;
				job = itable_lookup(active_jobs, jobid);
				if(!job) {
					job = itable_lookup(waiting_jobs, jobid);
				}
				if(!job) {
					job = malloc(sizeof(*job));
					memset(job, 0, sizeof(*job));
					job->jobid = jobid;
					job->operations = list_create();
					job->status = MPI_QUEUE_JOB_WAITING;
					job->worker_rank = -1;
					itable_insert(waiting_jobs, jobid, job);
				}
				list_push_tail(job->operations, op);
			}
			idle_stoptime = time(0) + idle_timeout;
		} else {
			link_close(master);
			master = 0;
			sleep(5);
		}

		int num_waiting_jobs = itable_size(waiting_jobs);
		int num_unvisited_jobs = itable_size(active_jobs);

		for(i = 1; i < num_workers && (num_unvisited_jobs > 0 || num_waiting_jobs > 0); i++) {
			struct mpi_queue_job *job;
			struct mpi_queue_operation *op;
			int flag = 0;
			UINT64_T jobid;

			if(!workers[i]) {
				if(num_waiting_jobs) {
					itable_firstkey(waiting_jobs);
					itable_nextkey(waiting_jobs, &jobid, (void **) &job);
					itable_remove(waiting_jobs, jobid);
					itable_insert(active_jobs, jobid, job);
					workers[i] = job;
					num_waiting_jobs--;
					job->worker_rank = i;
					job->status = MPI_QUEUE_JOB_READY;
				} else {
					continue;
				}
			} else {
				num_unvisited_jobs--;
				if(workers[i]->status == MPI_QUEUE_JOB_BUSY) {
					MPI_Test(&workers[i]->request, &flag, &workers[i]->mpi_status);
					if(flag) {
						op = list_pop_head(workers[i]->operations);
						if(op->output_length) {
							op->output_buffer = malloc(op->output_length);
							MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status);
						}

						workers[i]->status = MPI_QUEUE_JOB_READY;

						if(op->type == MPI_QUEUE_OP_WORK || op->result < 0) {
							if(workers[i]->output)
								free(workers[i]->output);
							workers[i]->output = op->output_buffer;
							op->output_buffer = NULL;
							workers[i]->output_length = op->output_length;
							workers[i]->result = op->result;
							if(op->result < 0) {
								workers[i]->status = MPI_QUEUE_JOB_FAILED | op->type;
								op->type = MPI_QUEUE_OP_CLOSE;
								list_push_head(workers[i]->operations, op);
								op = NULL;
							}
						}
						if(op) {
							if(op->buffer)
								free(op->buffer);
							if(op->output_buffer)
								free(op->output_buffer);
							free(op);
						}
					}
				}
			}

			if(workers[i]->status != MPI_QUEUE_JOB_BUSY && list_size(workers[i]->operations)) {
				op = list_peek_head(workers[i]->operations);

				if(op->type == MPI_QUEUE_OP_CLOSE) {
					itable_remove(active_jobs, workers[i]->jobid);
					list_push_tail(complete_jobs, workers[i]);
					if(!(workers[i]->status & MPI_QUEUE_JOB_FAILED))
						workers[i]->status = MPI_QUEUE_JOB_COMPLETE;
					workers[i] = NULL;
					i--;
					continue;
				}

				MPI_Send(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD);
				if(op->buffer_length) {
					MPI_Send(op->buffer, op->buffer_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD);
					free(op->buffer);
					op->buffer_length = 0;
					op->buffer = NULL;
				}
				MPI_Irecv(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->request);
				workers[i]->status = MPI_QUEUE_JOB_BUSY;
			}
		}
	}

	/** Clean up waiting & complete jobs, send Exit commands to each worker */
	if(!master) {
		// If the master link hasn't been set up yet,
		// the workers will be waiting for the working directory.
		char line[MPI_QUEUE_LINE_MAX];
		memset(line, 0, MPI_QUEUE_LINE_MAX);
		MPI_Bcast(line, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD);
	} else {
		link_close(master);
	}

	for(i = 1; i < num_workers; i++) {
		struct mpi_queue_operation *op, close;
		memset(&close, 0, sizeof(close));
		close.type = MPI_QUEUE_OP_EXIT;

		if(workers[i]) {
			if(workers[i]->status == MPI_QUEUE_JOB_BUSY) {
				MPI_Wait(&workers[i]->request, &workers[i]->mpi_status);
				op = list_peek_head(workers[i]->operations);

				if(op->output_length) {
					op->output_buffer = malloc(op->output_length);
					MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status);
				}
			}
			itable_remove(active_jobs, workers[i]->jobid);
			list_push_tail(complete_jobs, workers[i]);
		}
		MPI_Send(&close, sizeof(close), MPI_BYTE, i, 0, MPI_COMM_WORLD);
	}

	itable_firstkey(waiting_jobs);
	while(itable_size(waiting_jobs)) {
		struct mpi_queue_job *job;
		UINT64_T jobid;

		itable_nextkey(waiting_jobs, &jobid, (void **) &job);
		itable_remove(waiting_jobs, jobid);
		list_push_tail(complete_jobs, job);
	}

	while(list_size(complete_jobs)) {
		mpi_queue_job_delete(list_pop_head(complete_jobs));
	}

	MPI_Finalize();

	return abort_flag;
}
static batch_job_id_t batch_job_mesos_wait(struct batch_queue *q, struct batch_job_info *info_out, time_t stoptime)
{
	char line[MAX_BUF_SIZE];
	FILE *task_state_fp;
	int last_pos = 0;
	int curr_pos = 0;
	int read_len = 0;

	if(!finished_tasks) {
		finished_tasks = itable_create(0);
	}

	// Busy-wait until the scheduler has created the task state file.
	while(access(FILE_TASK_STATE, F_OK) == -1) {}

	task_state_fp = fopen(FILE_TASK_STATE, "r");

	while(1) {
		char *task_id_str;
		char *task_stat_str;
		const char *task_exit_code;
		int task_id;

		while(fgets(line, MAX_BUF_SIZE, task_state_fp) != NULL) {
			curr_pos = ftell(task_state_fp);
			read_len = curr_pos - last_pos;
			last_pos = curr_pos;

			// Trim the trailing newline.
			if(line[read_len - 1] == '\n') {
				line[read_len - 1] = '\0';
				--read_len;
			}

			task_id_str = strtok(line, ",");
			task_id = atoi(task_id_str);

			// A task we have not seen finish before.
			if(itable_lookup(finished_tasks, task_id) == NULL) {
				struct batch_job_info *info = itable_remove(q->job_table, task_id);
				info->finished = time(0);

				task_stat_str = strtok(NULL, ",");
				if(strcmp(task_stat_str, "finished") == 0) {
					info->exited_normally = 1;
				} else if(strcmp(task_stat_str, "failed") == 0) {
					info->exited_normally = 0;
					task_exit_code = strtok(NULL, ",");
					// 444 is an arbitrary exit code set in mf_mesos_scheduler,
					// which means the task failed to retrieve the outputs.
					if(atoi(task_exit_code) == 444) {
						info->exit_code = 444;
						debug(D_BATCH, "Task %s failed to retrieve the output.", task_id_str);
					}
					info->exit_code = atoi(task_exit_code);
				} else {
					info->exited_normally = 0;
				}

				memcpy(info_out, info, sizeof(*info));
				free(info);
				fclose(task_state_fp);

				// Any non-NULL value marks the task as already reported.
				itable_insert(finished_tasks, task_id, (void *) 1);

				return task_id;
			}
		}

		sleep(1);

		if(stoptime != 0 && time(0) >= stoptime) {
			fclose(task_state_fp);
			return -1;
		}
	}
}
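/*
 * The state file parsed above is line-oriented CSV: a task id, then a state
 * such as "finished" or "failed", then an exit code for failures;
 * batch_job_mesos_remove appends lines in the same comma-separated format.
 * A hedged, self-contained parse of one such line follows; the sample line
 * is illustrative, not from a real run.
 */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int main(void)
{
	char line[] = "17,failed,444";	/* illustrative: id, state, exit code */

	char *task_id_str = strtok(line, ",");
	char *task_stat_str = strtok(NULL, ",");
	char *task_exit_code = strtok(NULL, ",");

	int task_id = atoi(task_id_str);
	printf("task %d state %s", task_id, task_stat_str);
	if(task_exit_code && strcmp(task_stat_str, "failed") == 0)
		printf(" exit %d", atoi(task_exit_code));
	printf("\n");
	return 0;
}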
static void mainloop(struct batch_queue *queue, const char *project_regex, const char *foremen_regex)
{
	int workers_submitted = 0;
	struct itable *job_table = itable_create(0);

	struct list *masters_list = NULL;
	struct list *foremen_list = NULL;

	const char *submission_regex = foremen_regex ? foremen_regex : project_regex;

	while(!abort_flag) {
		masters_list = work_queue_catalog_query(catalog_host, catalog_port, project_regex);

		debug(D_WQ, "evaluating master list...");
		int workers_needed = count_workers_needed(masters_list, 0);

		debug(D_WQ, "%d total workers needed across %d masters", workers_needed, masters_list ? list_size(masters_list) : 0);

		if(foremen_regex) {
			debug(D_WQ, "evaluating foremen list...");
			foremen_list = work_queue_catalog_query(catalog_host, catalog_port, foremen_regex);
			workers_needed += count_workers_needed(foremen_list, 1);
			debug(D_WQ, "%d total workers needed across %d foremen", workers_needed, list_size(foremen_list));
		}

		debug(D_WQ, "raw workers needed: %d", workers_needed);

		if(workers_needed > workers_max) {
			debug(D_WQ, "applying maximum of %d workers", workers_max);
			workers_needed = workers_max;
		}

		if(workers_needed < workers_min) {
			debug(D_WQ, "applying minimum of %d workers", workers_min);
			workers_needed = workers_min;
		}

		int new_workers_needed = workers_needed - workers_submitted;

		debug(D_WQ, "workers needed: %d", workers_needed);
		debug(D_WQ, "workers in queue: %d", workers_submitted);
		print_stats(masters_list, foremen_list, workers_submitted, workers_needed, new_workers_needed);

		if(new_workers_needed > 0) {
			debug(D_WQ, "submitting %d new workers to reach target", new_workers_needed);
			workers_submitted += submit_workers(queue, job_table, new_workers_needed, submission_regex);
		} else if(new_workers_needed < 0) {
			debug(D_WQ, "too many workers, will wait for some to exit");
		} else {
			debug(D_WQ, "target number of workers is reached.");
		}

		debug(D_WQ, "checking for exited workers...");
		time_t stoptime = time(0) + 5;

		while(1) {
			struct batch_job_info info;
			batch_job_id_t jobid;
			jobid = batch_job_wait_timeout(queue, &info, stoptime);
			if(jobid > 0) {
				if(itable_lookup(job_table, jobid)) {
					itable_remove(job_table, jobid);
					debug(D_WQ, "worker job %" PRId64 " exited", jobid);
					workers_submitted--;
				} else {
					// It may have been a job from a previous run.
				}
			} else {
				break;
			}
		}

		delete_projects_list(masters_list);
		delete_projects_list(foremen_list);

		sleep(30);
	}

	remove_all_workers(queue, job_table);
	itable_delete(job_table);
}
static batch_job_id_t batch_job_amazon_batch_wait(struct batch_queue *q, struct batch_job_info *info_out, time_t stoptime)
{
	struct internal_amazon_batch_amazon_ids amazon_ids = initialize(q);
	int done = 0;	/* DESCRIBE_AWS_JOB_* status for the job under inspection */
	char *env_var = amazon_ids.master_env_prefix;

	itable_firstkey(amazon_job_ids);
	char *jaid;
	UINT64_T jobid;
	while(itable_nextkey(amazon_job_ids, &jobid, (void **) &jaid)) {
		done = describe_aws_job(jaid, env_var);
		char *jobname = string_format("%s_%u", queue_name, (unsigned int) jobid);
		unsigned int id = (unsigned int) jobid;

		if(done == DESCRIBE_AWS_JOB_SUCCESS) {
			if(itable_lookup(done_jobs, id + 1) == NULL) {
				// This job just finished; record it so it is returned only once.
				debug(D_BATCH, "Inserting id: %u into done_jobs", id);
				itable_insert(done_jobs, id + 1, jobname);
				itable_remove(amazon_job_ids, jobid);

				// Pull the output files back from S3.
				char *output_files = itable_lookup(done_files, id);
				struct list *file_list = extract_file_names_from_list(output_files);
				if(list_size(file_list) > 0) {
					list_first_item(file_list);
					char *cur_file = NULL;
					while((cur_file = list_next_item(file_list)) != NULL) {
						debug(D_BATCH, "Copying over %s", cur_file);
						char *get_from_s3_cmd = string_format("%s aws s3 cp s3://%s/%s.txz ./%s.txz && tar -xvf %s.txz && rm %s.txz", env_var, bucket_name, cur_file, cur_file, cur_file, cur_file);
						int outputcode = sh_system(get_from_s3_cmd);
						debug(D_BATCH, "output code from calling S3 to pull file %s: %i", cur_file, outputcode);
						FILE *tmpOut = fopen(cur_file, "r");
						if(tmpOut) {
							debug(D_BATCH, "File does indeed exist: %s", cur_file);
							fclose(tmpOut);
						} else {
							debug(D_BATCH, "File doesn't exist: %s", cur_file);
						}
						free(get_from_s3_cmd);
					}
				}
				list_free(file_list);
				list_delete(file_list);

				// Let Makeflow know we're all done!
				debug(D_BATCH, "Removing the job from the job_table");
				struct batch_job_info *info = itable_remove(q->job_table, id);
				info->finished = time(0);
				info->exited_normally = 1;
				info->exit_code = finished_aws_job_exit_code(jaid, env_var);
				debug(D_BATCH, "copying over the data to info_out");
				memcpy(info_out, info, sizeof(struct batch_job_info));
				free(info);

				char *jobdef = aws_job_def(jaid);
				del_job_def(jobdef);
				free(jobdef);

				return id;
			}
		} else if(done == DESCRIBE_AWS_JOB_FAILED || done == DESCRIBE_AWS_JOB_NON_EXIST) {
			if(itable_lookup(done_jobs, id + 1) == NULL) {
				// This job failed; record it so it is returned only once.
				itable_insert(done_jobs, id + 1, jobname);
				itable_remove(amazon_job_ids, jobid);

				debug(D_BATCH, "Failed job: %i", id);

				struct batch_job_info *info = itable_remove(q->job_table, id);
				info->finished = time(0);
				info->exited_normally = 0;
				int exc = finished_aws_job_exit_code(jaid, env_var);
				info->exit_code = exc == 0 ? -1 : exc;

				memcpy(info_out, info, sizeof(*info));
				free(info);

				char *jobdef = aws_job_def(jaid);
				del_job_def(jobdef);
				free(jobdef);

				return id;
			}
		} else {
			continue;
		}
	}
	return -1;
}
static void mainloop(struct batch_queue *queue)
{
	int workers_submitted = 0;
	struct itable *job_table = itable_create(0);

	struct list *masters_list = NULL;
	struct list *foremen_list = NULL;

	int64_t factory_timeout_start = time(0);

	while(!abort_flag) {
		if(config_file && !read_config_file(config_file)) {
			debug(D_NOTICE, "Error re-reading '%s'. Using previous values.", config_file);
		} else {
			set_worker_resources_options(queue);
			batch_queue_set_option(queue, "autosize", autosize ? "yes" : NULL);
		}

		submission_regex = foremen_regex ? foremen_regex : project_regex;

		if(using_catalog) {
			masters_list = work_queue_catalog_query(catalog_host, catalog_port, project_regex);
		} else {
			masters_list = do_direct_query(master_host, master_port);
		}

		if(masters_list && list_size(masters_list) > 0) {
			factory_timeout_start = time(0);
		} else {
			// Check whether the factory timeout has triggered; it is 0 when the flag isn't set.
			if(factory_timeout > 0) {
				if(time(0) - factory_timeout_start > factory_timeout) {
					fprintf(stderr, "There have been no masters for longer than the factory timeout, exiting\n");
					abort_flag = 1;
					break;
				}
			}
		}

		debug(D_WQ, "evaluating master list...");
		int workers_needed = count_workers_needed(masters_list, 0);
		int workers_connected = count_workers_connected(masters_list);

		debug(D_WQ, "%d total workers needed across %d masters", workers_needed, masters_list ? list_size(masters_list) : 0);

		if(foremen_regex) {
			debug(D_WQ, "evaluating foremen list...");
			foremen_list = work_queue_catalog_query(catalog_host, catalog_port, foremen_regex);

			/* Add workers on foremen. Also, subtract foremen from workers
			 * connected, as they were not deployed by the pool. */
			workers_needed += count_workers_needed(foremen_list, 1);
			workers_connected += MAX(count_workers_connected(foremen_list) - list_size(foremen_list), 0);

			debug(D_WQ, "%d total workers needed across %d foremen", workers_needed, list_size(foremen_list));
		}

		debug(D_WQ, "raw workers needed: %d", workers_needed);

		if(workers_needed > workers_max) {
			debug(D_WQ, "applying maximum of %d workers", workers_max);
			workers_needed = workers_max;
		}

		if(workers_needed < workers_min) {
			debug(D_WQ, "applying minimum of %d workers", workers_min);
			workers_needed = workers_min;
		}

		int new_workers_needed = workers_needed - workers_submitted;

		if(workers_per_cycle > 0 && new_workers_needed > workers_per_cycle) {
			debug(D_WQ, "applying maximum workers per cycle of %d", workers_per_cycle);
			new_workers_needed = workers_per_cycle;
		}

		if(workers_per_cycle > 0 && workers_submitted > new_workers_needed + workers_connected) {
			debug(D_WQ, "waiting for %d previously submitted workers to connect", workers_submitted - workers_connected);
			new_workers_needed = 0;
		}

		debug(D_WQ, "workers needed: %d", workers_needed);
		debug(D_WQ, "workers submitted: %d", workers_submitted);
		debug(D_WQ, "workers requested: %d", new_workers_needed);

		print_stats(masters_list, foremen_list, workers_submitted, workers_needed, new_workers_needed, workers_connected);

		update_blacklisted_workers(queue, masters_list);

		if(new_workers_needed > 0) {
			debug(D_WQ, "submitting %d new workers to reach target", new_workers_needed);
			workers_submitted += submit_workers(queue, job_table, new_workers_needed);
		} else if(new_workers_needed < 0) {
			debug(D_WQ, "too many workers, will wait for some to exit");
		} else {
			debug(D_WQ, "target number of workers is reached.");
		}

		debug(D_WQ, "checking for exited workers...");
		time_t stoptime = time(0) + 5;

		while(1) {
			struct batch_job_info info;
			batch_job_id_t jobid;
			jobid = batch_job_wait_timeout(queue, &info, stoptime);
			if(jobid > 0) {
				if(itable_lookup(job_table, jobid)) {
					itable_remove(job_table, jobid);
					debug(D_WQ, "worker job %" PRId64 " exited", jobid);
					workers_submitted--;
				} else {
					// It may have been a job from a previous run.
				}
			} else {
				break;
			}
		}

		delete_projects_list(masters_list);
		delete_projects_list(foremen_list);

		sleep(factory_period);
	}

	remove_all_workers(queue, job_table);
	itable_delete(job_table);
}
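/*
 * The sizing arithmetic in both mainloop variants reduces to a small pure
 * function: clamp the raw demand to [workers_min, workers_max], subtract what
 * is already submitted, then cap the request per cycle.  A self-contained
 * sketch of that calculation follows; workers_to_request is an illustrative
 * helper, not part of the factory, and its parameter names simply mirror the
 * variables above.
 */
#include <stdio.h>

/* How many workers to request this cycle, mirroring the clamps in mainloop. */
static int workers_to_request(int raw_needed, int submitted,
                              int workers_min, int workers_max, int per_cycle)
{
	if(raw_needed > workers_max)
		raw_needed = workers_max;
	if(raw_needed < workers_min)
		raw_needed = workers_min;

	int request = raw_needed - submitted;

	if(per_cycle > 0 && request > per_cycle)
		request = per_cycle;

	return request;	/* negative means "too many workers, wait for exits" */
}

int main(void)
{
	/* 120 workers demanded, 10 running, bounds [5, 50], at most 20 per cycle. */
	printf("request: %d\n", workers_to_request(120, 10, 5, 50, 20));	/* 20 */
	/* Demand collapsed to 0: clamped up to the minimum of 5, 10 running. */
	printf("request: %d\n", workers_to_request(0, 10, 5, 50, 20));		/* -5 */
	return 0;
}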
void makeflow_wrapper_generate_files(struct batch_task *task, struct list *input, struct list *output, struct dag_node *n, struct makeflow_wrapper *w)
{
	char *f;
	char *nodeid = string_format("%d", n->nodeid);

	list_first_item(input);
	while((f = list_next_item(input))) {
		char *filename = string_replace_percents(f, nodeid);
		char *fcopy = xxstrdup(filename);
		free(filename);

		char *remote, *p;
		struct dag_file *file;

		p = strchr(fcopy, '=');
		if(p) {
			/* "local=remote": split the pair and register the mapping once per file. */
			*p = 0;
			file = dag_file_lookup_or_create(n->d, fcopy);
			if(!n->local_job && !itable_lookup(w->remote_names, (uintptr_t) file)) {
				remote = xxstrdup(p + 1);
				itable_insert(w->remote_names, (uintptr_t) file, (void *) remote);
				hash_table_insert(w->remote_names_inv, remote, (void *) file);
				makeflow_hook_add_input_file(n->d, task, fcopy, remote, file->type);
			} else {
				/* Already mapped, or a local job: register the input under its local name. */
				makeflow_hook_add_input_file(n->d, task, fcopy, NULL, file->type);
			}
			*p = '=';
		} else {
			file = dag_file_lookup_or_create(n->d, fcopy);
			makeflow_hook_add_input_file(n->d, task, fcopy, NULL, file->type);
		}
		free(fcopy);
	}

	list_first_item(output);
	while((f = list_next_item(output))) {
		char *filename = string_replace_percents(f, nodeid);
		char *fcopy = xxstrdup(filename);
		free(filename);

		char *remote, *p;
		struct dag_file *file;

		p = strchr(fcopy, '=');
		if(p) {
			*p = 0;
			file = dag_file_lookup_or_create(n->d, fcopy);
			if(!n->local_job && !itable_lookup(w->remote_names, (uintptr_t) file)) {
				remote = xxstrdup(p + 1);
				itable_insert(w->remote_names, (uintptr_t) file, (void *) remote);
				hash_table_insert(w->remote_names_inv, remote, (void *) file);
				makeflow_hook_add_output_file(n->d, task, fcopy, remote, file->type);
			} else {
				makeflow_hook_add_output_file(n->d, task, fcopy, NULL, file->type);
			}
			*p = '=';
		} else {
			file = dag_file_lookup_or_create(n->d, fcopy);
			makeflow_hook_add_output_file(n->d, task, fcopy, NULL, file->type);
		}
		free(fcopy);
	}
	free(nodeid);
}