int spits_job_manager_next_task(void *user_data, struct byte_array *ba) {
    struct pi_jm *self = user_data;

    if (self->numpoints == 0)
        return 0;

    double x = drand48();
    double y = drand48();

    byte_array_pack64(ba, x);
    byte_array_pack64(ba, y);
    self->numpoints--;

    return 1;
}
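/* A minimal sketch of the matching worker-side hook for the pi example
 * above. The signature is the one resolved via dlsym() in worker()
 * below; the hit/miss encoding (1 = point inside the unit circle) and
 * the bit-level round trip of the doubles through pack64/unpack64 are
 * assumptions, not taken from the original source. */
#if 0   /* illustration only: the real hook lives in the loaded module */
#include <string.h>

void spits_worker_run(void *user_data, struct byte_array *task,
                      struct byte_array *result) {
    uint64_t xbits, ybits;
    double x, y;

    (void) user_data;                   /* the pi worker keeps no state */
    byte_array_unpack64(task, &xbits);
    byte_array_unpack64(task, &ybits);
    memcpy(&x, &xbits, sizeof x);       /* reinterpret the 64 bits as a double */
    memcpy(&y, &ybits, sizeof y);
    byte_array_pack64(result, (uint64_t) (x * x + y * y <= 1.0));
}
#endif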
// Function responsible for the workers on the current TM node.
void *worker(void *ptr) {
    int my_rank = COMM_get_rank_id();
    int task_id, j_id = 0;                      // j_id = journal id for the current thread.
    struct tm_thread_data *d = (struct tm_thread_data *) ptr;
    struct byte_array *task;
    struct result_node *result;
    uint64_t buffer;
    struct j_entry *entry;

    workerid = d->id;

    void *(*worker_new)(int, char **);
    worker_new = dlsym(d->handle, "spits_worker_new");

    void (*execute_pit)(void *, struct byte_array *, struct byte_array *);
    execute_pit = dlsym(d->handle, "spits_worker_run");

    void *(*worker_free)(void *);
    worker_free = dlsym(d->handle, "spits_worker_free");

    if (!execute_pit) {                         // Mandatory symbol; bail out instead of crashing below.
        error("Could not resolve the spits_worker_run symbol.");
        pthread_exit(NULL);
    }

    void *user_data = worker_new ? worker_new(d->argc, d->argv) : NULL;

    if (TM_KEEP_JOURNAL > 0) {
        j_id = JOURNAL_get_id(d->dia, 'W');
    }

    sem_wait(&d->tcount);                       // Wait for the first task to arrive.
    while (d->running) {
        pthread_mutex_lock(&d->tlock);          // Get a new task.
        cfifo_pop(&d->f, &task);
        pthread_mutex_unlock(&d->tlock);

        sem_post(&d->sem);                      // Warn the Task Manager about the new space available.

        byte_array_unpack64(task, &buffer);
        task_id = (int) buffer;
        debug("[worker] Received TASK %d", task_id);
        //_byte_array_pack64(task, (uint64_t) task_id); // Put it back, might use in execute_pit.

        result = (struct result_node *) malloc(sizeof(struct result_node));
        byte_array_init(&result->ba, 10);
        byte_array_pack64(&result->ba, (uint64_t) task_id); // Pack the ID in the result byte_array.
        byte_array_pack64(&result->ba, (uint64_t) my_rank);

        if (TM_KEEP_JOURNAL > 0) {
            entry = JOURNAL_new_entry(d->dia, j_id);
            entry->action = 'P';
            gettimeofday(&entry->start, NULL);
        }

        debug("[--WORKER] task: %p", (void *) task);
        debug("[--WORKER] &result->ba: %p", (void *) &result->ba);

        execute_pit(user_data, task, &result->ba);  // Do the computation.

        if (TM_KEEP_JOURNAL > 0) {
            gettimeofday(&entry->end, NULL);
        }

        byte_array_free(task);                  // Free memory used in task and pointer.
        free(task);                             // For now, each pointer is allocated in the master thread.

        debug("Appending task %d.", task_id);
        pthread_mutex_lock(&d->rlock);          // Queue the result to send it later.
        result->next = d->results;
        result->before = NULL;
        result->task_id = task_id;
        if (d->results != NULL) {
            d->results->before = result;
        }
        d->results = result;
        if (d->is_blocking_flush == 1) {        // A blocking flush is waiting on results.
            if (TM_NO_WAIT_FINAL_FLUSH > 0) {
                sem_post(&d->no_wait_sem);
            } else {
                d->bf_remaining_tasks--;
                if (d->bf_remaining_tasks == 0) {
                    pthread_mutex_unlock(&d->bf_mutex);
                }
            }
        }
        pthread_mutex_unlock(&d->rlock);

        sem_wait(&d->tcount);                   // Wait for the next task to arrive.
    }

    if (worker_free) {
        worker_free(user_data);
    }
    //free(result);
    pthread_exit(NULL);
}
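/* The loop above is one half of a classic bounded producer/consumer
 * handoff: d->tcount counts tasks waiting in the FIFO, d->sem counts
 * free slots, and d->tlock serializes access to the queue itself. A
 * self-contained sketch of the same pattern, with a fixed-size ring
 * buffer standing in for cfifo and hypothetical names throughout: */
#if 0   /* illustration only, not part of the build */
#include <pthread.h>
#include <semaphore.h>

#define QCAP 8

struct task_queue {
    void *slot[QCAP];
    int head, tail;
    pthread_mutex_t lock;
    sem_t tcount;                       /* tasks available: consumers wait on this */
    sem_t space;                        /* free slots: producers wait on this      */
};

static void tq_init(struct task_queue *q) {
    q->head = q->tail = 0;
    pthread_mutex_init(&q->lock, NULL);
    sem_init(&q->tcount, 0, 0);         /* starts empty                 */
    sem_init(&q->space, 0, QCAP);       /* starts with QCAP free slots  */
}

static void tq_push(struct task_queue *q, void *task) {
    sem_wait(&q->space);                /* block while the queue is full */
    pthread_mutex_lock(&q->lock);
    q->slot[q->tail] = task;
    q->tail = (q->tail + 1) % QCAP;
    pthread_mutex_unlock(&q->lock);
    sem_post(&q->tcount);               /* wake one waiting worker       */
}

static void *tq_pop(struct task_queue *q) {
    void *task;
    sem_wait(&q->tcount);               /* block until a task is queued  */
    pthread_mutex_lock(&q->lock);
    task = q->slot[q->head];
    q->head = (q->head + 1) % QCAP;
    pthread_mutex_unlock(&q->lock);
    sem_post(&q->space);                /* report the freed slot         */
    return task;
}
#endif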
/* Sends one result node to the committer, honoring the optional
 * MSG_OFFER_RESULT handshake and the journaling flags. Factored out of
 * flush_results(), which repeated this block at every send site.
 * Returns 0 on success or -1 on a connection problem; the caller still
 * owns n and perm. */
static int send_result(struct tm_thread_data *d, struct result_node *n,
                       struct byte_array *perm, int j_id) {
    struct j_entry *entry = NULL;
    enum message_type mtype;
    uint64_t buffer;
    const char *failmsg;

    if (TM_KEEP_JOURNAL > 0) {
        entry = JOURNAL_new_entry(d->dia, j_id);
        entry->action = 'S';
        gettimeofday(&entry->start, NULL);
    }

    if (TM_ASK_TO_SEND_RESULT > 0) {
        // Offer the task id first; the committer answers with a flag
        // telling whether it still wants this result.
        byte_array_clear(perm);
        byte_array_pack64(perm, (uint64_t) n->task_id);
        if (COMM_send_message(perm, MSG_OFFER_RESULT, socket_committer) < 0) {
            failmsg = "Problem sending result offer to committer. Aborting flush_results.";
            goto fail;
        }

        byte_array_clear(perm);
        if (COMM_read_message(perm, &mtype, socket_committer) < 0) {
            failmsg = "Problem receiving data from committer. Aborting flush_results.";
            goto fail;
        }

        byte_array_unpack64(perm, &buffer);
        if ((int) buffer > 0) {
            if (COMM_send_message(&n->ba, MSG_RESULT, socket_committer) < 0) {
                failmsg = "Problem sending result to committer. Aborting flush_results.";
                goto fail;
            }
        }
    } else {
        if (COMM_send_message(&n->ba, MSG_RESULT, socket_committer) < 0) {
            failmsg = "Problem sending result to committer. Aborting flush_results.";
            goto fail;
        }
    }

    if (TM_KEEP_JOURNAL > 0) {
        gettimeofday(&entry->end, NULL);
    }
    return 0;

fail:
    if (TM_KEEP_JOURNAL > 0) {
        gettimeofday(&entry->end, NULL);
        if (COMM_get_actor_type() == VM_TASK_MANAGER) {
            error("Dumping VM journal");
            vm_dump_journal(d);
        }
    }
    error(failmsg);
    return -1;
}

/* Send results to the committer, blocking or not.
 * Returns the number of tasks sent or -1 if a connection problem was found. */
int flush_results(struct tm_thread_data *d, int min_results, enum blocking b, int j_id) {
    int i, len = 0;
    struct result_node *aux = NULL, *n = d->results;
    struct byte_array *perm = NULL;

    // Count the queued results; aux is left on the last (oldest) node.
    if (n) {
        len++;
        for (aux = n; aux->next; aux = aux->next) {
            len++;
        }
    }

    if (len < min_results && b == NONBLOCKING) {
        return 0;
    }

    if (TM_ASK_TO_SEND_RESULT > 0) {
        perm = (struct byte_array *) malloc(sizeof(struct byte_array));
        byte_array_init(perm, 10);
    }

    if (len >= min_results && b == NONBLOCKING) {
        /* DEBUG: force a reconnection to the Job Manager.
        int i = 0;
        for (i = 0; i < max_clients; i++) {
            if (COMM_client_socket[i] == socket_manager) {
                COMM_client_socket[i] = 0;
                close(socket_manager);
            }
        }
        int tm_retries = 3;
        if (COMM_connect_to_job_manager(COMM_addr_manager, &tm_retries) != 0) {
            info("Couldn't reconnect to the Job Manager. Closing Task Manager.");
        } else {
            info("Reconnected to the Job Manager.");
        }
        */
        // Flush everything currently queued, oldest first.
        len = 0;
        while (aux) {
            pthread_mutex_lock(&d->rlock);
            n = aux;
            if (aux->before != NULL) {          // Detach the tail node.
                aux->before->next = NULL;
                aux = aux->before;
            } else {
                d->results = NULL;
                aux = NULL;
            }
            pthread_mutex_unlock(&d->rlock);

            if (send_result(d, n, perm, j_id) < 0) {
                if (perm) {
                    byte_array_free(perm);
                    free(perm);
                }
                return -1;
            }

            byte_array_free(&n->ba);
            free(n);
            len++;
        }

        if (TM_ASK_TO_SEND_RESULT > 0) {
            byte_array_free(perm);
            free(perm);
        }
        return len;
    } else if (b == BLOCKING) {
        if (TM_NO_WAIT_FINAL_FLUSH > 0) {
            // Optional optimization: flush each result as soon as it
            // arrives instead of waiting for the whole batch.
            pthread_mutex_lock(&d->rlock);
            // Count and get a pointer to the last (oldest) result.
            len = 0;
            n = d->results;
            if (n) {
                len++;
                for (aux = n; aux->next; aux = aux->next) {
                    len++;
                }
            }
            d->is_blocking_flush = 1;
            d->bf_remaining_tasks = min_results - len;
            pthread_mutex_unlock(&d->rlock);

            // Will send every expected result.
            for (i = 0; i < min_results; i++) {
                // First the ones already here (i < len), no waiting
                // needed. Then wait for each remaining one (i >= len).
                if (i >= len) {
                    sem_wait(&d->no_wait_sem);
                    for (aux = d->results; aux && aux->next; aux = aux->next);
                }
                n = aux;

                if (send_result(d, n, perm, j_id) < 0) {
                    if (perm) {
                        byte_array_free(perm);
                        free(perm);
                    }
                    return -1;
                }

                pthread_mutex_lock(&d->rlock);
                if (aux->before != NULL) {
                    aux->before->next = NULL;
                    aux = aux->before;
                } else {
                    d->results = NULL;
                    aux = NULL;
                }
                pthread_mutex_unlock(&d->rlock);

                byte_array_free(&n->ba);
                free(n);
            }
            len = min_results;                  // Number of tasks actually sent.
        } else {
            if (len < min_results) {
                // Blocking and not yet complete: record how many results
                // are still missing and sleep on bf_mutex until a worker
                // unlocks it.
                pthread_mutex_lock(&d->rlock);
                len = 0;
                for (aux = d->results; aux; aux = aux->next) {
                    len++;
                }
                d->is_blocking_flush = 1;
                d->bf_remaining_tasks = min_results - len;
                pthread_mutex_unlock(&d->rlock);
                pthread_mutex_lock(&d->bf_mutex);
            }

            // Walk to the last (oldest) result and flush the whole list.
            for (aux = d->results; aux && aux->next; aux = aux->next);
            len = 0;
            while (aux) {
                pthread_mutex_lock(&d->rlock);
                n = aux;
                if (aux->before != NULL) {
                    aux->before->next = NULL;
                    aux = aux->before;
                } else {
                    d->results = NULL;
                    aux = NULL;
                }
                pthread_mutex_unlock(&d->rlock);

                if (send_result(d, n, perm, j_id) < 0) {
                    if (perm) {
                        byte_array_free(perm);
                        free(perm);
                    }
                    return -1;
                }

                byte_array_free(&n->ba);
                free(n);
                len++;
            }
        }

        if (TM_ASK_TO_SEND_RESULT > 0) {
            byte_array_free(perm);
            free(perm);
        }
        return len;
    }

    // b was neither NONBLOCKING nor BLOCKING: nothing was sent.
    if (TM_ASK_TO_SEND_RESULT > 0) {
        byte_array_free(perm);
        free(perm);
    }
    return 0;
}
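/* For reference, the committer's side of the TM_ASK_TO_SEND_RESULT
 * handshake would look roughly like the sketch below: reply 1 to a
 * MSG_OFFER_RESULT whose task id has not been committed yet and 0
 * otherwise, so duplicated results (e.g. from a restarted task manager)
 * are discarded before the payload ever crosses the wire. The intent is
 * inferred from flush_results() above; is_task_committed() and
 * MSG_OFFER_REPLY are hypothetical names, not part of this code base. */
#if 0   /* illustration only, not part of the build */
extern int is_task_committed(int task_id);      /* hypothetical lookup */

void handle_offer(struct byte_array *msg, int socket_tm) {
    uint64_t buffer;
    int task_id;

    byte_array_unpack64(msg, &buffer);
    task_id = (int) buffer;

    byte_array_clear(msg);
    byte_array_pack64(msg, (uint64_t) (is_task_committed(task_id) ? 0 : 1));
    COMM_send_message(msg, MSG_OFFER_REPLY, socket_tm);  /* hypothetical type */
}
#endif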