Beispiel #1
0
Datei: pi.c Projekt: ianliu/spitz
/*
 * Produce the next task for the job manager.
 *
 * user_data is treated as a struct pi_jm whose numpoints field counts the
 * tasks still to be generated. While that counter is non-zero, two values
 * drawn from drand48() are packed into ba (first draw first), the counter is
 * decremented and 1 is returned. Once the counter reaches zero, ba is left
 * untouched and 0 is returned.
 */
int spits_job_manager_next_task(void *user_data, struct byte_array *ba)
{
	struct pi_jm *jm = user_data;

	if (jm->numpoints != 0) {
		/* Draw order matters: the first draw is packed first. */
		double px = drand48();
		double py = drand48();

		byte_array_pack64(ba, px);
		byte_array_pack64(ba, py);
		jm->numpoints--;

		return 1;
	}

	return 0;
}
Beispiel #2
0
/*
 * Function responsible for the workers on current TM node.
 *
 * Thread entry point. `ptr` is a struct tm_thread_data shared with the task
 * manager. The thread:
 *   1. resolves spits_worker_new / spits_worker_run / spits_worker_free from
 *      d->handle and builds the per-worker user data (NULL when the module
 *      exports no spits_worker_new);
 *   2. while d->running is set, pops one task from the d->f FIFO, unpacks its
 *      id, runs spits_worker_run on it and pushes the result at the head of
 *      the doubly linked d->results list;
 *   3. when a blocking flush is in progress (d->is_blocking_flush == 1),
 *      signals the flusher either through d->no_wait_sem or by decrementing
 *      d->bf_remaining_tasks and unlocking d->bf_mutex when it reaches zero.
 *
 * Every result byte array starts with the task id followed by this node's
 * rank. The task byte array and its pointer are freed here once processed.
 * The thread terminates through pthread_exit(NULL).
 */
void *worker(void *ptr)
{
    int my_rank = COMM_get_rank_id();
    int task_id, j_id = 0;                                      // j_id = journal id for current thread.
    struct tm_thread_data *d = (struct tm_thread_data *) ptr;
    struct byte_array *task;
    struct result_node *result;
    uint64_t buffer;
    struct j_entry *entry = NULL;                               // Only set/used when TM_KEEP_JOURNAL > 0.

    workerid = d->id;

    void* (*worker_new) (int, char **);
    worker_new = dlsym(d->handle, "spits_worker_new");

    // NOTE(review): execute_pit is called unconditionally below; a module
    // without spits_worker_run would make this a NULL call. Confirm that the
    // symbol is validated before worker threads are started.
    void (*execute_pit) (void *, struct byte_array *, struct byte_array *);
    execute_pit = dlsym(d->handle, "spits_worker_run");

    void* (*worker_free) (void *);
    worker_free = dlsym(d->handle, "spits_worker_free");

    void *user_data = worker_new ? worker_new(d->argc, d->argv) : NULL;

    if(TM_KEEP_JOURNAL > 0) {
        j_id = JOURNAL_get_id(d->dia, 'W');
    }

    sem_wait (&d->tcount);                                      // wait for the first task to arrive.
    while (d->running) {
        pthread_mutex_lock(&d->tlock);                          // Get a new task.
        cfifo_pop(&d->f, &task);
        pthread_mutex_unlock(&d->tlock);

        // Warn the Task Manager about the new space available.
        sem_post(&d->sem);

        byte_array_unpack64(task, &buffer);
        task_id = (int) buffer;
        debug("[worker] Received TASK %d", task_id);

        // NOTE(review): the allocation is not checked; no failure policy is
        // visible here, so a NULL return would crash on the next line.
        result = malloc(sizeof *result);
        byte_array_init(&result->ba, 10);
        byte_array_pack64(&result->ba, task_id);                // Pack the ID in the result byte_array.
        byte_array_pack64(&result->ba, my_rank);

        if(TM_KEEP_JOURNAL > 0) {
            entry = JOURNAL_new_entry(d->dia, j_id);
            entry->action = 'P';
            gettimeofday(&entry->start, NULL);
        }

        // Pointers must be printed with %p; the previous %d was a
        // format/argument mismatch.
        debug("[--WORKER] task: %p", (void *) task);
        debug("[--WORKER] &result->ba: %p", (void *) &result->ba);
        execute_pit(user_data, task, &result->ba);              // Do the computation.

        if(TM_KEEP_JOURNAL > 0) {
            gettimeofday(&entry->end, NULL);
        }

        byte_array_free(task);                                  // Free memory used in task and pointer.
        free(task);                                             // For now, each pointer is allocated in master thread.

        debug("Appending task %d.", task_id);
        pthread_mutex_lock(&d->rlock);                          // Pack the result to send it later.
        result->next = d->results;                              // New results go to the head of the list.
        result->before = NULL;
        result->task_id = task_id;
        if(d->results != NULL) {
            d->results->before = result;
        }
        d->results = result;

        if(d->is_blocking_flush==1) {
            if(TM_NO_WAIT_FINAL_FLUSH > 0) {
                sem_post(&d->no_wait_sem);                      // One post per result produced during the flush.
            }
            else {
                d->bf_remaining_tasks--;
                if(d->bf_remaining_tasks==0) {
                    pthread_mutex_unlock(&d->bf_mutex);         // Release the flusher blocked on bf_mutex.
                }
            }
        }

        pthread_mutex_unlock(&d->rlock);

        sem_wait (&d->tcount);                                  // wait for the next task to arrive.
    }

    if (worker_free) {
        worker_free(user_data);
    }

    pthread_exit(NULL);
}
Beispiel #3
0
/* Send results to the committer, blocking or not.
 * Returns the number of tasks sent or -1 if found a connection problem. */
int flush_results(struct tm_thread_data *d, int min_results, enum blocking b, int j_id)
{
    int i, temp, len = 0;
    uint64_t buffer;
    struct result_node *aux, *n = d->results;
    struct j_entry * entry;
    struct byte_array * perm = NULL;
    enum message_type mtype;

    if(n) {
        len++;
        for (aux = n; aux->next; aux = aux->next) {
            len++;
        }
    }

    if (len < min_results && b == NONBLOCKING) {
        return 0;
    }

    else { 

        if(TM_ASK_TO_SEND_RESULT>0) {
            perm = (struct byte_array *) malloc (sizeof(struct byte_array));
            byte_array_init(perm , 10);
        }

        if (len >= min_results && b == NONBLOCKING) {

            /* DEBUG
            int i=0;
            for (i=0; i<max_clients; i++) {
               if(COMM_client_socket[i] == socket_manager) {
                   COMM_client_socket[i] = 0;
                   close(socket_manager);
               } 
            }
            int tm_retries = 3;
            if(COMM_connect_to_job_manager(COMM_addr_manager, &tm_retries)!=0) {
                info("Couldn't reconnect to the Job Manager. Closing Task Manager.");
            }
            else {
                info("Reconnected to the Job Manager.");
            }
             */  

            len = 0;
            while(aux) {
                pthread_mutex_lock(&d->rlock);
                n = aux;
                if(aux->before != NULL) {
                    aux->before->next = NULL;
                    aux = aux->before;
                }
                else {
                    d->results = NULL;
                    aux = NULL;
                }
                pthread_mutex_unlock(&d->rlock);

                if(TM_KEEP_JOURNAL > 0) {
                    entry = JOURNAL_new_entry(d->dia, j_id);
                    entry->action = 'S';
                    gettimeofday(&entry->start, NULL);
                }

                if(TM_ASK_TO_SEND_RESULT > 0) {
                    //debug("TM_ASK_TO_SEND_RESULT -> TASK_ID : %d", n->task_id);
                    byte_array_clear(perm);
                    buffer = (uint64_t) n->task_id;
                    //debug("TM_ASK_TO_SEND_RESULT -> TASK_ID(uint64_t) %" PRIu64 "\n", buffer);
                    byte_array_pack64(perm, buffer);
                    if(COMM_send_message(perm, MSG_OFFER_RESULT, socket_committer)<0) {
                        if(TM_KEEP_JOURNAL > 0) {
                            gettimeofday(&entry->end, NULL);

                            if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                error("Dumping VM journal");
                                vm_dump_journal(d);
                            }
                        }

                        error("Problem to send result to committer. Aborting flush_results.");

                        byte_array_free(perm);
                        free(perm);
                        return -1;
                    }

                    byte_array_clear(perm);
                    if(COMM_read_message(perm, &mtype, socket_committer)<0) {
                        if(TM_KEEP_JOURNAL > 0) {
                            gettimeofday(&entry->end, NULL);

                            if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                error("Dumping VM journal");
                                vm_dump_journal(d);
                            }
                        }

                        error("Problem receiving data from committer. Aborting flush_results.");

                        byte_array_free(perm);
                        free(perm);
                        return -1;
                    }
                    
                    byte_array_unpack64(perm, &buffer);
                    temp = (int) buffer;
                    //debug("TM_ASK_TO_SEND_RESULT -> temp: %d", temp);

                    if(temp > 0) {
                        if(COMM_send_message(&n->ba, MSG_RESULT, socket_committer)<0) {
                            if(TM_KEEP_JOURNAL > 0) {
                                gettimeofday(&entry->end, NULL);

                                if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                    error("Dumping VM journal");
                                    vm_dump_journal(d);
                                }
                            }

                            error("Problem sending result to committer. Aborting flush_results.");

                            byte_array_free(perm);
                            free(perm);
                            return -1;
                        }
                    }

                }
                else {
                    if(COMM_send_message(&n->ba, MSG_RESULT, socket_committer)<0) {
                        if(TM_KEEP_JOURNAL > 0) {
                            gettimeofday(&entry->end, NULL);

                            if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                error("Dumping VM journal");
                                vm_dump_journal(d);
                            }
                        }

                        error("Problem sending result to committer. Aborting flush_results.");

                        return -1;
                    }
                }

                if(TM_KEEP_JOURNAL > 0) {
                    gettimeofday(&entry->end, NULL);
                }

                byte_array_free(&n->ba);
                free(n);
            len++;
            }

            if(TM_ASK_TO_SEND_RESULT > 0) {
                byte_array_free(perm);
                free(perm);
            }

            return len;
        }

        else if (b == BLOCKING) {
            // Optional optimization. Will flush everything it got, as soon it arrives at the end.
            if(TM_NO_WAIT_FINAL_FLUSH > 0) {

                // If it's blocking and not yet complete.
                len = 0;
                n = d->results;

                pthread_mutex_lock(&d->rlock); 
                
                // Count and get a pointer to the last (older) result.
                if(n) {
                    len++;
                    for (aux = n; aux->next; aux = aux->next) {
                        len++;
                    }
                }
                d->is_blocking_flush=1;
                d->bf_remaining_tasks = min_results - len;
                pthread_mutex_unlock(&d->rlock);

                // Will send everyone.
                for(i=0; i<min_results; i++) {
                    // But first the ones already here (i < len). Don't have to wait. Then, wait (i >= len)
                    if(i >= len) {
                        sem_wait(&d->no_wait_sem);
                        for (aux = d->results; aux->next; aux = aux->next);
                    }

                    // Send message and update list, all standard.
                    n = aux;

                    if(TM_KEEP_JOURNAL > 0) {
                        entry = JOURNAL_new_entry(d->dia, j_id);
                        entry->action = 'S';
                        gettimeofday(&entry->start, NULL);
                    }

                    if(TM_ASK_TO_SEND_RESULT > 0) {
                        //debug("TM_ASK_TO_SEND_RESULT -> TASK_ID : %d", n->task_id);
                        buffer = (uint64_t) n->task_id;
                        byte_array_clear(perm);
                        //debug("TM_ASK_TO_SEND_RESULT -> TASK_ID(uint64_t) %" PRIu64 "\n", buffer);
                        byte_array_pack64(perm, buffer);
                        if(COMM_send_message(perm, MSG_OFFER_RESULT, socket_committer)<0) {
                            if(TM_KEEP_JOURNAL > 0) {
                                gettimeofday(&entry->end, NULL);

                                if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                    error("Dumping VM journal");
                                    vm_dump_journal(d);
                                }
                            }

                            error("Problem to send result to committer. Aborting flush_results.");

                            byte_array_free(perm);
                            free(perm);
                            return -1;
                        }

                        byte_array_clear(perm);
                        if(COMM_read_message(perm, &mtype, socket_committer)<0) {
                            if(TM_KEEP_JOURNAL > 0) {
                                gettimeofday(&entry->end, NULL);

                                if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                    error("Dumping VM journal");
                                    vm_dump_journal(d);
                                }
                            }

                            error("Problem receiving data from committer. Aborting flush_results.");

                            byte_array_free(perm);
                            free(perm);
                            return -1;
                        }
                        
                        byte_array_unpack64(perm, &buffer);
                        temp = (int) buffer;
                        //debug("TM_ASK_TO_SEND_RESULT -> temp : %d", temp);

                        if(temp > 0) {

                            if(COMM_send_message(&n->ba, MSG_RESULT, socket_committer)<0) {
                                if(TM_KEEP_JOURNAL > 0) {
                                    gettimeofday(&entry->end, NULL);

                                    if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                        error("Dumping VM journal");
                                        vm_dump_journal(d);
                                    }
                                }

                                error("Problem sending result to committer. Aborting flush_results.");

                                byte_array_free(perm);
                                free(perm);
                                return -1;
                            }
                        }

                    }
                    else {
                        if(COMM_send_message(&n->ba, MSG_RESULT, socket_committer)<0) {
                            if(TM_KEEP_JOURNAL > 0) {
                                gettimeofday(&entry->end, NULL);

                                if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                    error("Dumping VM journal");
                                    vm_dump_journal(d);
                                }
                            }

                            error("Problem sending result to committer. Aborting flush_results.");

                            return -1;
                        }
                    }

                    if(TM_KEEP_JOURNAL > 0) {
                        gettimeofday(&entry->end, NULL);
                    }

                    pthread_mutex_lock(&d->rlock);
                    if(aux->before != NULL) {
                        aux->before->next = NULL;
                        aux = aux->before;
                    }
                    else {
                        d->results = NULL;
                        aux = NULL;
                    }
                    pthread_mutex_unlock(&d->rlock);
                    byte_array_free(&n->ba);
                    free(n);
                }

            }
            else {
                if(len<min_results) {
                    // If it's blocking and not yet complete.
                    len = 0;
                    n = d->results;

                    pthread_mutex_lock(&d->rlock); 
                    for (aux = n; aux; aux = aux->next) {
                        len++;
                    }
                    d->is_blocking_flush=1;
                    d->bf_remaining_tasks = min_results - len;
                    pthread_mutex_unlock(&d->rlock);

                    pthread_mutex_lock(&d->bf_mutex);
                }
                
                for (aux = d->results; aux->next; aux = aux->next);

                len = 0;
                while(aux) {
                    pthread_mutex_lock(&d->rlock);
                    n = aux;
                    if(aux->before != NULL) {
                        aux->before->next = NULL;
                        aux = aux->before;
                    }
                    else {
                        d->results = NULL;
                        aux = NULL;
                    }
                    pthread_mutex_unlock(&d->rlock);

                    if(TM_KEEP_JOURNAL > 0) {
                        entry = JOURNAL_new_entry(d->dia, j_id);
                        entry->action = 'S';
                        gettimeofday(&entry->start, NULL);
                    }

                    if(TM_ASK_TO_SEND_RESULT > 0) {
                        //debug("TM_ASK_TO_SEND_RESULT -> TASK_ID : %d", n->task_id);
                        buffer = (uint64_t) n->task_id;
                        byte_array_clear(perm);
                        //debug("TM_ASK_TO_SEND_RESULT -> TASK_ID(uint64_t) %" PRIu64 "\n", buffer);
                        byte_array_pack64(perm, buffer);
                        if(COMM_send_message(perm, MSG_OFFER_RESULT, socket_committer)<0) {
                            if(TM_KEEP_JOURNAL > 0) {
                                gettimeofday(&entry->end, NULL);

                                if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                    error("Dumping VM journal");
                                    vm_dump_journal(d);
                                }
                            }

                            error("Problem to send result to committer. Aborting flush_results.");

                            byte_array_free(perm);
                            free(perm);
                            return -1;
                        }

                        byte_array_clear(perm);
                        if(COMM_read_message(perm, &mtype, socket_committer)<0) {
                            if(TM_KEEP_JOURNAL > 0) {
                                gettimeofday(&entry->end, NULL);

                                if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                    error("Dumping VM journal");
                                    vm_dump_journal(d);
                                }
                            }

                            error("Problem receiving data from committer. Aborting flush_results.");

                            byte_array_free(perm);
                            free(perm);
                            return -1;
                        }
                        
                        byte_array_unpack64(perm, &buffer);
                        temp = (int) buffer;
                        //debug("TM_ASK_TO_SEND_RESULT -> temp : %d", temp);

                        if(temp > 0) {

                            if(COMM_send_message(&n->ba, MSG_RESULT, socket_committer)<0) {
                                if(TM_KEEP_JOURNAL > 0) {
                                    gettimeofday(&entry->end, NULL);

                                    if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                        error("Dumping VM journal");
                                        vm_dump_journal(d);
                                    }
                                }

                                error("Problem sending result to committer. Aborting flush_results.");

                                byte_array_free(perm);
                                free(perm);
                                return -1;
                            }
                        }

                    }
                    else {
                        if(COMM_send_message(&n->ba, MSG_RESULT, socket_committer)<0) {
                            if(TM_KEEP_JOURNAL > 0) {
                                gettimeofday(&entry->end, NULL);

                                if(COMM_get_actor_type() == VM_TASK_MANAGER) {
                                    error("Dumping VM journal");
                                    vm_dump_journal(d);
                                }
                            }

                            error("Problem sending result to committer. Aborting flush_results.");

                            return -1;
                        }
                    }

                    if(TM_KEEP_JOURNAL > 0) {
                        gettimeofday(&entry->end, NULL);
                    }

                    byte_array_free(&n->ba);
                    free(n);
                len++;
                }
            }

            if(TM_ASK_TO_SEND_RESULT > 0) {
                byte_array_free(perm);
                free(perm);
            }
            return len;
        }

        if(TM_ASK_TO_SEND_RESULT > 0) {
            byte_array_free(perm);
            free(perm);
        }
    }

    return 0;
}