static void identity_reduce(void *key, void **vals, int vals_len)
{
    int i;
    for (i = 0; i < vals_len; i++)
    {
        emit_inline(key, vals[i]);
    }
}

void emit(void *key, void *val)
{
    emit_inline(key, val);
}
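/* Illustrative sketch (excluded from the build): a user-supplied reduce
 * follows the same (key, vals, vals_len) contract as identity_reduce above,
 * folding the gathered values for one key before emitting. The sum_reduce
 * name and the assumption that each val carries a pointer-sized count are
 * hypothetical, chosen only to show the calling convention. */
#if 0
static void sum_reduce(void *key, void **vals, int vals_len)
{
    intptr_t sum = 0;   /* assumes counts were emitted as pointer-sized ints */
    int i;
    for (i = 0; i < vals_len; i++)
    {
        sum += (intptr_t)vals[i];
    }
    emit_inline(key, (void *)sum);   /* emit one combined pair per key */
}
#endif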
/** schedule_tasks()
 *  thread_func - function pointer to process splitter data
 *  splitter_func - splitter function pointer
 *  splitter_init - splitter_init function pointer
 *  Runs map tasks in a new thread on each of the available processors.
 *  Returns a pointer to the intermediate value array.
 */
static inline void schedule_tasks(thread_wrapper_arg_t *th_arg)
{
    assert(th_arg);

    pthread_attr_t attr;                 // parameter for pthread creation
    thread_wrapper_arg_t *curr_th_arg;   // arg for thread_wrapper()
    int thread_cnt;                      // counter of number of threads assigned
    int curr_proc;
    int curr_thread;

    int num_threads = getNumTaskThreads(th_arg->func_type);
    int threads_per_proc = num_threads / g_state.num_procs;
    int threads_mod_procs = num_threads % g_state.num_procs;

    int pos = 0;                         // position of next result in the array
    pthread_mutex_t splitter_lock;       // lock for splitter function

    g_state.tinfo = (thread_info_t *)CALLOC(num_threads, sizeof(thread_info_t));
    CHECK_ERROR(pthread_mutex_init(&splitter_lock, NULL) != 0);

    dprintf("Number of available processors = %d\n", g_state.num_procs);
    dprintf("Number of Threads to schedule = %d per(%d) mod(%d)\n",
            num_threads, threads_per_proc, threads_mod_procs);

    th_arg->pos = &pos;
    th_arg->splitter_lock = &splitter_lock;

    // Threads must be scheduled systemwide
    pthread_attr_init(&attr);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);

#ifdef _LINUX_
    unsigned long cpu_set;               // bit array of available processors
    // Create a thread for each available processor to handle the split data
    CHECK_ERROR(sched_getaffinity(0, sizeof(cpu_set), &cpu_set) == -1);
    for (thread_cnt = curr_proc = 0;
         curr_proc < sizeof(cpu_set) && thread_cnt < num_threads;
         curr_proc++)
    {
        if (isCpuAvailable(cpu_set, curr_proc))
        {
#endif
#ifdef _SOLARIS_
    int max_procs = sysconf(_SC_NPROCESSORS_ONLN);
    for (thread_cnt = curr_proc = 0; thread_cnt < num_threads; curr_proc++)
    {
        if (P_ONLINE == p_online(curr_proc, P_STATUS))
        {
#endif
            // Give this processor an extra thread while the remainder
            // (threads_mod_procs) lasts; otherwise start curr_thread at 1.
            for (curr_thread = !(threads_mod_procs-- > 0);
                 curr_thread <= threads_per_proc && thread_cnt < num_threads;
                 curr_thread++, thread_cnt++)
            {
                // Set up the data to be passed to each thread
                curr_th_arg = (thread_wrapper_arg_t *)MALLOC(sizeof(thread_wrapper_arg_t));
                memcpy(curr_th_arg, th_arg, sizeof(thread_wrapper_arg_t));
                curr_th_arg->cpu_id = curr_proc;
                g_state.tinfo[thread_cnt].cpuid = curr_proc;
                //fprintf(stderr, "Starting thread %d on cpu %d\n", thread_cnt, curr_th_arg->cpu_id);

                switch (th_arg->func_type)
                {
                case MAP:
                    CHECK_ERROR(pthread_create(&g_state.tinfo[thread_cnt].tid, &attr,
                                               map_worker, curr_th_arg) != 0);
                    break;
                case REDUCE:
                    CHECK_ERROR(pthread_create(&g_state.tinfo[thread_cnt].tid, &attr,
                                               reduce_worker, curr_th_arg) != 0);
                    break;
                case MERGE:
                    CHECK_ERROR(pthread_create(&g_state.tinfo[thread_cnt].tid, &attr,
                                               merge_worker, curr_th_arg) != 0);
                    break;
                default:
                    assert(0);
                    break;
                }
            }
        }

        /*** ADDED BY RAM TO ASSIGN EACH PTHREAD TO HARDWARE THREADS
             ON DIFFERENT PROCESSORS ON THE ULTRASPARC T1 ****/
        if (getenv("MR_AFARA") != NULL)
        {
            //fprintf(stderr, "Using sparse threads\n");
            curr_proc += 3;
            if (curr_proc >= max_procs - 1)
            {
                curr_proc++;
                curr_proc = curr_proc % max_procs;
            }
        }
    }

    dprintf("Status: All %d threads have been created\n", num_threads);

    // Barrier: wait for all threads to finish
    for (thread_cnt = 0; thread_cnt < num_threads; thread_cnt++)
    {
        int ret_val;
        CHECK_ERROR(pthread_join(g_state.tinfo[thread_cnt].tid,
                                 (void **)(void *)&ret_val) != 0);
        // The thread returned an error. Restart the thread.
        //if (ret_val != 0)
        //{
        //}
    }

    pthread_attr_destroy(&attr);
    free(g_state.tinfo);
    dprintf("Status: All tasks have completed\n");
    return;
}

/** map_worker()
 *  args - pointer to thread_wrapper_arg_t
 *  Returns 0 on success.
 *  This runs thread_func() until there is no more data from the splitter().
 *  The pointers to the results are stored in the return_values array.
 */
static void *map_worker(void *args)
{
    thread_wrapper_arg_t *th_arg = (thread_wrapper_arg_t *)args;
    int thread_index = getCurrThreadIndex(MAP);
    map_args_t thread_func_arg;
    int num_assigned = 0;
    int ret;    // return value of splitter func. 0 = no more data to provide
    int isOneQueuePerTask = g_state.isOneQueuePerTask;

    assert(th_arg);

#ifdef _LINUX_
    // Bind thread to run on cpu_id
    unsigned long cpu_set = 0;
    setCpuAvailable(&cpu_set, th_arg->cpu_id);
    CHECK_ERROR(sched_setaffinity(0, sizeof(cpu_set), &cpu_set) != 0);
#endif
#ifdef _SOLARIS_
    dprintf("Binding thread to processor %d\n", th_arg->cpu_id);
    CHECK_ERROR(processor_bind(P_LWPID, P_MYID, th_arg->cpu_id, NULL) != 0);
    /*if (processor_bind(P_LWPID, P_MYID, th_arg->cpu_id, NULL) != 0) {
        switch (errno) {
        case EFAULT: dprintf("EFAULT\n"); break;
        case EINVAL: dprintf("EINVAL\n"); break;
        case EPERM:  dprintf("EPERM\n");  break;
        case ESRCH:  dprintf("ESRCH\n");  break;
        default:     dprintf("Errno is %d\n", errno);
        }
    }*/
#endif

    while (1)
    {
        pthread_mutex_lock(th_arg->splitter_lock);
        ret = g_state.splitter(g_state.args->task_data, g_state.chunk_size, &thread_func_arg);
        if (ret != 0)
        {
            int alloc_len = g_state.intermediate_task_alloc_len;
            g_state.tinfo[thread_index].curr_task = g_state.map_tasks++;
            num_assigned++;

            if (isOneQueuePerTask && g_state.map_tasks > alloc_len)
            {
                dprintf("MAP TASK QUEUE REALLOC\n");
                int i;
                g_state.intermediate_task_alloc_len *= 2;
                for (i = 0; i < g_state.reduce_tasks; i++)
                {
                    g_state.intermediate_vals[i] = (keyvals_arr_t *)REALLOC(
                        g_state.intermediate_vals[i],
                        g_state.intermediate_task_alloc_len * sizeof(keyvals_arr_t));
                    memset(&g_state.intermediate_vals[i][alloc_len], 0,
                           alloc_len * sizeof(keyvals_arr_t));
                }
            }
        }
        pthread_mutex_unlock(th_arg->splitter_lock);

        // Stop if there is no more data
        if (ret == 0)
            break;

        dprintf("Task %d: cpu_id -> %d - Started\n", num_assigned, th_arg->cpu_id);
        g_state.args->map(&thread_func_arg);
        dprintf("Task %d: cpu_id -> %d - Done\n", num_assigned, th_arg->cpu_id);
    }

    dprintf("Status: Total of %d tasks were assigned to cpu_id %d\n",
            num_assigned, th_arg->cpu_id);
    free(args);
    return (void *)0;
}
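/* Illustrative sketch (excluded from the build): a splitter compatible with
 * the loop in map_worker() returns nonzero and fills out one chunk while
 * input remains, and returns 0 once the input is exhausted. The byte_range_t
 * bookkeeping struct and the map_args_t field names (data, length) are
 * assumptions for illustration only. */
#if 0
typedef struct
{
    char *base;      /* start of the whole input */
    int total_len;   /* total input length in bytes */
    int offset;      /* next unconsumed byte; guarded by splitter_lock */
} byte_range_t;

static int byte_range_splitter(void *task_data, int chunk_size, map_args_t *out)
{
    byte_range_t *range = (byte_range_t *)task_data;
    int remaining = range->total_len - range->offset;

    if (remaining <= 0)
        return 0;                      /* no more data: map_worker exits its loop */

    out->data = range->base + range->offset;          /* assumed field name */
    out->length = (remaining < chunk_size) ? remaining : chunk_size;
    range->offset += out->length;
    return 1;                          /* one more task assigned */
}
#endif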
/* reduce_worker()
 * Merges the sorted intermediate key/value arrays produced by the map
 * threads for one reduce task and invokes the user's reduce() (or the
 * identity_reduce() fast path) on each distinct key.
 */
static void *reduce_worker(void *args)
{
    thread_wrapper_arg_t *th_arg = (thread_wrapper_arg_t *)args;
    int thread_index = getCurrThreadIndex(REDUCE);
    int isOneQueuePerTask = g_state.isOneQueuePerTask;

    assert(th_arg);

#ifdef _LINUX_
    // Bind thread to run on cpu_id
    unsigned long cpu_set = 0;
    setCpuAvailable(&cpu_set, th_arg->cpu_id);
    CHECK_ERROR(sched_setaffinity(0, sizeof(cpu_set), &cpu_set) != 0);
#endif
#ifdef _SOLARIS_
    CHECK_ERROR(processor_bind(P_LWPID, P_MYID, th_arg->cpu_id, NULL) != 0);
    /*if (processor_bind(P_LWPID, P_MYID, th_arg->cpu_id, NULL) != 0) {
        switch (errno) {
        case EFAULT: dprintf("EFAULT\n"); break;
        case EINVAL: dprintf("EINVAL\n"); break;
        case EPERM:  dprintf("EPERM\n");  break;
        case ESRCH:  dprintf("ESRCH\n");  break;
        default:     dprintf("Errno is %d\n", errno);
        }
    }*/
#endif

    int curr_thread, done;
    int curr_reduce_task = 0;
    int ret;
    int num_map_threads;

    if (isOneQueuePerTask)
        num_map_threads = g_state.map_tasks;
    else
        num_map_threads = g_state.num_map_threads;

    int startsize = DEFAULT_VALS_ARR_LEN;
    keyvals_arr_t *thread_array;
    int vals_len, max_len, next_min_pos;
    keyvals_t *curr_key_val, *min_key_val, *next_min;
    int *thread_position = (int *)MALLOC(num_map_threads * sizeof(int));
    void **vals = MALLOC(sizeof(char *) * startsize);

    while (1)
    {
        // Get the next reduce task
        pthread_mutex_lock(th_arg->splitter_lock);
        ret = (*th_arg->pos >= g_state.reduce_tasks);
        if (!ret)
        {
            g_state.tinfo[thread_index].curr_task = curr_reduce_task = (*th_arg->pos)++;
        }
        pthread_mutex_unlock(th_arg->splitter_lock);

        // No more reduce tasks
        if (ret)
            break;

        bzero((char *)thread_position, num_map_threads * sizeof(int));
        vals_len = 0;
        max_len = startsize;
        min_key_val = NULL;
        next_min = NULL;
        done = 0;

        while (!done)
        {
            for (curr_thread = 0; curr_thread < num_map_threads; curr_thread++)
            {
                /* Find the next array to search */
                thread_array = &g_state.intermediate_vals[curr_reduce_task][curr_thread];

                /* Check if the current processor array has been completely searched */
                if (thread_position[curr_thread] >= thread_array->len)
                    continue;

                /* Get the next key in the processor array */
                curr_key_val = &thread_array->arr[thread_position[curr_thread]];

                /* If the key matches the minimum value, then add the value
                   to the list of values for that key */
                if (min_key_val != NULL &&
                    !g_state.args->key_cmp(curr_key_val->key, min_key_val->key))
                {
                    if (g_state.reduce == identity_reduce)
                    {
                        int j;
                        for (j = 0; j < curr_key_val->len; j++)
                        {
                            emit_inline(min_key_val->key, curr_key_val->vals[j]);
                        }
                    }
                    else
                    {
                        if (vals_len + curr_key_val->len >= max_len)
                        {
                            while (vals_len + curr_key_val->len >= max_len)
                                max_len *= 2;
                            vals = REALLOC(vals, sizeof(char *) * max_len);
                        }
                        memcpy(&vals[vals_len], curr_key_val->vals,
                               curr_key_val->len * sizeof(char *));
                        vals_len += curr_key_val->len;
                    }
                    // Consume this entry and revisit the same array, in case
                    // its next key also matches the minimum
                    thread_position[curr_thread--]++;
                }
                /* Find the location of the next min */
                else if (next_min == NULL ||
                         g_state.args->key_cmp(curr_key_val->key, next_min->key) < 0)
                {
                    next_min = curr_key_val;
                    next_min_pos = curr_thread;
                }
            }

            if (min_key_val != NULL)
            {
                if (g_state.reduce != identity_reduce)
                {
                    g_state.reduce(min_key_val->key, vals, vals_len);
                }
                vals_len = 0;
                min_key_val = NULL;
            }
            if (next_min != NULL)
            {
                min_key_val = next_min;
                next_min = NULL;
            }

            // See if there are any elements left
            for (curr_thread = 0;
                 curr_thread < num_map_threads &&
                 thread_position[curr_thread] >=
                     g_state.intermediate_vals[curr_reduce_task][curr_thread].len;
                 curr_thread++);
            done = (curr_thread == num_map_threads);
        }

        for (curr_thread = 0; curr_thread < num_map_threads; curr_thread++)
        {
            keyvals_arr_t *arr = &g_state.intermediate_vals[curr_reduce_task][curr_thread];
            int j;
            for (j = 0; j < arr->len; j++)
            {
                free(arr->arr[j].vals);
            }
            free(arr->arr);
        }
        free(g_state.intermediate_vals[curr_reduce_task]);
    }

    free(thread_position);
    free(vals);
    free(args);
    return (void *)0;
}
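/* Illustrative sketch (excluded from the build): reduce_worker() above is a
 * k-way merge. Each map thread leaves a sorted keyvals_arr_t for the reduce
 * task; the worker repeatedly scans all arrays for the smallest unconsumed
 * key, gathers every value bound to it, and hands the batch to reduce().
 * The standalone kway_min below restates just the min-selection step over
 * plain int arrays; the names and types are hypothetical. */
#if 0
/* Return the index of the array whose cursor points at the smallest key,
 * or -1 when every array is exhausted. */
static int kway_min(int **arrs, int *lens, int *pos, int n)
{
    int i, best = -1;
    for (i = 0; i < n; i++)
    {
        if (pos[i] >= lens[i])
            continue;                              /* array i fully consumed */
        if (best == -1 || arrs[i][pos[i]] < arrs[best][pos[best]])
            best = i;
    }
    return best;
}
#endif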
/* emit_struct()
 * Emits the XDR routine body for a struct definition. When inlining is
 * enabled (doinline > 0) and the struct contains runs of basic types, the
 * generated code grabs the whole run with XDR_INLINE() instead of calling
 * the xdr_ routines field by field.
 */
static void
emit_struct (definition * def)
{
  decl_list *dl;
  int i, j, size, flag;
  decl_list *cur = NULL, *psav;
  bas_type *ptr;
  char *sizestr, *plus;
  char ptemp[256];
  int can_inline;

  if (doinline == 0)
    {
      for (dl = def->def.st.decls; dl != NULL; dl = dl->next)
        print_stat (1, &dl->decl);
      return;
    }

  for (dl = def->def.st.decls; dl != NULL; dl = dl->next)
    if (dl->decl.rel == REL_VECTOR)
      {
        f_print (fout, "\t int i;\n");
        break;
      }

  size = 0;
  can_inline = 0;
  for (dl = def->def.st.decls; dl != NULL; dl = dl->next)
    if ((dl->decl.prefix == NULL) &&
        ((ptr = find_type (dl->decl.type)) != NULL) &&
        ((dl->decl.rel == REL_ALIAS) || (dl->decl.rel == REL_VECTOR)))
      {
        if (dl->decl.rel == REL_ALIAS)
          size += ptr->length;
        else
          {
            can_inline = 1;
            break;              /* can be inlined */
          };
      }
    else
      {
        if (size >= doinline)
          {
            can_inline = 1;
            break;              /* can be inlined */
          }
        size = 0;
      }
  if (size > doinline)
    can_inline = 1;

  if (can_inline == 0)
    {                           /* cannot inline, drop back to old mode */
      for (dl = def->def.st.decls; dl != NULL; dl = dl->next)
        print_stat (1, &dl->decl);
      return;
    };

  flag = PUT;
  for (j = 0; j < 2; j++)
    {
      if (flag == PUT)
        f_print (fout, "\n\t if (xdrs->x_op == XDR_ENCODE) {\n");
      else
        f_print (fout, "\n \t return (TRUE);\n\t} else if (xdrs->x_op == XDR_DECODE) {\n");

      i = 0;
      size = 0;
      sizestr = NULL;
      for (dl = def->def.st.decls; dl != NULL; dl = dl->next)
        {                       /* xxx */
          /* now walk down the list and check for basic types */
          if ((dl->decl.prefix == NULL) &&
              ((ptr = find_type (dl->decl.type)) != NULL) &&
              ((dl->decl.rel == REL_ALIAS) || (dl->decl.rel == REL_VECTOR)))
            {
              if (i == 0)
                cur = dl;
              i++;

              if (dl->decl.rel == REL_ALIAS)
                size += ptr->length;
              else
                {
                  /* this is required to handle arrays */
                  if (sizestr == NULL)
                    plus = " ";
                  else
                    plus = "+";

                  if (ptr->length != 1)
                    s_print (ptemp, " %s %s * %d", plus, dl->decl.array_max, ptr->length);
                  else
                    s_print (ptemp, " %s %s ", plus, dl->decl.array_max);

                  /* now concatenate to sizestr !!!! */
                  if (sizestr == NULL)
                    sizestr = strdup (ptemp);
                  else
                    {
                      sizestr = (char *) realloc (sizestr,
                                                  strlen (sizestr) + strlen (ptemp) + 1);
                      if (sizestr == NULL)
                        {
                          f_print (stderr, "Fatal error : no memory \n");
                          crash ();
                        };
                      sizestr = strcat (sizestr, ptemp);        /* build up length of array */
                    }
                }
            }
          else
            {
              if (i > 0)
                {
                  if (sizestr == NULL && size < doinline)
                    {
                      /* don't expand into inline code if size < doinline */
                      while (cur != dl)
                        {
                          print_stat (1, &cur->decl);
                          cur = cur->next;
                        }
                    }
                  else
                    {
                      /* we're already looking at an xdr_inlineable structure */
                      if (sizestr == NULL)
                        f_print (fout,
                                 "\t buf = (int32_t *)XDR_INLINE(xdrs,%d * BYTES_PER_XDR_UNIT);",
                                 size);
                      else if (size == 0)
                        f_print (fout,
                                 "\t buf = (int32_t *)XDR_INLINE(xdrs,%s * BYTES_PER_XDR_UNIT);",
                                 sizestr);
                      else
                        f_print (fout,
                                 "\t buf = (int32_t *)XDR_INLINE(xdrs,(%d + %s)* BYTES_PER_XDR_UNIT);",
                                 size, sizestr);

                      f_print (fout, "\n\t if (buf == NULL) {\n");
                      psav = cur;
                      while (cur != dl)
                        {
                          print_stat (2, &cur->decl);
                          cur = cur->next;
                        }

                      f_print (fout, "\n\t }\n\t else {\n");

                      cur = psav;
                      while (cur != dl)
                        {
                          emit_inline (&cur->decl, flag);
                          cur = cur->next;
                        }
                      f_print (fout, "\t }\n");
                    }
                }
              size = 0;
              i = 0;
              sizestr = NULL;
              print_stat (1, &dl->decl);
            }
        }

      if (i > 0)
        {
          if (sizestr == NULL && size < doinline)
            {
              /* don't expand into inline code if size < doinline */
              while (cur != dl)
                {
                  print_stat (1, &cur->decl);
                  cur = cur->next;
                }
            }
          else
            {
              /* we're already looking at an xdr_inlineable structure */
              if (sizestr == NULL)
                f_print (fout,
                         "\t\tbuf = (int32_t *)XDR_INLINE(xdrs,%d * BYTES_PER_XDR_UNIT);",
                         size);
              else if (size == 0)
                f_print (fout,
                         "\t\tbuf = (int32_t *)XDR_INLINE(xdrs,%s * BYTES_PER_XDR_UNIT);",
                         sizestr);
              else
                f_print (fout,
                         "\t\tbuf = (int32_t *)XDR_INLINE(xdrs,(%d + %s)* BYTES_PER_XDR_UNIT);",
                         size, sizestr);

              f_print (fout, "\n\t\tif (buf == NULL) {\n");
              psav = cur;
              while (cur != NULL)
                {
                  print_stat (2, &cur->decl);
                  cur = cur->next;
                }

              f_print (fout, "\n\t }\n\t else {\n");

              cur = psav;
              while (cur != dl)
                {
                  emit_inline (&cur->decl, flag);
                  cur = cur->next;
                }
              f_print (fout, "\t }\n");
            }
        }
      flag = GET;
    }
  f_print (fout, "\t return(TRUE);\n\t}\n\n");

  /* now take care of the XDR_FREE case */
  for (dl = def->def.st.decls; dl != NULL; dl = dl->next)
    print_stat (1, &dl->decl);
}
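/* Illustrative sketch (excluded from the build): for a struct of fixed-size
 * basic fields, the inline path above emits code shaped roughly like the
 * fragment below -- one XDR_INLINE() buffer grab covering the run of fields,
 * a fallback to per-field xdr_ calls when the grab fails, and IXDR_PUT/GET
 * macros otherwise. The struct name `point` and its fields are hypothetical,
 * and only the ENCODE arm is shown. */
#if 0
/* struct point { int x; int y; }; generated xdr_point, ENCODE arm: */
if (xdrs->x_op == XDR_ENCODE) {
    buf = (int32_t *) XDR_INLINE (xdrs, 2 * BYTES_PER_XDR_UNIT);
    if (buf == NULL) {
        if (!xdr_int (xdrs, &objp->x))
            return (FALSE);
        if (!xdr_int (xdrs, &objp->y))
            return (FALSE);
    } else {
        IXDR_PUT_LONG (buf, objp->x);
        IXDR_PUT_LONG (buf, objp->y);
    }
    return (TRUE);
}
#endif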