/* =============================================================================
 * threadWait
 * -- Per-thread worker loop.
 *    Zeroes flags[0 .. LOCKS-1], records the thread id in thread-local
 *    storage, restricts the calling thread to CPU (threadId % 8) and then
 *    repeatedly runs global_funcPtr(global_argPtr) between a "start" and an
 *    "end" barrier.
 *
 *    argPtr points to a long holding this thread's id.
 *
 *    Thread 0 calls endEnergy() and leaves the loop after the first
 *    completed parallel region; every other thread keeps looping until it
 *    observes global_doShutdown right after a start barrier.
 * =============================================================================
 */
static void
threadWait (void* argPtr)
{
    long threadId = *(long*)argPtr;
    cpu_set_t affinityMask;
    int slot = 0;

    /* Reset every entry of the flags[] table */
    while (slot < LOCKS) {
        flags[slot] = 0;
        ++slot;
    }

    THREAD_LOCAL_SET(global_threadId, (long)threadId);

    /* pid 0 == the calling thread; the return value is not inspected */
    CPU_ZERO(&affinityMask);
    CPU_SET(threadId % 8, &affinityMask);
    sched_setaffinity(0, sizeof(cpu_set_t), &affinityMask);

    for (;;) {
        THREAD_BARRIER(global_barrierPtr, threadId); /* wait for start parallel */
        if (global_doShutdown) {
            break;
        }

        global_funcPtr(global_argPtr);

        THREAD_BARRIER(global_barrierPtr, threadId); /* wait for end parallel */
        if (threadId != 0) {
            continue;
        }

        /* thread 0 only: close the measurement and stop looping */
        endEnergy();
        break;
    }
}
static void threadWait (void* argPtr) { long threadId = *(long*)argPtr; THREAD_LOCAL_SET(global_threadId, (long)threadId); phys_id = the_cores[(int)threadId]; cluster_id = get_cluster(phys_id); unsigned int i = 0; for (; i < LOCKS; i++) { local_th_data[i][phys_id] = init_lock_local(phys_id, the_lock[i]); } cpu_set_t my_set; CPU_ZERO(&my_set); CPU_SET(threadId % 8, &my_set); sched_setaffinity(0, sizeof(cpu_set_t), &my_set); while (1) { THREAD_BARRIER(global_barrierPtr, threadId); /* wait for start parallel */ if (global_doShutdown) { break; } global_funcPtr(global_argPtr); THREAD_BARRIER(global_barrierPtr, threadId); /* wait for end parallel */ if (threadId == 0) { endEnergy(); break; } } }
/* ============================================================================= * main * ============================================================================= */ MAIN(argc, argv) { int max_nclusters = 13; int min_nclusters = 4; char* filename = 0; float* buf; float** attributes; float** cluster_centres = NULL; int i; int j; int best_nclusters; int* cluster_assign; int numAttributes; int numObjects; int use_zscore_transform = 1; char* line; int isBinaryFile = 0; int nloops; int len; int nthreads; float threshold = 0.001; int opt; GOTO_REAL(); line = (char*)malloc(MAX_LINE_LENGTH); /* reserve memory line */ nthreads = 1; while ((opt = getopt(argc,(char**)argv,"p:i:m:n:t:bz")) != EOF) { switch (opt) { case 'i': filename = optarg; break; case 'b': isBinaryFile = 1; break; case 't': threshold = atof(optarg); break; case 'm': max_nclusters = atoi(optarg); break; case 'n': min_nclusters = atoi(optarg); break; case 'z': use_zscore_transform = 0; break; case 'p': nthreads = atoi(optarg); break; case '?': usage((char*)argv[0]); break; default: usage((char*)argv[0]); break; } } if (filename == 0) { usage((char*)argv[0]); } if (max_nclusters < min_nclusters) { fprintf(stderr, "Error: max_clusters must be >= min_clusters\n"); usage((char*)argv[0]); } SIM_GET_NUM_CPU(nthreads); numAttributes = 0; numObjects = 0; /* * From the input file, get the numAttributes and numObjects */ if (isBinaryFile) { int infile; if ((infile = open(filename, O_RDONLY, "0600")) == -1) { fprintf(stderr, "Error: no such file (%s)\n", filename); exit(1); } read(infile, &numObjects, sizeof(int)); read(infile, &numAttributes, sizeof(int)); /* Allocate space for attributes[] and read attributes of all objects */ buf = (float*)malloc(numObjects * numAttributes * sizeof(float)); assert(buf); attributes = (float**)malloc(numObjects * sizeof(float*)); assert(attributes); attributes[0] = (float*)malloc(numObjects * numAttributes * sizeof(float)); assert(attributes[0]); for (i = 1; i < numObjects; i++) { 
attributes[i] = attributes[i-1] + numAttributes; } read(infile, buf, (numObjects * numAttributes * sizeof(float))); close(infile); } else { FILE *infile; if ((infile = fopen(filename, "r")) == NULL) { fprintf(stderr, "Error: no such file (%s)\n", filename); exit(1); } while (fgets(line, MAX_LINE_LENGTH, infile) != NULL) { if (strtok(line, " \t\n") != 0) { numObjects++; } } rewind(infile); while (fgets(line, MAX_LINE_LENGTH, infile) != NULL) { if (strtok(line, " \t\n") != 0) { /* Ignore the id (first attribute): numAttributes = 1; */ while (strtok(NULL, " ,\t\n") != NULL) { numAttributes++; } break; } } /* Allocate space for attributes[] and read attributes of all objects */ buf = (float*)malloc(numObjects * numAttributes * sizeof(float)); assert(buf); attributes = (float**)malloc(numObjects * sizeof(float*)); assert(attributes); attributes[0] = (float*)malloc(numObjects * numAttributes * sizeof(float)); assert(attributes[0]); for (i = 1; i < numObjects; i++) { attributes[i] = attributes[i-1] + numAttributes; } rewind(infile); i = 0; while (fgets(line, MAX_LINE_LENGTH, infile) != NULL) { if (strtok(line, " \t\n") == NULL) { continue; } for (j = 0; j < numAttributes; j++) { buf[i] = atof(strtok(NULL, " ,\t\n")); i++; } } fclose(infile); } TM_STARTUP(nthreads); thread_startup(nthreads); /* * The core of the clustering */ cluster_assign = (int*)malloc(numObjects * sizeof(int)); assert(cluster_assign); nloops = 1; len = max_nclusters - min_nclusters + 1; #ifdef STM_ENERGY_MONITOR startEnergy(); #endif /* STM_ENERGY_MONITOR */ for (i = 0; i < nloops; i++) { /* * Since zscore transform may perform in cluster() which modifies the * contents of attributes[][], we need to re-store the originals */ memcpy(attributes[0], buf, (numObjects * numAttributes * sizeof(float))); cluster_centres = NULL; cluster_exec(nthreads, numObjects, numAttributes, attributes, /* [numObjects][numAttributes] */ use_zscore_transform, /* 0 or 1 */ min_nclusters, /* pre-define range from min to max */ 
max_nclusters, threshold, &best_nclusters, /* return: number between min and max */ &cluster_centres, /* return: [best_nclusters][numAttributes] */ cluster_assign); /* return: [numObjects] cluster id for each object */ } #ifdef GNUPLOT_OUTPUT { FILE** fptr; char outFileName[1024]; fptr = (FILE**)malloc(best_nclusters * sizeof(FILE*)); for (i = 0; i < best_nclusters; i++) { sprintf(outFileName, "group.%d", i); fptr[i] = fopen(outFileName, "w"); } for (i = 0; i < numObjects; i++) { fprintf(fptr[cluster_assign[i]], "%6.4f %6.4f\n", attributes[i][0], attributes[i][1]); } for (i = 0; i < best_nclusters; i++) { fclose(fptr[i]); } free(fptr); } #endif /* GNUPLOT_OUTPUT */ #ifdef OUTPUT_TO_FILE { /* Output: the coordinates of the cluster centres */ FILE* cluster_centre_file; FILE* clustering_file; char outFileName[1024]; sprintf(outFileName, "%s.cluster_centres", filename); cluster_centre_file = fopen(outFileName, "w"); for (i = 0; i < best_nclusters; i++) { fprintf(cluster_centre_file, "%d ", i); for (j = 0; j < numAttributes; j++) { fprintf(cluster_centre_file, "%f ", cluster_centres[i][j]); } fprintf(cluster_centre_file, "\n"); } fclose(cluster_centre_file); /* Output: the closest cluster centre to each of the data points */ sprintf(outFileName, "%s.cluster_assign", filename); clustering_file = fopen(outFileName, "w"); for (i = 0; i < numObjects; i++) { fprintf(clustering_file, "%d %d\n", i, cluster_assign[i]); } fclose(clustering_file); } #endif /* OUTPUT TO_FILE */ #ifdef OUTPUT_TO_STDOUT { /* Output: the coordinates of the cluster centres */ for (i = 0; i < best_nclusters; i++) { //printf("%d ", i); for (j = 0; j < numAttributes; j++) { //printf("%f ", cluster_centres[i][j]); } //printf("\n"); } } #endif /* OUTPUT TO_STDOUT */ #ifdef STM_ENERGY_MONITOR float joule=endEnergy(); printf("Threads: %i\tElapsed time: %f Energy: %f",nthreads, global_time, joule); #else printf("Threads: %i\tElapsed time: %f", nthreads, global_time); #endif /* STM_ENERGY_MONITOR */ 
free(cluster_assign); free(attributes); free(cluster_centres[0]); free(cluster_centres); free(buf); TM_SHUTDOWN(); if (getenv("STM_STATS") != NULL) { unsigned long u; if (stm_get_global_stats("global_nb_commits", &u) != 0){ printf("\tThroughput: %f\n",u/global_time); } } GOTO_SIM(); thread_shutdown(); MAIN_RETURN(0); }
/* ============================================================================= * main * ============================================================================= */ MAIN(argc, argv) { GOTO_REAL(); /* * Initialization */ parseArgs(argc, (char** const)argv); long numThread = global_params[PARAM_THREAD]; long numVar = global_params[PARAM_VAR]; long numRecord = global_params[PARAM_RECORD]; long randomSeed = global_params[PARAM_SEED]; long maxNumParent = global_params[PARAM_NUMBER]; long percentParent = global_params[PARAM_PERCENT]; global_insertPenalty = global_params[PARAM_INSERT]; global_maxNumEdgeLearned = global_params[PARAM_EDGE]; SIM_GET_NUM_CPU(numThread); TM_STARTUP(numThread); P_MEMORY_STARTUP(numThread); thread_startup(numThread); /* printf("Random seed = %li\n", randomSeed); printf("Number of vars = %li\n", numVar); printf("Number of records = %li\n", numRecord); printf("Max num parents = %li\n", maxNumParent); printf("%% chance of parent = %li\n", percentParent); printf("Insert penalty = %li\n", global_insertPenalty); printf("Max num edge learned / var = %li\n", global_maxNumEdgeLearned); printf("Operation quality factor = %f\n", global_operationQualityFactor); fflush(stdout); */ /* * Generate data */ //printf("Generating data... "); //fflush(stdout); random_t* randomPtr = random_alloc(); assert(randomPtr); random_seed(randomPtr, randomSeed); data_t* dataPtr = data_alloc(numVar, numRecord, randomPtr); assert(dataPtr); net_t* netPtr = data_generate(dataPtr, -1, maxNumParent, percentParent); //puts("done."); //fflush(stdout); /* * Generate adtree */ adtree_t* adtreePtr = adtree_alloc(); assert(adtreePtr); //printf("Generating adtree... 
"); //fflush(stdout); TIMER_T adtreeStartTime; TIMER_READ(adtreeStartTime); adtree_make(adtreePtr, dataPtr); TIMER_T adtreeStopTime; TIMER_READ(adtreeStopTime); //puts("done."); //fflush(stdout); //printf("Adtree time = %f\n",TIMER_DIFF_SECONDS(adtreeStartTime, adtreeStopTime)); //fflush(stdout); /* * Score original network */ float actualScore = score(netPtr, adtreePtr); net_free(netPtr); /* * Learn structure of Bayesian network */ learner_t* learnerPtr = learner_alloc(dataPtr, adtreePtr, numThread); assert(learnerPtr); data_free(dataPtr); /* save memory */ //printf("Learning structure..."); //fflush(stdout); #ifdef STM_ENERGY_MONITOR startEnergy(); #endif /* STM_ENERGY_MONITOR */ TIMER_T learnStartTime; TIMER_READ(learnStartTime); GOTO_SIM(); learner_run(learnerPtr); GOTO_REAL(); TIMER_T learnStopTime; TIMER_READ(learnStopTime); #ifdef STM_ENERGY_MONITOR float delta_energy = endEnergy(); printf("Threads: %i\tElapsed time: %f Energy: %f",numThread, TIMER_DIFF_SECONDS(learnStartTime, learnStopTime), delta_energy); #else printf("Threads: %i\tElapsed time: %f",numThread, TIMER_DIFF_SECONDS(learnStartTime, learnStopTime)); #endif /* STM_ENERGY_MONITOR */ fflush(stdout); /* * Check solution */ bool_t status = net_isCycle(learnerPtr->netPtr); assert(!status); #ifndef SIMULATOR float learnScore = learner_score(learnerPtr); //printf("Learn score = %f\n", learnScore); #endif //printf("Actual score = %f\n", actualScore); /* * Clean up */ fflush(stdout); random_free(randomPtr); #ifndef SIMULATOR adtree_free(adtreePtr); # if 0 learner_free(learnerPtr); # endif #endif TM_SHUTDOWN(); if (getenv("STM_STATS") != NULL) { unsigned long u; if (stm_get_global_stats("global_nb_commits", &u) != 0){ printf("\tThroughput: %f\n",u/TIMER_DIFF_SECONDS(learnStartTime, learnStopTime)); } } P_MEMORY_SHUTDOWN(); GOTO_SIM(); thread_shutdown(); MAIN_RETURN(0); }
inline void stm_tune_scheduler(){ TX_GET; int m=max_allowed_running_transactions; endEnergy(); stm_time_t now=STM_TIMER_READ(); stm_time_t total_tx_wasted_time=0; stm_time_t total_tx_time=0; stm_time_t total_no_tx_time=0; stm_time_t total_tx_spin_time=0; stm_time_t *wasted_time_k=(stm_time_t *)malloc((max_concurrent_threads+1)*sizeof(stm_time_t)); stm_time_t *useful_time_k=(stm_time_t *)malloc((max_concurrent_threads+1)*sizeof(stm_time_t)); long * conflict_active_threads=(long *)malloc((max_concurrent_threads + 1) * sizeof(long)); long * commit_active_threads=(long *)malloc((max_concurrent_threads + 1) * sizeof(long)); memset(conflict_active_threads, 0, (max_concurrent_threads+1) * sizeof(long)); memset(commit_active_threads, 0, (max_concurrent_threads+1) * sizeof(long)); memset(wasted_time_k, 0, (max_concurrent_threads+1) * sizeof(stm_time_t)); memset(useful_time_k, 0, (max_concurrent_threads+1) * sizeof(stm_time_t)); long total_committed_transactions_by_collector_threads=0; long total_committed_transactions=0; long tx_conflict_table_times=0; float avg_running_tx=0; tx->total_no_tx_time+=now - tx->start_no_tx_time ; stm_tx_t *thread=_tinystm.threads; int i=0; while(thread!=NULL){ total_tx_time+=thread->total_useful_time; total_no_tx_time+=thread->total_no_tx_time; total_tx_wasted_time+=thread->total_wasted_time; total_tx_spin_time+=thread->total_spin_time; total_committed_transactions_by_collector_threads+=thread->committed_transactions_as_a_collector_thread; total_committed_transactions+=thread->committed_transactions; tx_conflict_table_times+=thread->aborted_transactions; for(i=0;i<max_concurrent_threads+1;i++){ wasted_time_k[i]+=thread->total_tx_wasted_per_active_transactions[i]; //printf("\nwasted_time_k[%i] %llu", i, thread->total_tx_wasted_per_active_transactions[i]); useful_time_k[i]+=thread->total_tx_useful_per_active_transactions[i]; commit_active_threads[i]+=thread->total_tx_committed_per_active_transactions[i]; avg_running_tx+=(float)i * (float) 
thread->total_tx_committed_per_active_transactions[i]; conflict_active_threads[i]+=thread->total_conflict_per_active_transactions[i]; } reset_local_stats(thread); thread=thread->next; } for(i=0;i<max_concurrent_threads+1;i++) printf("\nwasted_time_k[%i] %llu", i, wasted_time_k[i]); printf("\ntotal_tx_time %llu, total_tx_wasted_time %llu, total_no_tx_time %llu, total_committed_transactions_by_collector_threads %i", total_tx_time, total_tx_wasted_time, total_no_tx_time, total_committed_transactions_by_collector_threads); avg_running_tx=avg_running_tx/(float)total_committed_transactions_by_collector_threads; float *mu_k=(float*)malloc((max_concurrent_threads+1) * sizeof(float)); float lambda = 1.0 / (((float) total_no_tx_time/(float)1000000000)/(float) total_committed_transactions_by_collector_threads); for (i=0;i<max_concurrent_threads+1;i++){ if((wasted_time_k[i]>0 || useful_time_k[i]>0) && commit_active_threads[i] > 0){ mu_k[i]= 1.0 / ((((float) wasted_time_k[i] / (float)1000000000) / (float)commit_active_threads[i]) + (((float) useful_time_k[i]/(float)1000000000) / (float) commit_active_threads[i])); printf("\nk:%i\tmu_k: %f, %llu, %llu, %llu", i, mu_k[i], wasted_time_k[i], useful_time_k[i], commit_active_threads[i]); }else{ mu_k[i]= 1.0 / ((((float)total_tx_wasted_time/(float)1000000000)/(float)total_committed_transactions_by_collector_threads)+(((float)total_tx_time/(float)1000000000) / (float) total_committed_transactions_by_collector_threads)); printf("\nk:%i\tmu_k: %f - average", i, mu_k[i]); } }//[email protected] float th = get_throughput(lambda,mu_k,m); float th_minus_1=0.0,th_plus_1=0.0,th_minus_2=0.0; if(m>3){ th_minus_1=get_throughput(lambda,mu_k,m-1); th_minus_2=get_throughput(lambda,mu_k,m-2); }else if(m>2)th_minus_1=get_throughput(lambda,mu_k,m-1); if(th_minus_2 >= th && th_minus_2 >= th_minus_1 && m>3) { max_allowed_running_transactions-=2; //printf("\nSelected th_minus_2"); }else if(th_minus_1>=th){ max_allowed_running_transactions--; 
//printf("\nSelected th_minus_1"); }else if(m<max_concurrent_threads){ float avg_restart_k= (float)conflict_active_threads[m]/(float)commit_active_threads[m]; float p_a_k = avg_restart_k /(1.0 + avg_restart_k); float p_a_1 = 1- pow(1-p_a_k,1.0/(double)(m-1)); float avg_restart_k_plus_1 = ((1.0 - pow((1.0 - p_a_1),m))/ pow((1-p_a_1),m)); float w_m=0.0,u_m=0.0; if(conflict_active_threads[m]>0) w_m=((float)wasted_time_k[m]/(float)1000000000)/(float)conflict_active_threads[m]; else if(tx_conflict_table_times>0)w_m=((float)total_tx_wasted_time/(float)1000000000)/(float)tx_conflict_table_times; if(commit_active_threads[m]>0) u_m = ((float)useful_time_k[m]/(float)1000000000)/(float)commit_active_threads[m]; else u_m = ((float)total_tx_time/(float)1000000000)/(float)total_committed_transactions_by_collector_threads; mu_k[m + 1]= 1.0/((w_m * avg_restart_k_plus_1) + u_m ); th_plus_1 = get_throughput(lambda,mu_k,m + 1); if(th_plus_1 > th) { max_allowed_running_transactions++; //printf("\nSelected th_plus_1"); } else { //printf("\nSelected th"); } }// tx->start_no_tx_time=STM_TIMER_READ(); printf("\nPredicted: %f|%f|%f|%f, measured: %f, max txs: %i", th_minus_2, th_minus_1, th, th_plus_1, (float)total_committed_transactions/((float)(now-last_tuning_time)/(float)1000000000), max_allowed_running_transactions); printf("\tTotal commits: %i (as a collector: %i)",total_committed_transactions, total_committed_transactions_by_collector_threads); printf("\nlambda: %f mu: %f", lambda, 1.0 / ((((float)total_tx_wasted_time/(float)1000000000)/(float)total_committed_transactions_by_collector_threads)+(((float)total_tx_time/(float)1000000000) / (float) total_committed_transactions_by_collector_threads))); printf("\nAvg_running_tx: %f", avg_running_tx, 1.0); fflush(stdout); startEnergy(); last_tuning_time=STM_TIMER_READ(); }