예제 #1
0
/* =============================================================================
 * threadWait
 * -- Body run by each thread. Reads the thread id from argPtr, zeroes the
 *    flags[] array, records the id in thread-local storage, requests CPU
 *    affinity to core (threadId % 8), and then alternates between the two
 *    barriers that bracket each call of global_funcPtr.
 * -- The loop ends when global_doShutdown is observed after the first barrier,
 *    or, for thread 0 only, right after the second barrier (thread 0 also
 *    calls endEnergy() at that point).
 * =============================================================================
 */
static void threadWait (void* argPtr)
{
    long threadId = *(long*)argPtr;
    int lockIdx = 0;
    cpu_set_t affinityMask;

    /* Every thread that enters here clears all LOCKS entries of flags[]. */
    while (lockIdx < LOCKS) {
        flags[lockIdx] = 0;
        lockIdx++;
    }

    THREAD_LOCAL_SET(global_threadId, (long)threadId);

    /* pid 0 addresses the calling thread; the result is not checked. */
    CPU_ZERO(&affinityMask);
    CPU_SET(threadId % 8, &affinityMask);
    sched_setaffinity(0, sizeof(cpu_set_t), &affinityMask);

    for (;;) {
        THREAD_BARRIER(global_barrierPtr, threadId); /* wait for start parallel */
        if (global_doShutdown) {
            break;
        }
        global_funcPtr(global_argPtr);
        THREAD_BARRIER(global_barrierPtr, threadId); /* wait for end parallel */
        if (threadId != 0) {
            continue; /* the other threads go back to the start barrier */
        }
        endEnergy();
        break;
    }
}
예제 #2
0
/* =============================================================================
 * threadWait
 * -- Body run by each thread. Reads the thread id from argPtr, records it in
 *    thread-local storage, looks up phys_id / cluster_id for this thread,
 *    initializes one local lock record per lock (LOCKS of them), requests CPU
 *    affinity to core (threadId % 8), and then alternates between the two
 *    barriers that bracket each call of global_funcPtr.
 * -- The loop ends when global_doShutdown is observed after the first barrier,
 *    or, for thread 0 only, right after the second barrier (thread 0 also
 *    calls endEnergy() at that point).
 * =============================================================================
 */
static void threadWait (void* argPtr)
{
    long threadId = *(long*)argPtr;
    unsigned int lockIdx;
    cpu_set_t affinityMask;

    THREAD_LOCAL_SET(global_threadId, (long)threadId);

    /* Map the logical thread id to a physical core and its cluster. */
    phys_id = the_cores[(int)threadId];
    cluster_id = get_cluster(phys_id);

    /* One local record per global lock, stored in this core's column. */
    for (lockIdx = 0; lockIdx < LOCKS; lockIdx++) {
        local_th_data[lockIdx][phys_id] = init_lock_local(phys_id, the_lock[lockIdx]);
    }

    /* pid 0 addresses the calling thread; the result is not checked. */
    CPU_ZERO(&affinityMask);
    CPU_SET(threadId % 8, &affinityMask);
    sched_setaffinity(0, sizeof(cpu_set_t), &affinityMask);

    for (;;) {
        THREAD_BARRIER(global_barrierPtr, threadId); /* wait for start parallel */
        if (global_doShutdown) {
            break;
        }
        global_funcPtr(global_argPtr);
        THREAD_BARRIER(global_barrierPtr, threadId); /* wait for end parallel */
        if (threadId != 0) {
            continue; /* the other threads go back to the start barrier */
        }
        endEnergy();
        break;
    }
}
예제 #3
0
파일: kmeans.c 프로젝트: HPDCS/stmF2C2
/* =============================================================================
 * readFully
 * -- Reads exactly numBytes bytes from fd into dst, calling read() again
 *    after a short read.
 * -- Returns 0 on success, -1 if read() reports an error or end of file
 *    before numBytes bytes have arrived.
 * =============================================================================
 */
static int
readFully (int fd, void* dst, size_t numBytes)
{
    char* cursor = (char*)dst;

    while (numBytes > 0) {
        ssize_t got = read(fd, cursor, numBytes);
        if (got <= 0) {
            return -1;
        }
        cursor += got;
        numBytes -= (size_t)got;
    }

    return 0;
}


/* =============================================================================
 * main
 * -- Parses the command line:
 *      -i <file>   input file (required)
 *      -b          input file is binary (two int counts, then float data)
 *      -t <float>  threshold passed to cluster_exec (default 0.001)
 *      -m <int>    max_nclusters (default 13)
 *      -n <int>    min_nclusters (default 4)
 *      -z          disable the z-score transform
 *      -p <int>    number of threads (default 1)
 * -- Loads the objects into buf, copies them into attributes[][] and calls
 *    cluster_exec once; optionally writes the result (GNUPLOT_OUTPUT,
 *    OUTPUT_TO_FILE); prints thread count, global_time and, when
 *    STM_ENERGY_MONITOR is defined, the value returned by endEnergy().
 * -- Malformed input (unreadable/truncated binary file, non-positive counts,
 *    text line with too few attributes) terminates with exit(1).
 * =============================================================================
 */
MAIN(argc, argv)
{
    int     max_nclusters = 13;
    int     min_nclusters = 4;
    char*   filename = 0;
    float*  buf;
    float** attributes;
    float** cluster_centres = NULL;
    int     i;
    int     j;
    int     best_nclusters = 0;
    int*    cluster_assign;
    int     numAttributes;
    int     numObjects;
    int     use_zscore_transform = 1;
    char*   line;
    int     isBinaryFile = 0;
    int     nloops;
    int     nthreads;
    float   threshold = 0.001;
    int     opt;
    size_t  dataBytes; /* size in bytes of the full [numObjects][numAttributes] matrix */

    GOTO_REAL();

    line = (char*)malloc(MAX_LINE_LENGTH); /* reserve memory line */
    assert(line);

    nthreads = 1;
    while ((opt = getopt(argc,(char**)argv,"p:i:m:n:t:bz")) != EOF) {
        switch (opt) {
            case 'i': filename = optarg;
                      break;
            case 'b': isBinaryFile = 1;
                      break;
            case 't': threshold = atof(optarg);
                      break;
            case 'm': max_nclusters = atoi(optarg);
                      break;
            case 'n': min_nclusters = atoi(optarg);
                      break;
            case 'z': use_zscore_transform = 0;
                      break;
            case 'p': nthreads = atoi(optarg);
                      break;
            case '?': usage((char*)argv[0]);
                      break;
            default: usage((char*)argv[0]);
                      break;
        }
    }

    /* NOTE(review): the code below relies on usage() not returning -- confirm. */
    if (filename == 0) {
        usage((char*)argv[0]);
    }

    if (max_nclusters < min_nclusters) {
        fprintf(stderr, "Error: max_clusters must be >= min_clusters\n");
        usage((char*)argv[0]);
    }

    SIM_GET_NUM_CPU(nthreads);

    numAttributes = 0;
    numObjects = 0;

    /*
     * From the input file, get the numAttributes and numObjects
     */
    if (isBinaryFile) {
        int infile;
        /* O_RDONLY without O_CREAT takes no mode argument. */
        if ((infile = open(filename, O_RDONLY)) == -1) {
            fprintf(stderr, "Error: no such file (%s)\n", filename);
            exit(1);
        }
        if (readFully(infile, &numObjects, sizeof(int)) != 0 ||
            readFully(infile, &numAttributes, sizeof(int)) != 0) {
            fprintf(stderr, "Error: cannot read header of file (%s)\n", filename);
            exit(1);
        }
        /* The counts come straight from the file; reject values that would
         * produce a zero-sized or negative allocation. */
        if (numObjects <= 0 || numAttributes <= 0) {
            fprintf(stderr, "Error: invalid object/attribute count in file (%s)\n", filename);
            exit(1);
        }
        /* Multiply in size_t so the product is not computed in int. */
        dataBytes = (size_t)numObjects * (size_t)numAttributes * sizeof(float);

        /* Allocate space for attributes[] and read attributes of all objects */
        buf = (float*)malloc(dataBytes);
        assert(buf);
        attributes = (float**)malloc((size_t)numObjects * sizeof(float*));
        assert(attributes);
        attributes[0] = (float*)malloc(dataBytes);
        assert(attributes[0]);
        /* Rows are slices of the single contiguous block at attributes[0]. */
        for (i = 1; i < numObjects; i++) {
            attributes[i] = attributes[i-1] + numAttributes;
        }
        if (readFully(infile, buf, dataBytes) != 0) {
            fprintf(stderr, "Error: truncated data in file (%s)\n", filename);
            exit(1);
        }
        close(infile);
    } else {
        FILE *infile;
        if ((infile = fopen(filename, "r")) == NULL) {
            fprintf(stderr, "Error: no such file (%s)\n", filename);
            exit(1);
        }
        /* Pass 1: every non-blank line is one object. */
        while (fgets(line, MAX_LINE_LENGTH, infile) != NULL) {
            if (strtok(line, " \t\n") != 0) {
                numObjects++;
            }
        }
        rewind(infile);
        /* Pass 2: count the attributes on the first non-blank line. */
        while (fgets(line, MAX_LINE_LENGTH, infile) != NULL) {
            if (strtok(line, " \t\n") != 0) {
                /* Ignore the id (first attribute): numAttributes = 1; */
                while (strtok(NULL, " ,\t\n") != NULL) {
                    numAttributes++;
                }
                break;
            }
        }
        if (numObjects <= 0 || numAttributes <= 0) {
            fprintf(stderr, "Error: no usable data in file (%s)\n", filename);
            exit(1);
        }
        dataBytes = (size_t)numObjects * (size_t)numAttributes * sizeof(float);

        /* Allocate space for attributes[] and read attributes of all objects */
        buf = (float*)malloc(dataBytes);
        assert(buf);
        attributes = (float**)malloc((size_t)numObjects * sizeof(float*));
        assert(attributes);
        attributes[0] = (float*)malloc(dataBytes);
        assert(attributes[0]);
        for (i = 1; i < numObjects; i++) {
            attributes[i] = attributes[i-1] + numAttributes;
        }
        rewind(infile);
        /* Pass 3: parse the values; i indexes the flat buf[] array. */
        i = 0;
        while (fgets(line, MAX_LINE_LENGTH, infile) != NULL) {
            if (strtok(line, " \t\n") == NULL) {
                continue;
            }
            for (j = 0; j < numAttributes; j++) {
                char* token = strtok(NULL, " ,\t\n");
                /* A line shorter than the first one would hand NULL to atof. */
                if (token == NULL) {
                    fprintf(stderr, "Error: too few attributes on a line of file (%s)\n", filename);
                    exit(1);
                }
                buf[i] = atof(token);
                i++;
            }
        }
        fclose(infile);
    }

    TM_STARTUP(nthreads);
    thread_startup(nthreads);

    /*
     * The core of the clustering
     */
    cluster_assign = (int*)malloc((size_t)numObjects * sizeof(int));
    assert(cluster_assign);

    nloops = 1;

#ifdef STM_ENERGY_MONITOR
    startEnergy();
#endif /* STM_ENERGY_MONITOR */

    for (i = 0; i < nloops; i++) {
        /*
         * Since zscore transform may perform in cluster() which modifies the
         * contents of attributes[][], we need to re-store the originals
         */
        memcpy(attributes[0], buf, dataBytes);

        cluster_centres = NULL;
        cluster_exec(nthreads,
                     numObjects,
                     numAttributes,
                     attributes,           /* [numObjects][numAttributes] */
                     use_zscore_transform, /* 0 or 1 */
                     min_nclusters,        /* pre-define range from min to max */
                     max_nclusters,
                     threshold,
                     &best_nclusters,      /* return: number between min and max */
                     &cluster_centres,     /* return: [best_nclusters][numAttributes] */
                     cluster_assign);      /* return: [numObjects] cluster id for each object */

    }

#ifdef GNUPLOT_OUTPUT
    {
        /* One file per cluster, holding the first two attributes of its members. */
        FILE** fptr;
        char outFileName[1024];
        fptr = (FILE**)malloc((size_t)best_nclusters * sizeof(FILE*));
        assert(fptr);
        for (i = 0; i < best_nclusters; i++) {
            snprintf(outFileName, sizeof(outFileName), "group.%d", i);
            fptr[i] = fopen(outFileName, "w");
            if (fptr[i] == NULL) {
                fprintf(stderr, "Error: cannot create file (%s)\n", outFileName);
                exit(1);
            }
        }
        for (i = 0; i < numObjects; i++) {
            fprintf(fptr[cluster_assign[i]],
                    "%6.4f %6.4f\n",
                    attributes[i][0],
                    attributes[i][1]);
        }
        for (i = 0; i < best_nclusters; i++) {
            fclose(fptr[i]);
        }
        free(fptr);
    }
#endif /* GNUPLOT_OUTPUT */

#ifdef OUTPUT_TO_FILE
    {
        /* Output: the coordinates of the cluster centres */
        FILE* cluster_centre_file;
        FILE* clustering_file;
        char outFileName[1024];

        /* snprintf bounds the write; an over-long name is truncated. */
        snprintf(outFileName, sizeof(outFileName), "%s.cluster_centres", filename);
        cluster_centre_file = fopen(outFileName, "w");
        if (cluster_centre_file == NULL) {
            fprintf(stderr, "Error: cannot create file (%s)\n", outFileName);
            exit(1);
        }
        for (i = 0; i < best_nclusters; i++) {
            fprintf(cluster_centre_file, "%d ", i);
            for (j = 0; j < numAttributes; j++) {
                fprintf(cluster_centre_file, "%f ", cluster_centres[i][j]);
            }
            fprintf(cluster_centre_file, "\n");
        }
        fclose(cluster_centre_file);

        /* Output: the closest cluster centre to each of the data points */
        snprintf(outFileName, sizeof(outFileName), "%s.cluster_assign", filename);
        clustering_file = fopen(outFileName, "w");
        if (clustering_file == NULL) {
            fprintf(stderr, "Error: cannot create file (%s)\n", outFileName);
            exit(1);
        }
        for (i = 0; i < numObjects; i++) {
            fprintf(clustering_file, "%d %d\n", i, cluster_assign[i]);
        }
        fclose(clustering_file);
    }
#endif /* OUTPUT TO_FILE */

#ifdef OUTPUT_TO_STDOUT
    {
        /* Output: the coordinates of the cluster centres.
         * The printf calls are disabled, so these loops currently print nothing. */
        for (i = 0; i < best_nclusters; i++) {
            //printf("%d ", i);
            for (j = 0; j < numAttributes; j++) {
                //printf("%f ", cluster_centres[i][j]);
            }
            //printf("\n");
        }
    }
#endif /* OUTPUT TO_STDOUT */


#ifdef STM_ENERGY_MONITOR
    float joule=endEnergy();
    printf("Threads: %i\tElapsed time: %f Energy: %f",nthreads, global_time, joule);
#else
    printf("Threads: %i\tElapsed time: %f", nthreads, global_time);
#endif /* STM_ENERGY_MONITOR */

    free(cluster_assign);
    free(attributes[0]); /* the contiguous data block behind every row */
    free(attributes);
    if (cluster_centres != NULL) {
        free(cluster_centres[0]);
        free(cluster_centres);
    }
    free(buf);
    free(line);

    TM_SHUTDOWN();
    if (getenv("STM_STATS") != NULL) {
        unsigned long u;
        if (stm_get_global_stats("global_nb_commits", &u) != 0){
            printf("\tThroughput: %f\n",u/global_time);
        }
    }

    GOTO_SIM();

    thread_shutdown();

    MAIN_RETURN(0);
}
예제 #4
0
파일: bayes.c 프로젝트: HPDCS/stmF2C2
/* =============================================================================
 * main
 * -- Reads the run parameters from global_params[] (filled by parseArgs),
 *    generates a data set and its adtree, scores the generating network,
 *    runs the learner, and prints the thread count and the elapsed time of
 *    learner_run (plus the value returned by endEnergy() when
 *    STM_ENERGY_MONITOR is defined).
 * -- When the STM_STATS environment variable is set and
 *    stm_get_global_stats("global_nb_commits") succeeds, the commit count
 *    divided by the elapsed learning time is printed as throughput.
 * =============================================================================
 */
MAIN(argc, argv)
{
    GOTO_REAL();

    /*
     * Initialization
     */

    parseArgs(argc, (char** const)argv);
    long numThread     = global_params[PARAM_THREAD];
    long numVar        = global_params[PARAM_VAR];
    long numRecord     = global_params[PARAM_RECORD];
    long randomSeed    = global_params[PARAM_SEED];
    long maxNumParent  = global_params[PARAM_NUMBER];
    long percentParent = global_params[PARAM_PERCENT];
    global_insertPenalty = global_params[PARAM_INSERT];
    global_maxNumEdgeLearned = global_params[PARAM_EDGE];
    SIM_GET_NUM_CPU(numThread);


    TM_STARTUP(numThread);

    P_MEMORY_STARTUP(numThread);
    thread_startup(numThread);

    /*
    printf("Random seed                = %li\n", randomSeed);
    printf("Number of vars             = %li\n", numVar);
    printf("Number of records          = %li\n", numRecord);
    printf("Max num parents            = %li\n", maxNumParent);
    printf("%% chance of parent         = %li\n", percentParent);
    printf("Insert penalty             = %li\n", global_insertPenalty);
    printf("Max num edge learned / var = %li\n", global_maxNumEdgeLearned);
    printf("Operation quality factor   = %f\n", global_operationQualityFactor);
    fflush(stdout);
    */

    /*
     * Generate data
     */

    //printf("Generating data... ");
    //fflush(stdout);

    random_t* randomPtr = random_alloc();
    assert(randomPtr);
    random_seed(randomPtr, randomSeed);

    data_t* dataPtr = data_alloc(numVar, numRecord, randomPtr);
    assert(dataPtr);
    net_t* netPtr = data_generate(dataPtr, -1, maxNumParent, percentParent);
    assert(netPtr); /* netPtr is handed to score() and net_free() below */
    //puts("done.");
    //fflush(stdout);

    /*
     * Generate adtree
     */

    adtree_t* adtreePtr = adtree_alloc();
    assert(adtreePtr);

    //printf("Generating adtree... ");
    //fflush(stdout);

    TIMER_T adtreeStartTime;
    TIMER_READ(adtreeStartTime);

    adtree_make(adtreePtr, dataPtr);

    TIMER_T adtreeStopTime;
    TIMER_READ(adtreeStopTime);

    //puts("done.");
    //fflush(stdout);
    //printf("Adtree time = %f\n",TIMER_DIFF_SECONDS(adtreeStartTime, adtreeStopTime));
    //fflush(stdout);

    /*
     * Score original network
     */

    float actualScore = score(netPtr, adtreePtr);
    (void)actualScore; /* only consumed by the disabled diagnostic print below */
    net_free(netPtr);

    /*
     * Learn structure of Bayesian network
     */

    learner_t* learnerPtr = learner_alloc(dataPtr, adtreePtr, numThread);
    assert(learnerPtr);
    data_free(dataPtr); /* save memory */

    //printf("Learning structure...");
    //fflush(stdout);

#ifdef STM_ENERGY_MONITOR
    startEnergy();
#endif /* STM_ENERGY_MONITOR */

    /* Only learner_run() is inside the timed (and energy-monitored) window. */
    TIMER_T learnStartTime;
    TIMER_READ(learnStartTime);
    GOTO_SIM();

    learner_run(learnerPtr);

    GOTO_REAL();
    TIMER_T learnStopTime;
    TIMER_READ(learnStopTime);

    /* numThread is a long, so it is printed with %li. */
#ifdef STM_ENERGY_MONITOR
    float delta_energy = endEnergy();
    printf("Threads: %li\tElapsed time: %f Energy: %f",numThread, TIMER_DIFF_SECONDS(learnStartTime, learnStopTime), delta_energy);
#else
    printf("Threads: %li\tElapsed time: %f",numThread, TIMER_DIFF_SECONDS(learnStartTime, learnStopTime));
#endif /* STM_ENERGY_MONITOR */

    fflush(stdout);

    /*
     * Check solution
     */

    bool_t status = net_isCycle(learnerPtr->netPtr);
    assert(!status);

#ifndef SIMULATOR
    float learnScore = learner_score(learnerPtr);
    (void)learnScore; /* only consumed by the disabled diagnostic print below */
    //printf("Learn score  = %f\n", learnScore);
#endif
    //printf("Actual score = %f\n", actualScore);

    /*
     * Clean up
     */

    fflush(stdout);
    random_free(randomPtr);
#ifndef SIMULATOR
    adtree_free(adtreePtr);
#  if 0    
    learner_free(learnerPtr);
#  endif    
#endif

    TM_SHUTDOWN();
    if (getenv("STM_STATS") != NULL) {
        unsigned long u;
        if (stm_get_global_stats("global_nb_commits", &u) != 0){
            printf("\tThroughput: %f\n",u/TIMER_DIFF_SECONDS(learnStartTime, learnStopTime));
        }
    }
    P_MEMORY_SHUTDOWN();

    GOTO_SIM();

    thread_shutdown();

    MAIN_RETURN(0);
}
예제 #5
0
inline void stm_tune_scheduler(){
	TX_GET;
	int m=max_allowed_running_transactions;
    endEnergy();
	stm_time_t now=STM_TIMER_READ();
	stm_time_t total_tx_wasted_time=0;
	stm_time_t total_tx_time=0;
	stm_time_t total_no_tx_time=0;
	stm_time_t total_tx_spin_time=0;
	stm_time_t *wasted_time_k=(stm_time_t *)malloc((max_concurrent_threads+1)*sizeof(stm_time_t));
	stm_time_t *useful_time_k=(stm_time_t *)malloc((max_concurrent_threads+1)*sizeof(stm_time_t));
	long * conflict_active_threads=(long *)malloc((max_concurrent_threads + 1) * sizeof(long));
	long * commit_active_threads=(long *)malloc((max_concurrent_threads + 1) * sizeof(long));
	memset(conflict_active_threads, 0, (max_concurrent_threads+1) * sizeof(long));
	memset(commit_active_threads, 0, (max_concurrent_threads+1) * sizeof(long));
	memset(wasted_time_k, 0, (max_concurrent_threads+1) * sizeof(stm_time_t));
	memset(useful_time_k, 0, (max_concurrent_threads+1) * sizeof(stm_time_t));
	long total_committed_transactions_by_collector_threads=0;
	long total_committed_transactions=0;
	long tx_conflict_table_times=0;
	float avg_running_tx=0;

	tx->total_no_tx_time+=now - tx->start_no_tx_time ;
	stm_tx_t *thread=_tinystm.threads;
	int i=0;
	while(thread!=NULL){
		total_tx_time+=thread->total_useful_time;
		total_no_tx_time+=thread->total_no_tx_time;
		total_tx_wasted_time+=thread->total_wasted_time;
		total_tx_spin_time+=thread->total_spin_time;
		total_committed_transactions_by_collector_threads+=thread->committed_transactions_as_a_collector_thread;
		total_committed_transactions+=thread->committed_transactions;
		tx_conflict_table_times+=thread->aborted_transactions;

		for(i=0;i<max_concurrent_threads+1;i++){
			wasted_time_k[i]+=thread->total_tx_wasted_per_active_transactions[i];
			//printf("\nwasted_time_k[%i] %llu", i, thread->total_tx_wasted_per_active_transactions[i]);
			useful_time_k[i]+=thread->total_tx_useful_per_active_transactions[i];
			commit_active_threads[i]+=thread->total_tx_committed_per_active_transactions[i];
			avg_running_tx+=(float)i * (float) thread->total_tx_committed_per_active_transactions[i];
			conflict_active_threads[i]+=thread->total_conflict_per_active_transactions[i];
		}
		reset_local_stats(thread);
		thread=thread->next;
	}
	for(i=0;i<max_concurrent_threads+1;i++) printf("\nwasted_time_k[%i] %llu", i, wasted_time_k[i]);
	printf("\ntotal_tx_time %llu, total_tx_wasted_time %llu, total_no_tx_time %llu, total_committed_transactions_by_collector_threads %i", total_tx_time, total_tx_wasted_time, total_no_tx_time, total_committed_transactions_by_collector_threads);
	avg_running_tx=avg_running_tx/(float)total_committed_transactions_by_collector_threads;
	float *mu_k=(float*)malloc((max_concurrent_threads+1) * sizeof(float));
	float lambda = 1.0 / (((float) total_no_tx_time/(float)1000000000)/(float) total_committed_transactions_by_collector_threads);
	for (i=0;i<max_concurrent_threads+1;i++){
		if((wasted_time_k[i]>0 || useful_time_k[i]>0) && commit_active_threads[i] > 0){
			mu_k[i]= 1.0 / ((((float) wasted_time_k[i] / (float)1000000000) / (float)commit_active_threads[i]) + (((float) useful_time_k[i]/(float)1000000000) / (float) commit_active_threads[i]));
			printf("\nk:%i\tmu_k: %f, %llu, %llu, %llu", i, mu_k[i], wasted_time_k[i], useful_time_k[i], commit_active_threads[i]);
		}else{
			mu_k[i]= 1.0 / ((((float)total_tx_wasted_time/(float)1000000000)/(float)total_committed_transactions_by_collector_threads)+(((float)total_tx_time/(float)1000000000) / (float) total_committed_transactions_by_collector_threads));
			printf("\nk:%i\tmu_k: %f - average", i, mu_k[i]);
		}
	}//[email protected]

	float th = get_throughput(lambda,mu_k,m);
	float th_minus_1=0.0,th_plus_1=0.0,th_minus_2=0.0;
	if(m>3){
		th_minus_1=get_throughput(lambda,mu_k,m-1);
		th_minus_2=get_throughput(lambda,mu_k,m-2);
	}else if(m>2)th_minus_1=get_throughput(lambda,mu_k,m-1);
	if(th_minus_2 >= th && th_minus_2 >= th_minus_1 && m>3) {
		max_allowed_running_transactions-=2;
		//printf("\nSelected th_minus_2");
	}else if(th_minus_1>=th){
		max_allowed_running_transactions--;
		//printf("\nSelected th_minus_1");
	}else if(m<max_concurrent_threads){
		float avg_restart_k= (float)conflict_active_threads[m]/(float)commit_active_threads[m];
		float p_a_k = avg_restart_k /(1.0 + avg_restart_k);
		float p_a_1 = 1- pow(1-p_a_k,1.0/(double)(m-1));
		float avg_restart_k_plus_1 = ((1.0 - pow((1.0 - p_a_1),m))/ pow((1-p_a_1),m));
		float w_m=0.0,u_m=0.0;
		if(conflict_active_threads[m]>0)
			w_m=((float)wasted_time_k[m]/(float)1000000000)/(float)conflict_active_threads[m];
		else if(tx_conflict_table_times>0)w_m=((float)total_tx_wasted_time/(float)1000000000)/(float)tx_conflict_table_times;
		if(commit_active_threads[m]>0)
			u_m = ((float)useful_time_k[m]/(float)1000000000)/(float)commit_active_threads[m];
		else u_m = ((float)total_tx_time/(float)1000000000)/(float)total_committed_transactions_by_collector_threads;
		mu_k[m + 1]= 1.0/((w_m * avg_restart_k_plus_1) + u_m );
		th_plus_1 = get_throughput(lambda,mu_k,m + 1);
		if(th_plus_1 > th) {
			max_allowed_running_transactions++;
			//printf("\nSelected th_plus_1");
		} else {
			//printf("\nSelected th");
		}
	}//

	tx->start_no_tx_time=STM_TIMER_READ();
	printf("\nPredicted: %f|%f|%f|%f, measured: %f, max txs: %i", th_minus_2, th_minus_1, th, th_plus_1, (float)total_committed_transactions/((float)(now-last_tuning_time)/(float)1000000000), max_allowed_running_transactions);
	printf("\tTotal commits: %i (as a collector: %i)",total_committed_transactions, total_committed_transactions_by_collector_threads);
	printf("\nlambda: %f mu: %f", lambda, 1.0 / ((((float)total_tx_wasted_time/(float)1000000000)/(float)total_committed_transactions_by_collector_threads)+(((float)total_tx_time/(float)1000000000) / (float) total_committed_transactions_by_collector_threads)));
	printf("\nAvg_running_tx: %f", avg_running_tx, 1.0);
	fflush(stdout);
    startEnergy();
	last_tuning_time=STM_TIMER_READ();

}