Beispiel #1
0
/*******************************************************************
 Subroutine to compute the Variance of Matrix
   matrix *X:      the pointer to the matrix (X->m rows, X->n columns)
   char direction: 'c' - compute the variance of each column
                   'r' - compute the variance of each row
   vector *var:    the pointer to the variance vector (output).
                   var->l is set to the number of values written
                   (X->n for 'c', X->m for 'r'); var->pr is written
                   without any capacity check, so the caller must have
                   allocated enough room beforehand.

 Each variance is obtained as vcovar(v, v) of the extracted column/row.

 return value: '1' - successfully exit
               '0' - direction is neither 'c' nor 'r'; a message is
                     printed and var is left untouched
*******************************************************************/
int mvar(matrix *X, char direction, vector *var)
{
	int n_cols = X->n;   /* length of one row    */
	int n_rows = X->m;   /* length of one column */
	int i;
	vector slice;        /* scratch copy of the current column/row */

	/* Reject a bad direction before allocating any scratch storage. */
	if (direction != 'c' && direction != 'r') {
		printf("the direction parameter should be 'c' or 'r'");
		return 0;
	}

	if (direction == 'c') {  // compute the variance of each column
		vnew(&slice, n_rows);
		var->l = n_cols;
		for (i = 0; i < n_cols; i++) {
			getcolvec(X, i, &slice);
			var->pr[i] = vcovar(&slice, &slice);
		}
	} else {                 // compute the variance of each row
		vnew(&slice, n_cols);
		var->l = n_rows;
		for (i = 0; i < n_rows; i++) {
			getrowvec(X, i, &slice);
			var->pr[i] = vcovar(&slice, &slice);
		}
	}

	vdelete(&slice);

	return 1;
}
Beispiel #2
0
/*******************************************************************
 Subroutine to do the EM algorithm
   matrix *D:       the pointer to the matrix data (n = D->m samples, p = D->n dimensions)
   matrix *mean0_x: the pointer to a matrix containing the initial Means of clusters
                         (k0 = mean0_x->m clusters; copied into a k0 x p working matrix)
   vector *w0:		the pointer to a vector containing the initial mixing proportion of clusters.
                         NOTE: w0 is updated in place on every EM iteration.
   double vv:       the value for initializing the Covariance matrix of clusters
                         (each cluster starts from vv * I; vv is doubled until the model
                         density returned by veModel is non-zero for every sample)
   double error:    the error threshold; iteration stops once the mean absolute change of
                         the posterior column of the LAST cluster is <= error
   vector *Zjk_up:  the pointer to a vector containing Posterior probabilities of the up-level 
                         cluster samples (one entry per sample)
   matrix *mean1_x: the pointer to a matrix containing the Means of clusters in t-space
   vector *w0_t:	the pointer to a vector containing the mixing proportions of the identified 
                         clusters in t-space
   matrix *cov_mat: the pointer to a group of matrices (k0 of them) containing the Covariance
                         matrix of clusters in t-space
   matrix *Zjk:     the pointer to a matrix containing Posterior probabilities of all samples 
                         belonging to all the sub-level clusters, each column is for one cluster
                         (indexed as n x k0, row-major).
   
 return value: '1' - successfully exit (the only value currently returned)
               '0' - exit with warning/error

 NOTE(review): there is no guard against a zero mixture density (Fx) or a
 zero cluster weight (zz) inside the EM loop, and no iteration cap.
*******************************************************************/
int veSubEM(matrix *D, matrix *mean0_x, vector *w0, double vv, double error, vector *Zjk_up, //input
			matrix *mean1_x, vector *w0_t, matrix *cov_mat, matrix *Zjk)  //output
{
	int k0, n, p;
	int i, j, k, u, s;
	matrix *Var0;            // k0 working covariance matrices (p x p each)
	matrix Gxn;              // n x k0 per-cluster densities filled by veModel
	vector Fx;               // n mixture densities filled by veModel
	matrix MUK;              // means used by the current iteration
	matrix MU1;              // means being estimated for the next iteration
	int zeroFx_num = 1;
	double err = error + (double)1;   // force at least one EM iteration
	vector Zjk_temp;         // posterior column kept from the previous iteration

	n = D->m;
	p = D->n;
	k0 = mean0_x->m;
	
	Var0 = new matrix[k0];
	for(i=0; i<k0; i++) {
		mnew(Var0+i, p, p);
	}
	mnew(&Gxn, n, k0);
	vnew(&Fx, n);
	vnew(&Zjk_temp, n);
	mnew(&MUK, k0, p);
	mcopy(mean0_x, &MUK);
	mnew(&MU1, k0, p);

	vector D_j;
	vector Zjk_k;
	double sum_tmp = 0;
	matrix Ck;
	vector D_i;
	vector MUK_k;
	vector cen_D_i;
	matrix mtmp;
	vector vtmp;

	vnew(&D_j, n);
	vnew(&Zjk_k, n);
	mnew(&Ck, p, p);
	vnew(&D_i, p);
	vnew(&MUK_k, p);
	vnew(&cen_D_i, p);
	mnew(&mtmp, p, p);
	vnew(&vtmp, n);

	// Initialise every cluster covariance to vv * I and evaluate the model.
	// If any sample gets a density of exactly zero, double vv and retry.
	while (zeroFx_num != 0) {
		for(i=0; i<k0; i++) {
			meye(Var0+i);
			for (j=0; j<p; j++) {
				*((Var0+i)->pr+j*p+j) = vv;
			}
		}
	
		veModel(D, mean0_x, Var0, w0, &Gxn, &Fx);

		zeroFx_num = 0;
		for (i=0; i<n; i++) {
			if (*(Fx.pr+i) == 0) {
				zeroFx_num++;
			}
		}

		vv *= 2;
	
	}

	vones(&Zjk_temp);

	#ifdef _DEBUG
	int l = 0;   // iteration counter, only used by the debug trace
	#endif
	while (err > error) {
		
		#ifdef _DEBUG
		printf(" \n...... in EM loop %d ......\n", ++l);

		printf("\n L%d: w0 = \n", l);
		vprint(w0);
		printf("\n L%d: MUK = \n", l);
		mprint(&MUK);
		printf("\n L%d: Var0 = \n", l);
		for(i=0; i<k0; i++) {
			mprint(Var0+i);
			printf("\n");
		}
		printf("\n L%d: Zjk = \n", l);
		mprint(Zjk);
		#endif

		veModel(D, &MUK, Var0, w0, &Gxn, &Fx);
		
		#ifdef _DEBUG
		printf("\n L%d: Gxn = \n", l);
		mprint(&Gxn);
		printf("\n L%d: Fx = \n", l);
		vprint(&Fx);
		#endif

		for (k=0; k<k0; k++) {
			u = k*p;

			// Posteriors of cluster k, weighted by the up-level posterior,
			// and the new mixing proportion.
			double zz = 0;
			double zz_up = 0;
			for (i=0; i<n; i++) {
				*(Zjk->pr+i*k0+k) = (*(w0->pr+k)) * Zjk_up->pr[i] * (*(Gxn.pr+i*k0+k)) / (*(Fx.pr+i));
				zz += *(Zjk->pr+i*k0+k);
				zz_up += Zjk_up->pr[i];
			}
			*(w0->pr+k) = zz/zz_up;

			// New mean of cluster k: posterior-weighted average per dimension.
			// Column k of Zjk does not change below, so fetch it once.
			getcolvec(Zjk, k, &Zjk_k);
			for (j=0; j<p; j++) {
				getcolvec(D, j, &D_j);
				sum_tmp = 0;
				for (i=0; i<n; i++) {
					sum_tmp += (*(Zjk_k.pr+i)) * (*(D_j.pr+i));
				}
				*(MU1.pr+u+j) = sum_tmp / zz;
			}

			// New covariance of cluster k, centred on the mean used by this
			// iteration (MUK), which stays constant until the mcopy below.
			mzero(&Ck);
			getrowvec(&MUK, k, &MUK_k);
			for (i=0; i<n; i++) {
				getrowvec(D, i, &D_i);
				for (j=0; j<p; j++) {
					*(cen_D_i.pr+j) = *(D_i.pr+j) - *(MUK_k.pr+j);
				}

				vvMul(&cen_D_i, &cen_D_i, &mtmp);
				
				for (j=0; j<p; j++) {
					for (s=0; s<p; s++) {
						*(Ck.pr+j*p+s) += (*(Zjk->pr+i*k0+k)) * (*(mtmp.pr+j*p+s));
					}
				}
			}
			for (j=0; j<p; j++) {
				for (s=0; s<p; s++) {
					*(Var0[k].pr+j*p+s) = (*(Ck.pr+j*p+s)) / zz;
				}
			}
		}   // for (k...

		mcopy(&MU1, &MUK);

		// Convergence measure: Zjk_k still holds the posterior column of the
		// last cluster processed above; compare it with the previous iteration.
		for (i=0; i<n; i++) {
			*(vtmp.pr+i) = fabs(*(Zjk_k.pr+i) - *(Zjk_temp.pr+i));
		}
		err = vmean(&vtmp);
		vcopy(&Zjk_k, &Zjk_temp);
		
		
    }  // while

	// Publish the results.
	vcopy(w0, w0_t);
	mcopy(&MUK, mean1_x);
	for(i=0; i<k0; i++) {
		mcopy(Var0+i, cov_mat+i);
	}

	// Release all working storage, including the array itself.
	for(i=0; i<k0; i++) {
		mdelete(Var0+i);
	} 
	delete[] Var0;
	mdelete(&Gxn);
	vdelete(&Fx);
	vdelete(&Zjk_temp);
	mdelete(&MUK);
	mdelete(&MU1);
    vdelete(&D_j);
	vdelete(&Zjk_k);
	mdelete(&Ck);
	vdelete(&D_i);
	vdelete(&MUK_k);
	vdelete(&cen_D_i);
	mdelete(&mtmp);
	vdelete(&vtmp);

    return 1;
}
Beispiel #3
0
int
main(int argc, char **argv)
{
    double starttime, endtime;
    int ntasks, myrank;
    char name[128];                      
    int namelen;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &ntasks);

    MPI_Get_processor_name(name,&namelen);

    /* Get a few OpenMP parameters.                                               */
    int O_P  = omp_get_num_procs();          /* get number of OpenMP processors       */
    int O_T  = omp_get_num_threads();        /* get number of OpenMP threads          */
    int O_ID = omp_get_thread_num();         /* get OpenMP thread ID                  */
    //printf("name:%s   M_ID:%d  O_ID:%d  O_P:%d  O_T:%d\n", name,myrank,O_ID,O_P,O_T);

    FILE *f;
    char line[LINE_SIZE];
    int numlines = 0;

    exprinfo *exprannot = NULL;
    char **glines = NULL;

    f = fopen("gene_list.txt", "r");
    while(fgets(line, LINE_SIZE, f)) {
        glines = (char**)realloc(glines, sizeof(char*)*(numlines+1));
        glines[numlines] = strdup(line);

        char *pch = strtok (line,",");
        char * gene = pch;

        pch = strtok (NULL, ",");
        int chr = atoi(trimwhitespace(pch));

        exprannot = (exprinfo*)realloc(exprannot,sizeof(exprinfo)*(numlines+1));

        exprannot[numlines].gene      = strdup(gene);
        exprannot[numlines].chr       = chr;

        if (!exprannot) printf("not allcoated\n");
        numlines++;
    }
    fclose(f);
    f = fopen("probe_id_mapping.txt", "r");
    numlines = 0;

    free(glines[0]);
    free(glines);

    probeinfo *records = NULL;
    char **lines = NULL;

    while(fgets(line, LINE_SIZE, f)) {              
        lines = (char**)realloc(lines, sizeof(char*)*(numlines+1));
        lines[numlines] = strdup(line);

        char *pch = strtok (line,",");
        int probeid = atoi(pch);

        pch = strtok (NULL, ",");
        char * gene = pch;

        pch = strtok (NULL, ",");
        int chr = atoi(trimwhitespace(pch));

        records = (probeinfo*)realloc(records,sizeof(probeinfo)*(numlines+1));

        records[numlines].probeid   = probeid;
        records[numlines].chr       = chr;
        records[numlines].gene      = strdup(gene);
        if (!records) printf("not allcoated\n");
        numlines++;

    }
    free(lines[0]);
    free(lines);

    fclose(f);


    int NUM_GENES = numlines;
    unsigned long x_nr, x_nc, y_nr, y_nc;

    double **X = h5_read("x.h5", 1, "/X",         &x_nr, &x_nc);
    double **Y = h5_read("filtered_probes.h5", 1, "/FilteredProbes", &y_nr, &y_nc);

    //printf("loaded X, num rows = %d, num cols = %d\n", x_nr, x_nc);
    //printf("loaded Y, num rows = %d, num cols = %d\n", y_nr, y_nc);

    unsigned long total_mem = (x_nr * y_nc);
    double **RHO   = create2dArray(x_nr, y_nc);
    
    int gene, probe, tid, work_completed;
    work_completed = 0;

    
    int BLOCK_SIZE = NUM_ROWS/ntasks;
    int offset = myrank*BLOCK_SIZE;
    int STOP_IDX = offset+BLOCK_SIZE;
    if (NUM_ROWS - STOP_IDX < BLOCK_SIZE)
        STOP_IDX = NUM_ROWS;

   // printf("offset = %d, for rank %d, with ntasks = %d, and BLOCK_SIZE = %d, STOP_IDX = %d\n", 
    //            offset, myrank, ntasks, BLOCK_SIZE, STOP_IDX);

    int num_sig_found = 0; 
    starttime = MPI_Wtime();

    #pragma omp parallel \
     for shared(X, Y, RHO, BLOCK_SIZE, offset, work_completed) \
     private(probe,gene, tid)
    for (gene = offset; gene < STOP_IDX; gene++) {
       for (probe = 0; probe < NUM_COLS; probe++) {
           double *x = getrowvec(X, gene, BUFFER_SIZE);
           double *y = getcolvec(Y, probe, BUFFER_SIZE);

           double avgx = mean(x, BUFFER_SIZE);
           double * xcentered = sub(x, avgx, BUFFER_SIZE);
                    
           double avgy = mean(y, BUFFER_SIZE);
           double * ycentered = sub(y, avgy, BUFFER_SIZE);
           
           double * prod_result = prod(xcentered, ycentered, BUFFER_SIZE);
           double sum_prod = sum(prod_result, BUFFER_SIZE);

           double stdX = stddev(x, avgx, BUFFER_SIZE);
           double stdY = stddev(y, avgy, BUFFER_SIZE);
           double rho  = sum_prod/((BUFFER_SIZE-1)*(stdX*stdY));

           RHO[gene][probe] = rho;
           if (work_completed % 10000 == 0) {
               tid = omp_get_thread_num();
               printf("rank = %d, work = %d, result[%d,%d] from %d = %f\n", myrank, work_completed,
                    gene, probe, tid, rho);
           }
           work_completed++;
           free(x);
           free(y);
           free(xcentered);
           free(ycentered);
           free(prod_result);

       }
    }
    //printf("********* %d FINISHED **********\n", myrank);

    //f = fopen("significant.txt", "a");
    #pragma omp parallel for shared(RHO,exprannot, records)
    for (int i = 0; i < NUM_ROWS; i++) {
        for (int j = 0; j < NUM_COLS; j++) {
            double zscore = ztest(RHO[i][j],BUFFER_SIZE);
            /*
            if (zscore > 5.0) {

                fprintf(f, "%d,%d,%f,%f,%d,%s,%d,%s\n", 
                        i, j, zscore, RHO[i][j], records[j].chr, records[j].gene, exprannot[i].chr,exprannot[i].gene);
                if (i*j%1000 == 0)
                    printf("%d,%d,%f,%f,%d,%s,%d,%s\n", 
                        i, j, zscore, RHO[i][j], records[j].chr, records[j].gene, exprannot[i].chr,exprannot[i].gene);
            }
            */
        }
    }
    //fclose(f);
    endtime   = MPI_Wtime();
    free(X[0]);
    free(X);
    free(Y[0]);
    free(Y);
    printf("rank %d - elapse time - %f\n",myrank, endtime-starttime);
    //for (int i = 0; i < NUM_ROWS; i++) {
        //printf("%s,%d\n", exprannot[i].gene, exprannot[i].chr);
    //}
    //printf("rank %d FINISHED\n",myrank);
    //h5_write(RHO, NUM_ROWS, NUM_COLS, "rho_omp.h5", "/rho");
    free(RHO[0]);
    free(exprannot);
    free(records);
    free(RHO);

  MPI_Finalize();
  return 0;
}