Ejemplo n.º 1
0
/*
 * Demo driver: builds a small 2-D integer table and passes it to the
 * row, column and whole-array summing helpers declared elsewhere.
 * NOTE(review): the initializer supplies 3 rows of 4 values, so ROWS/COLS
 * are presumably 3 and 4 -- confirm against their definitions.
 */
int main(void)
{
    int grid[ROWS][COLS] = {
        {2, 4, 6, 8},
        {3, 5, 7, 9},
        {12, 10, 8, 6}
    };

    /* Per-row and per-column helpers; both are called for their side effects. */
    sum_rows(grid, ROWS);
    sum_cols(grid, ROWS);

    /* The grand total is the only value reported directly by main. */
    printf("Sum of all elements = %d\n",
           sum2d(grid, ROWS));

    return 0;
}
Ejemplo n.º 2
0
/*
 * Demo driver: fills a fixed 2-D table, runs the row and column summing
 * helpers (each announced by a heading line), then prints the value
 * returned by sum_all().
 * NOTE(review): the initializer supplies 3 rows of 4 values, so ROWS/COLS
 * are presumably 3 and 4 -- confirm against their definitions.
 */
int main(void) {
    int table[ROWS][COLS] = {
        {1, 3, 5, 7},
        {2, 4, 6, 8},
        {0, 1, 2, 3}
    };

    /* Row pass. */
    puts("sum each row element");
    sum_rows(table);

    /* Column pass. */
    puts("sum each col element");
    sum_cols(table);

    /* Whole-table total. */
    int grand_total = sum_all(table);
    printf("Total rows and cols: %d\n", grand_total);

    return EXIT_SUCCESS;
}
Ejemplo n.º 3
0
/*
 * update_div: run iterative multiplicative updates on W and H so that W*H
 * approximates X.  Each iteration performs (in the original MATLAB notation):
 *
 *     Z = X./(W*H+eps);
 *     H = H.*(W'*Z)./(repmat(sum(W)',1,F));
 *
 *     Z = X./(W*H+eps);
 *     W = W.*(Z*H')./(repmat(sum(H,2)',N,1));
 *
 * Parameters:
 *   W, H      factor matrices; they are passed to the element-wise helpers
 *             as both input and output.
 *             NOTE(review): `matrix` is passed by value, so the in-place
 *             update presumably works through a data pointer inside the
 *             struct -- confirm against the matrix definition.
 *   X         the matrix being approximated.
 *   thresh    every ITER_CHECK iterations the relative change
 *             (prev_div-div)/prev_div is computed; the loop stops as soon
 *             as it drops below thresh.
 *   max_iter  upper bound on the number of update iterations.
 *   t         timer accumulator array, or NULL.  Indices 0..13 are written
 *             here and TIMERS entries are printed, so a caller-supplied
 *             array must hold at least max(TIMERS, 14) doubles.  When NULL,
 *             a zeroed local array is used instead.
 *             t[0] total loop time, t[1] all matrix multiplies, t[2] eps,
 *             t[3] element divide, t[4] element multiply, t[5] sum_rows,
 *             t[6] sum_cols, t[7] col_divide, t[8] row_divide,
 *             t[9] convergence checks, t[10]..t[13] the individual
 *             multiplies (W*H for H, W'*Z, W*H for W, Z*H').
 *   verbose   non-zero prints progress lines and the timer table.
 *
 * Side effects: turns on the FTZ and DAZ floating-point modes and does not
 * restore them; prints "converged" to stdout regardless of `verbose`.
 */
void update_div(matrix W, matrix H, matrix X, const float thresh, const int max_iter, double *t,int verbose){

    //initialize temp matrices -----------------------
    //matrix to hold W*H
    matrix WH;
    create_matrix(&WH, W.dim[0], H.dim[1], 0.0);

    //matrix to hold X./(W*H+EPS)
    matrix Z;
    create_matrix(&Z, X.dim[0], X.dim[1], 0.0);

    //matrix to hold W'*Z
    matrix WtZ;
    create_matrix(&WtZ, W.dim[1], Z.dim[1], 0.0);

    //matrix to hold Z*H'
    matrix ZHt;
    create_matrix(&ZHt, Z.dim[0], H.dim[0], 0.0);

    //matrix to hold sum(W) [sum cols of W]
    matrix sumW;
    create_matrix(&sumW, 1, W.dim[1] ,0.0);

    //matrix to hold sum(H,2) [sum rows of H]
    matrix sumH2;
    create_matrix(&sumH2, H.dim[0], 1, 0.0);

    int i;

    //Fallback timer storage.  It must live at function scope: t is used
    //until the end of the function, so an array declared inside the
    //if-block below would leave t dangling once that block closes.
    double t_array[TIMERS];
    if(t==NULL){
        t = t_array;
        for(i=0;i<TIMERS;i++)
            t[i] = 0;
    }

    //turn on the FTZ(15) and DAZ(6) bits in the floating point control register
    //FTZ = flush-to-zero, DAZ = denormal-as-zero
    //without these, sgemms slow down significantly as values approach zero
    //(equivalent to OR-ing (1<<15)|(1<<6) into MXCSR via stmxcsr/ldmxcsr)
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

    //initial error / divergence before any update
    float diff,div,prev_div,change;
    matrix_multiply(W,H,WH,mkl_threads);
    diff = matrix_difference_norm(X,WH, check_threads);
    prev_div = matrix_div(X,WH,check_threads);
    div = prev_div;
    if(verbose)
    {
        printf("OpenMP threads: %i\n",omp_threads);
        printf("i: %4i, error: %6.4f, div: %8.4e\n",0,diff,prev_div);
    }

    t[0] -= get_time();
    for(i=0;i<max_iter;i++){

        //check for convergence, print status
        if(i % ITER_CHECK == 0 && i != 0){
            double tt = get_time();
            matrix_multiply(W,H,WH,mkl_threads);
            diff = matrix_difference_norm(X,WH,check_threads);
            prev_div = div;
            div = matrix_div(X,WH,check_threads);
            change = (prev_div-div)/prev_div;
            if(verbose)
                printf("i: %4i, error: %6.4f, div: %8.4e, change: %8.5f\n",
                        i,diff,div,change);

            //book the check time before a possible break, so the final
            //(converging) check is also counted in t[9]
            t[9] += get_time()-tt;

            if(change < thresh){
                printf("converged\n");
                break;
            }
        }

        //
        // UPDATE H -----------------------------
        //

        //WH = W*H
        t[1] -= get_time();
        t[10] -= get_time();
        matrix_multiply(W,H,WH,mkl_threads);
        t[1] += get_time();
        t[10] += get_time();

        //WH = WH+EPS
        t[2] -= get_time();
        matrix_eps(WH,eps_threads);
        t[2] += get_time();

        //Z = X./WH
        t[3] -= get_time();
        element_divide(X,WH,Z,vecdiv_threads);
        t[3] += get_time();

        //sum cols of W into row vector
        t[6] -= get_time();
        sum_cols(W,sumW,sumcols_threads);
        t[6] += get_time();

        //convert sumW to col vector (only the dims are swapped)
        sumW.dim[0] = sumW.dim[1];
        sumW.dim[1] = 1;

        //WtZ = W'*Z
        t[1] -= get_time();
        t[11] -= get_time();
        matrix_multiply_AtB(W,Z,WtZ,mkl_threads);
        t[1] += get_time();
        t[11] += get_time();

        //WtZ = WtZ./(repmat(sum(W)',1,H.dim[1])
        //[element divide cols of WtZ by sumW']
        t[7] -= get_time();
        col_divide(WtZ,sumW,WtZ,coldiv_threads);
        t[7] += get_time();

        //H = H.*WtZ
        t[4] -= get_time();
        element_multiply(H,WtZ,H,vecmult_threads);
        t[4] += get_time();

        //
        // UPDATE W ---------------------------
        //

        //WH = W*H
        t[1] -= get_time();
        t[12] -= get_time();
        matrix_multiply(W,H,WH,mkl_threads);
        t[1] += get_time();
        t[12] += get_time();

        //WH = WH+EPS
        t[2] -= get_time();
        matrix_eps(WH,eps_threads);
        t[2] += get_time();

        //Z = X./WH
        t[3] -= get_time();
        element_divide(X,WH,Z,vecdiv_threads);
        t[3] += get_time();

        //sum rows of H into col vector
        t[5] -= get_time();
        sum_rows(H,sumH2,sumrows_threads);
        t[5] += get_time();

        //convert sumH2 to row vector (only the dims are swapped)
        sumH2.dim[1] = sumH2.dim[0];
        sumH2.dim[0] = 1;

        //ZHt = Z*H'
        t[1] -= get_time();
        t[13] -= get_time();
        matrix_multiply_ABt(Z,H,ZHt,mkl_threads);
        t[1] += get_time();
        t[13] += get_time();

        //ZHt = ZHt./(repmat(sum(H,2)',W.dim[0],1)
        //[element divide rows of ZHt by sumH2']
        t[8] -= get_time();
        row_divide(ZHt,sumH2,ZHt,rowdiv_threads);
        t[8] += get_time();

        //W = W.*ZHt
        t[4] -= get_time();
        element_multiply(W,ZHt,W,vecmult_threads);
        t[4] += get_time();

        // ------------------------------------

        //reset sumW to row vector
        sumW.dim[1] = sumW.dim[0];
        sumW.dim[0] = 1;
        //reset sumH2 to col vector
        sumH2.dim[0] = sumH2.dim[1];
        sumH2.dim[1] = 1;

        // ---------------------------------------
    }

    t[0] += get_time();

    //final error / divergence after the last update
    matrix_multiply(W,H,WH,mkl_threads);
    diff = matrix_difference_norm(X,WH,check_threads);
    prev_div = div;
    div = matrix_div(X,WH,check_threads);
    change = (prev_div-div)/prev_div;
    if(verbose){
        //i still holds the iteration count at which the loop ended
        printf("i: %4i, error: %6.4f, div: %8.4e, change: %8.5f\n",
                i,diff,div,change);

        printf("\n");
        for(i=0;i<TIMERS;i++)
            printf("t[%i]: %8.3f (%6.2f %%) %s\n",i,t[i],t[i]/t[0]*100,tname[i]);
    }

    //free temporary matrices
    destroy_matrix(&WH);
    destroy_matrix(&Z);
    destroy_matrix(&WtZ);
    destroy_matrix(&ZHt);
    destroy_matrix(&sumW);
    destroy_matrix(&sumH2);

}