C++ (Cpp) matmulの例

コード例 #1

0

ファイルを表示

ファイル: TopologyMatrix.cpp プロジェクト: BingqingCheng/plumed2

void TopologyMatrix::calculateForThreeAtoms( const unsigned& iat, const Vector& d1, const double& d1_len,
    HistogramBead& bead, multicolvar::AtomValuePack& myatoms ) const {
  // Calculate if there are atoms in the cylinder (can use delta here as pbc are done in atom setup)
  Vector d2 = getSeparation( myatoms.getPosition(0), myatoms.getPosition(iat) );
  // Now calculate projection of d2 on d1
  double proj=dotProduct(d2,d1);
  // This tells us if we are outside the end of the cylinder
  double excess = proj - d1_len;
  // Return if we are outside of the cylinder as calculated based on excess
  if( excess>low_sf( getBaseColvarNumber( myatoms.getIndex(0) ), getBaseColvarNumber( myatoms.getIndex(1) ) ).get_dmax() ) return;
  // Find the length of the cylinder
  double binw = binw_mat( getBaseColvarNumber( myatoms.getIndex(0) ), getBaseColvarNumber( myatoms.getIndex(1) ) );
  double lcylinder = (std::floor( d1_len / binw ) + 1)*binw;
  // Return if the projection is outside the length of interest
  if( proj<-bead.getCutoff() || proj>(lcylinder+bead.getCutoff()) ) return;

  // Calculate the excess swiching function
  double edf, eval = low_sf( getBaseColvarNumber( myatoms.getIndex(0) ), getBaseColvarNumber( myatoms.getIndex(1) ) ).calculate( excess, edf );
  // Calculate the projection on the perpendicular distance from the center of the tube
  double cm = d2.modulo2() - proj*proj;

  // Now calculate the density in the cylinder
  if( cm<cylinder_sw( getBaseColvarNumber( myatoms.getIndex(0) ), getBaseColvarNumber( myatoms.getIndex(1) ) ).get_dmax2() ) {
    double dfuncr, val = cylinder_sw( getBaseColvarNumber( myatoms.getIndex(0) ),
                                      getBaseColvarNumber( myatoms.getIndex(1) ) ).calculateSqr( cm, dfuncr );
    double cellv = cell_volume( getBaseColvarNumber( myatoms.getIndex(0) ), getBaseColvarNumber( myatoms.getIndex(1) ) );
    Vector dc1, dc2, dc3, dd1, dd2, dd3, de1, de2, de3;
    if( !doNotCalculateDerivatives() ) {
      Tensor d1_a1;
      // Derivative of director connecting atom1 - atom2 wrt the position of atom 1
      d1_a1(0,0) = ( -(d1[1]*d1[1]+d1[2]*d1[2])/d1_len );   // dx/dx
      d1_a1(0,1) = (  d1[0]*d1[1]/d1_len );                 // dx/dy
      d1_a1(0,2) = (  d1[0]*d1[2]/d1_len );                 // dx/dz
      d1_a1(1,0) = (  d1[1]*d1[0]/d1_len );                 // dy/dx
      d1_a1(1,1) = ( -(d1[0]*d1[0]+d1[2]*d1[2])/d1_len );   // dy/dy
      d1_a1(1,2) = (  d1[1]*d1[2]/d1_len );
      d1_a1(2,0) = (  d1[2]*d1[0]/d1_len );
      d1_a1(2,1) = (  d1[2]*d1[1]/d1_len );
      d1_a1(2,2) = ( -(d1[1]*d1[1]+d1[0]*d1[0])/d1_len );

      // Calculate derivatives of dot product
      dd1 = matmul(d2, d1_a1) - d1;
      dd2 = matmul(d2, -d1_a1);
      dd3 = d1;

      // Calculate derivatives of cross product
      dc1 = dfuncr*( -d2 - proj*dd1 );
      dc2 = dfuncr*( -proj*dd2 );
      dc3 = dfuncr*( d2 - proj*dd3 );

      // Calculate derivatives of excess
      de1 = edf*excess*( dd1 + d1 );
      de2 = edf*excess*( dd2 - d1 );
      de3 = edf*excess*dd3;
    }

    Vector pos1 = myatoms.getPosition(0) + d1_len*d1;
    Vector pos2 = myatoms.getPosition(0) + d2;
    Vector g1derivf,g2derivf,lderivf; Tensor vir;
    for(unsigned bin=0; bin<maxbins; ++bin) {
      bead.set( bin*binw, (bin+1)*binw, sigma );
      if( proj<(bin*binw-bead.getCutoff()) || proj>binw*(bin+1)+bead.getCutoff() ) continue;
      double der, contr=bead.calculateWithCutoff( proj, der ) / cellv; der /= cellv;
      myatoms.addValue( 2+bin, contr*val*eval );

      if( !doNotCalculateDerivatives() ) {
        g1derivf=contr*eval*dc1 + val*eval*der*dd1 + contr*val*de1;
        addAtomDerivatives( 2+bin, 0, g1derivf, myatoms );
        g2derivf=contr*eval*dc2 + val*eval*der*dd2 + contr*val*de2;
        addAtomDerivatives( 2+bin, 1, g2derivf, myatoms );
        lderivf=contr*eval*dc3 + val*eval*der*dd3 + contr*val*de3;
        addAtomDerivatives( 2+bin, iat, lderivf, myatoms );
        // Virial
        vir = -Tensor( myatoms.getPosition(0), g1derivf ) - Tensor( pos1, g2derivf ) - Tensor( pos2, lderivf );
        myatoms.addBoxDerivatives( 2+bin, vir );
      }
    }
  }
}

コード例 #2

0

ファイルを表示

ファイル: ray_tracing.c プロジェクト: alexlib/3dptv_tcltk

/* Refract the direction `dir` at an interface with unit normal `nrm` when
   passing from a medium of refractive index n_in into one of index n_out.
   The direction is split into its component along the normal and the unit
   vector of its component parallel to the interface; the parallel (sine)
   part is scaled by n_in/n_out and the normal part is rebuilt with a
   negative sign, i.e. the refracted ray always travels against `nrm`.
   The result is written to `out`, which must not alias `dir` or `nrm`. */
static void ray_tracing_v2_refract (const double dir[3], const double nrm[3],
									double n_in, double n_out, double out[3])
{
	double	along, par[3], len, sin2, s;
	int		i;

	along = (dir[0]*nrm[0] + dir[1]*nrm[1] + dir[2]*nrm[2]);
	for (i = 0; i < 3; i++)  par[i] = dir[i] - nrm[i]*along;
	len = sqrt (par[0]*par[0] + par[1]*par[1] + par[2]*par[2]);
	/* normal incidence: no parallel component, avoid dividing by zero */
	if (len == 0) len = 1.0;
	for (i = 0; i < 3; i++)  par[i] = par[i]/len;

	/* |along| can exceed 1 by a rounding error for (near) normal incidence;
	   clamp so that sqrt() never sees a negative argument and returns NaN */
	sin2 = 1 - along*along;
	if (sin2 < 0) sin2 = 0;
	s = sqrt (sin2);
	s = s * n_in/n_out;			/* interface parallel */
	/* NOTE(review): for total internal reflection (s > 1) this is still NaN,
	   exactly as before; that case is not handled here. */
	along = -sqrt (1 - s*s);	/* interface normal */

	for (i = 0; i < 3; i++)  out[i] = s*par[i] + along*nrm[i];
}

void ray_tracing_v2 (double x, double y, Exterior Ex, Interior I, Glass G, mm_np mm,
					 double *Xb2, double *Yb2, double* Zb2, 
					 double *a3, double *b3, double *c3)
/* ray-tracing, see HOEHLE and Manual of Photogrammetry

   Traces the ray through image point (x, y) from the projective centre
   (Ex.x0, Ex.y0, Ex.z0) through a plane-parallel layer whose orientation is
   given by the vector (G.vec_x, G.vec_y, G.vec_z).

   Outputs:
     *Xb2, *Yb2, *Zb2  point where the ray leaves the layer (interface n2/n3)
     *a3, *b3, *c3     direction of the ray in the third medium (mm.n3)

   The glass vector must be non-zero; its length is used as a divisor. */
{
	double	vect1[3], vect2[3], origin[3], normal[3], dir2[3], dir3[3];
	double	s2, c, dummy, d1, d2, Xb1, Yb1, Zb1;

	s2 = sqrt (x*x + y*y + I.cc*I.cc);

	/* direction cosines in image coordinate system */
	vect1[0] = x/s2;  vect1[1] = y/s2;	  vect1[2] = -I.cc/s2;

	/* direction cosines in space coordinate system , medium n1 */
	matmul (vect2, Ex.dm, vect1, 3,3,1);

	/* distance along the ray to the outer interface: intersection of the
	   line origin + t*vect2 with the plane  normal . X = |G.vec| + mm.d[0] */
	origin[0]=Ex.x0;  origin[1]=Ex.y0;  origin[2]=Ex.z0;
	c=sqrt(G.vec_x*G.vec_x+G.vec_y*G.vec_y+G.vec_z*G.vec_z);
	normal[0]=G.vec_x/c;  normal[1]=G.vec_y/c;  normal[2]=G.vec_z/c;

	c=c+mm.d[0];
	dummy=normal[0]*origin[0]+normal[1]*origin[1]+normal[2]*origin[2];
	dummy=dummy-c;
	d1=-dummy/(normal[0]*vect2[0]+normal[1]*vect2[1]+normal[2]*vect2[2]);

	/* point on the interface between n1,n2 */
	Xb1=origin[0]+vect2[0]*d1;
	Yb1=origin[1]+vect2[1]*d1;
	Zb1=origin[2]+vect2[2]*d1;

	/* direction cosines in space coordinate system , medium n2 */
	ray_tracing_v2_refract (vect2, normal, mm.n1, mm.n2[0], dir2);

	/* path length through the layer of thickness mm.d[0] */
	d2=mm.d[0]/fabs((normal[0]*dir2[0]+normal[1]*dir2[1]+normal[2]*dir2[2]));

	/* point on the interface between n2,n3 */
	*Xb2 = Xb1 + d2*dir2[0];  *Yb2 = Yb1 + d2*dir2[1];  *Zb2 = Zb1 + d2*dir2[2];

	/* direction cosines in space coordinate system , medium mm.n3 */
	ray_tracing_v2_refract (dir2, normal, mm.n2[0], mm.n3, dir3);
	*a3=dir3[0];
	*b3=dir3[1];
	*c3=dir3[2];
}

コード例 #3

0

ファイルを表示

ファイル: Pbc.cpp プロジェクト: JFDama/plumed2

Vector Pbc::realToScaled(const Vector&d)const {
  // Multiply d by the transpose of invBox and hand back the product.
  Vector scaled = matmul(invBox.transpose(),d);
  return scaled;
}

コード例 #4

0

ファイルを表示

/* Iteratively refines the six exterior orientation parameters in
   cal->ext_par (x0, y0, z0, omega, phi, kappa) from nfix known 3D points
   fix[] and their clicked image targets pix[], with all additional
   (distortion) parameters reset to neutral values.

   Each point contributes two observation equations (x and y), so the
   fixed-size design matrix holds at most MAX_OBS / 2 points.

   Returns 1 when, within 20 iterations, an iteration ends with every
   parameter correction not above 0.1 in magnitude; returns 0 otherwise,
   including when nfix is larger than the design matrix can hold (in which
   case cal is left untouched). */
int raw_orient (Calibration* cal, control_par *cpar, int nfix, vec3d fix[], target pix[])
{
    enum { MAX_OBS = 10, NUM_PAR = 6 };

    double  X[MAX_OBS][NUM_PAR], y[MAX_OBS], XPX[NUM_PAR][NUM_PAR], XPy[NUM_PAR], beta[NUM_PAR];
    int     i, j, n, itnum, stopflag;
    double  dm = 0.0001,  drad = 0.0001;
    double 	xp, yp, xc, yc;
    vec3d   pos;

    /* two rows of X and y are written per point: more than MAX_OBS / 2
       points would run past the end of both arrays */
    if (nfix > MAX_OBS / 2)
        return 0;

    /* init X, y (set to zero) */
    for (i = 0; i < MAX_OBS; i++) {
      for (j = 0; j < NUM_PAR; j++)
        X[i][j] = 0;
      y[i] = 0;
    }

    /* additional parameters are neglected in this raw orientation */
    cal->added_par.k1 = 0;
    cal->added_par.k2 = 0;
    cal->added_par.k3 = 0;
    cal->added_par.p1 = 0;
    cal->added_par.p2 = 0;
    cal->added_par.scx = 1;
    cal->added_par.she = 0;

    /* main loop, program runs through it, until none of the beta values
     comes over a threshold and no more points are thrown out
     because of their residuals */

    itnum = 0;
    stopflag = 0;

    while ((stopflag == 0) && (itnum < 20)) {
        ++itnum;

        for (i = 0, n = 0; i < nfix; i++) {
            /* we do not check the order - trust the user to click the points
               in the correct order of appearance in man_ori and in the calibration
               parameters GUI
            */
            pixel_to_metric (&xc, &yc, pix[i].x, pix[i].y, cpar);
            /* no corrections as additional parameters are neglected
                correct_brown_affin (xc, yc, cal->added_par, &xc, &yc);
            */
    
            /* every calibration dot is projected to the mm position, xp, yp */
            vec_set(pos, fix[i][0], fix[i][1], fix[i][2]);
            rotation_matrix(&(cal->ext_par));
            img_coord (pos, cal, cpar->mm, &xp, &yp);
    
            /* numeric derivatives with respect to the exterior parameters:
               row n holds the x equation, row n + 1 the y equation */
            num_deriv_exterior(cal, cpar, dm, drad, pos, X[n], X[n + 1]);
    
            /* residuals: measured minus projected */
            y[n]   = xc - xp;
            y[n+1] = yc - yp;
    
            n += 2;
        }

        /* Gauss Markoff Model: beta = (X'X)^-1 X'y */
    
        ata ((double *) X, (double *) XPX, n, 6, 6);
        matinv ((double *) XPX, 6, 6);
        atl ((double *) XPy, (double *) X, y, n, 6, 6);
        matmul ((double *) beta, (double *) XPX, (double *) XPy, 6,6,1,6,6);
    
        /* converged only if every correction is small */
        stopflag = 1;
        for (i = 0; i < NUM_PAR; i++) {
          if (fabs (beta[i]) > 0.1 )
            stopflag = 0;
        }
    
        cal->ext_par.x0 += beta[0];
        cal->ext_par.y0 += beta[1];
        cal->ext_par.z0 += beta[2];
        cal->ext_par.omega += beta[3];
        cal->ext_par.phi += beta[4];
        cal->ext_par.kappa += beta[5];
    
    }

    /* refresh the rotation matrix for the final angles */
    if (stopflag) {
        rotation_matrix(&(cal->ext_par));
    }
    return stopflag;
}

コード例 #5

0

ファイルを表示

ファイル: PAPI_matmul.c プロジェクト: xancandal/papi

/* Multiplies two matrices while counting as many non-derived PAPI preset
   events as fit in a multiplexed event set, then prints the counters split
   into non-zero and zero values and reports how many distinct counter pairs
   ended up with identical values.

   Returns 0 on success and 1 when a matrix cannot be allocated; PAPI
   failures are reported through test_fail(). */
int main()
{
    double *a = NULL;
    double *b = NULL;
    double *c = NULL;
    int i = 0, j = 0, k = 0;
    int *events;                        // Array of events
    long long *values;                  // Array of values events
    int EventSet = PAPI_NULL;           // Handle for a PAPI event set as created by PAPI_create_eventset (3) 
    int retval;                         // Test fail function
    int num_event = 0;                  // Number of events
    int max_event;                      // Number of available events
    int EventCode = 0;                  // Event code
    PAPI_event_info_t pset;             // PAPI_event_info_t Struct Reference
    char evname[PAPI_MAX_STR_LEN];      // Symbol event
   
    /* Memory assignment to matrices: a is mrows x ncolumns, b is
       ncolumns x pcolumns, c is mrows x pcolumns */
    a = (double *)malloc(mrows * ncolumns * sizeof(double));
    b = (double *)malloc(ncolumns * pcolumns * sizeof(double));
    c = (double *)malloc(mrows * pcolumns * sizeof(double));
    if (a == NULL || b == NULL || c == NULL) {
        /* report the size of the matrix that actually failed and stop:
           continuing would dereference a NULL pointer */
        if (a == NULL)
            printf("Error malloc matrix a[%d]\n",mrows * ncolumns);
        if (b == NULL)
            printf("Error malloc matrix b[%d]\n",ncolumns * pcolumns);
        if (c == NULL)
            printf("Error malloc matrix c[%d]\n",mrows * pcolumns);
        free( a );
        free( b );
        free( c );
        return 1;
    }

    /* Initialize the Matrix arrays */
    initmat(a, b, mrows, ncolumns, pcolumns);

    /* Initialize the PAPI library */
    retval = PAPI_library_init(PAPI_VER_CURRENT);
    if (retval != PAPI_VER_CURRENT)
        test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );

    /* Enable and initialize multiplex support */
    retval = PAPI_multiplex_init();
    if ( retval != PAPI_OK )
        test_fail( __FILE__, __LINE__, "PAPI_multiplex_init", retval );
 
    /* Create an EventSet */
    retval = PAPI_create_eventset(&EventSet);
    if ( retval != PAPI_OK )
        test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval );
 
    /* Assign it to the CPU component */
    retval = PAPI_assign_eventset_component(EventSet, 0);
    if ( retval != PAPI_OK )
        test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", retval );
 
    /* Convert the EventSet to a multiplexed event set */
    retval = PAPI_set_multiplex(EventSet);
    if ( retval != PAPI_OK )
        test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", retval );

    /* Obtaining the number of available events */
    max_event = PAPI_get_opt( PAPI_MAX_MPX_CTRS, NULL );
    printf("\nNumber of available events: %d", max_event );
 
    /* Fill up the event set with as many non-derived events as we can */
    EventCode = PAPI_PRESET_MASK;
    do {
        if ( PAPI_get_event_info( EventCode, &pset ) == PAPI_OK ) {
            if ( pset.count && ( strcmp( pset.derived, "NOT_DERIVED" ) == 0 ) ) {
                retval = PAPI_add_event( EventSet, ( int ) pset.event_code );
                if ( retval != PAPI_OK )
                    test_fail( __FILE__, __LINE__, "PAPI_add_event", retval );
                else {
                    //printf( "Added %s\n", pset.symbol );
                    num_event++;
                }
            }
        }
    } while ( ( PAPI_enum_event( &EventCode, PAPI_PRESET_ENUM_AVAIL ) == PAPI_OK ) && ( num_event < max_event ) );
    
    /* Memory assignment to values and events */
    events = ( int * ) malloc( ( size_t ) num_event * sizeof ( int ) );
    if ( events == NULL )
        test_fail( __FILE__, __LINE__, "Error malloc events", 0 );
    values = ( long long * ) malloc( ( size_t ) num_event * sizeof ( long long ) );
    if ( values == NULL )
        test_fail( __FILE__, __LINE__, "Erro malloc values", 0 );

    /* Start counting events */
    if ((retval=PAPI_start(EventSet)) != PAPI_OK)
        test_fail(__FILE__, __LINE__, "PAPI_start", retval);

    /* Matrix-Matrix multiply */
    matmul(a, b, c, mrows, ncolumns, pcolumns);

    /* Read the counters */
    if ((retval=PAPI_read( EventSet, values )) != PAPI_OK)
        test_fail(__FILE__, __LINE__, "PAPI_read_counters", retval);
   
    /* Stop counting events */
    if ((retval=PAPI_stop( EventSet, values )) != PAPI_OK)
        test_fail(__FILE__, __LINE__, "PAPI_stop_counters", retval);

    /* List the events in the event set */
    retval = PAPI_list_events( EventSet, events, &num_event );
    if ( retval != PAPI_OK )
        test_fail( __FILE__, __LINE__, "PAPI_list_events", retval );

    /* Print results */
    printf("\nNumber of non-zero events: %d\n", num_event );
    printf( "\nCounts of non-zero available events........................................................\n" );
    printf("Name: \t\t\t  Value: \t Description:\n");
    for ( i = 0; i < num_event; i++ ) {
        PAPI_event_code_to_name( events[i], evname );   // Obtaining name of available events
        PAPI_get_event_info(events[i], &pset);
        if ( values[i] != 0 )  printf("%s \t %15lld \t %s\n", evname, values[i], pset.long_descr);
    }
    printf( "\nCounts of zero available events............................................................\n" );
    printf("Name: \t\t\t  Value: \t Description:\n");
    for ( i = 0; i < num_event; i++ ) {
        PAPI_event_code_to_name( events[i], evname );   // Obtaining name of available events
        PAPI_get_event_info(events[i], &pset);
        if ( values[i] == 0 )  printf("%s \t %15lld \t %s\n", evname, values[i], pset.long_descr);
    }

    /* Check if counter pair(s) had identical values: visit every unordered
       pair (i, j) with i < j exactly once */
    for ( i = 0; i < num_event; i++ ) {
        for ( j = i+1; j < num_event; j++ ) {
            if ( values[i] == values[j] ) k++;
        }
    }
    if ( k != 0 ) {
        printf( "\nCaution: %d counter pair(s) had identical values\n", k );
    }
    printf("\n");

    /* Free memory */
    free( events );
    free( values );
    free( a );
    free( b );
    free( c );

    /* Cleaning events */
    retval = PAPI_cleanup_eventset( EventSet );
    if ( retval != PAPI_OK )
        test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval );
    
    /* Destroying events */
    retval = PAPI_destroy_eventset( &EventSet );
    if ( retval != PAPI_OK )
        test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval );

    return 0;
}

コード例 #6

0

ファイルを表示

ファイル: ECEFNRT.c プロジェクト: avinashparitala/NavigationAlgorithm-

/*
 * Resets the file-level navigation state: interrupt counters and flags,
 * loop counters, the velocity/angle work arrays, the initial attitude
 * quaternion and DCMs, the initial velocity, the earth-rate vectors, and
 * the initial radius, position and gravity.
 * Takes no arguments and returns nothing; it only reads and writes variables
 * declared outside this function.
 */
void varinit(void)
{
	int i;
	/*
	 * Resetting all flags
	 */
	Intr1_Cnt=0;
	Intr2_Cnt=0;
	IRQ1Flag = 1;
	IRQ2Flag = 1;

	WSZ = 34;
	TA_cnt =0;


	count = 0;
	qcnt = 0;

	velcnt = 0;
	rtime = 0.0;
	rcnt = 0;
	cnt_10ms = 0;




	/* start from the master's latitude / longitude */
	latm = MasterLat;
	longm = MasterLon;

	/* time-step multiples derived from del_t and cdr */
	epsilon = 0.0;
	four_delt = 4.0 * del_t;
	eight_delt = 8.0 * del_t;
	cdr_delt = cdr * del_t;
	cdr_delt_ms = cdr_delt / 3600;

	/* NOTE(review): clears 32 entries while WSZ is set to 34 above -
	 * confirm the intended length of Array_SA */
	for(i=0;i<32;i++){
		Array_SA[i] = 0;
	}

	/* zero the 3-component work vectors */
	for(i=0;i<3;i++)
	{
		velo_ref_y[i] = 0.0;
		velo_ref_yold[i] = 0.0;;
		velo_ref_x[i] = 0.0;
		velo_ref_xold[i] = 0.0;

		pure_vel[i] = 0.0;

		p_velo_20ms[i] = 0.0;
		p_velo[i] = 0.0;

		pure_v_old[i] = 0.0;
		p_Ang[i] = 0.0;

		pure_gyro_drift[i] = 0.0;
		pure_acc_residu[i] = 0.0;

	}

	/* Everything down to the matching #endif is compiled out, including the
	 * "else if" on its last line; the brace block that follows the #endif
	 * therefore always runs. */
#if 0

	/* these are known misalignment angles between M and S -
	 * Measured w.r.t Master to give DCM from slave to Master.
	 * Beware they are not between slave to NED */
	known_si    =  0.0 * cdr;
	known_theta =  0.0 * cdr;
	known_phi   =  0.0 * cdr;

	euler2dcm_stp(0, 0, 0, (double*)CSkew_est);
	transpose(3, 3, (double*)CSkew_est, (double*)CSkew_est_T);

	euler2dcm_stp(known_si, known_theta, known_phi, (double*)CS2M_K);
	transpose(3, 3, (double*)CS2M_K, (double*)CM2S_K);

	euler2dcm_stp(THDG, PITCH, ROLL, (double*)Cb2ned_M);
	matmul(3, 3, (double*)Cb2ned_M, 3, 3, (double*)CS2M_K, (double*)Cb2ned_S);

	if(ta_flag==1 && nav_flag==1)

	{
		dcm2quat((double*)Cb2ned_S, (double *)p_q_body2ned);

	}

	else if(ta_flag ==0 && level_flag==1)
#endif
	{

		/* initial body-to-NED quaternion from the mdl_* angles
		 * (note the argument order: si, phi, theta) */
		euler2quat_spt(mdl_si,mdl_phi,mdl_theta,(double *)p_q_body2ned);


		p_si = mdl_si;
		p_phi = mdl_phi;
		p_theta = mdl_theta;



	}

	/* body-to-ECEF quaternion = (NED-to-ECEF) * (body-to-NED) */
	ned2ecef_q(latm, longm,(double*) q_ned2ecef);
	quat_mult((double*)q_ned2ecef,(double*)p_q_body2ned, (double*)p_q_body2ecef);


	/*
	 * Modification after Manjit discussion
	 */
	quat2dcm((double *)p_q_body2ecef,(double*)p_dcm);


	/* pure_vel = p_dcm_n (3x3, built from q_ned2ecef) * MasterVel (3x1) */
	quat2dcm((double *)q_ned2ecef,(double*)p_dcm_n);
	matmul(3,3, (double*)p_dcm_n,3,1,(double*)MasterVel,(double*)pure_vel);




	pure_v_old[0] = pure_vel[0];
	pure_v_old[1] = pure_vel[1];
	pure_v_old[2] = pure_vel[2];

	/* NOTE(review): these four vectors were already zeroed in the loop
	 * above; presumably init() stores its first three arguments into the
	 * vector - confirm */
	init(0.0, 0.0, 0.0, p_velo_20ms);

	init(0.0, 0.0, 0.0, p_velo);


	init(0.0,0.0,0.0,pure_gyro_drift);
	init(0.0,0.0,0.0,pure_acc_residu);



	for (i = 0; i < 3; i++)
	{
		p_alp1[i] = 0.0;    p_alp2[i] = 0.0;    p_alp3[i] = 0.0;    p_alp4[i] = 0.0;

	}

	for (i = 0; i < 3; i++)
		Delta_Angle[i] = 0.0;

	for (i = 0; i < 6; i++)
		accum1[i] = 0.0;

	init(0.0, 0.0, earth_rate, omega);	 //earth rate vector ECEF

	//used in levelling
	Ned_omega[0] = earth_rate * cos(latm);
	Ned_omega[1] = 0.0;
	Ned_omega[2] = -earth_rate *sin(latm);

	/* twice the earth-rate vector */
	for (i = 0; i < 3; i++)
		omg_dub[i] = 2.0 * omega[i];

	/* initial radius from r0, eccen and the latitude */
	r_init = r0 * (1.0 - eccen * (sin(latm) * sin(latm)));


	pure_R = r_init + MasterAlt; // altitude;


	lla2ecef(latm,longm,MasterAlt,(double *)pure_ecef_pos); //input is geodetic



	/* NOTE(review): presumably updates pure_g_ecef_mag, which is read just
	 * below - confirm */
	pure_g_ecef();

	/****  for epsilon estimation   ****/

	init(0.0, 0.0, -pure_g_ecef_mag, Ned_gravity_detic);

}								 //end of varinit()

コード例 #7

0

ファイルを表示

ファイル: matmul_host.c プロジェクト: futurecore/epiphany-examples

int main(int argc, char *argv[])
{
	e_epiphany_t Epiphany, *pEpiphany;
	e_mem_t      DRAM,     *pDRAM;
	unsigned int msize;
	float        seed;
	unsigned int addr; //, clocks;
	size_t       sz;
	double       tdiff[4];
	int          result, rerval;
	
	pEpiphany = &Epiphany;
	pDRAM     = &DRAM;
	msize     = 0x00400000;

	get_args(argc, argv);


	fo = stderr;
	fi = stdin;

	printf("\nMatrix: C[%d][%d] = A[%d][%d] * B[%d][%d]\n\n", _Smtx, _Smtx, _Smtx, _Smtx, _Smtx, _Smtx);
	printf("Using %d x %d cores\n\n", _Nside, _Nside);
	seed = 0.0;
	printf("Seed = %f\n", seed);



	// Connect to device for communicating with the Epiphany system
	// Prepare device
	e_set_host_verbosity(H_D0);
	e_init(NULL);
	e_reset_system();

	if (e_alloc(pDRAM, 0x00000000, msize))
	{
		printf("\nERROR: Can't allocate Epiphany DRAM!\n\n");
		exit(1);
	}
	if (e_open(pEpiphany, 0, 0, e_platform.chip[0].rows, e_platform.chip[0].cols))
	{
		printf("\nERROR: Can't establish connection to Epiphany device!\n\n");
		exit(1);
	}

	// Initialize Epiphany "Ready" state
	addr = offsetof(shared_buf_t, core.ready);
	Mailbox.core.ready = 0;
	e_write(pDRAM, 0, 0, addr, &Mailbox.core.ready, sizeof(Mailbox.core.ready));

	printf("Loading program on Epiphany chip...\n");
	e_set_loader_verbosity(ar.verbose);
	result = e_load_group(ar.srecFile, pEpiphany, 0, 0, pEpiphany->rows, pEpiphany->cols, ar.run_target);
	if (result == E_ERR) {
		printf("Error loading Epiphany program.\n");
		exit(1);
	}


	// Generate operand matrices based on a provided seed
	matrix_init(seed);


#ifdef __WIPE_OUT_RESULT_MATRIX__
	// Wipe-out any previous remains in result matrix (for verification)
	addr = offsetof(shared_buf_t, C[0]);
	sz = sizeof(Mailbox.C);
	printf("Writing C[%uB] to address %08x...\n", sz, addr);
	e_write(pDRAM, 0, 0, addr, (void *) Mailbox.C, sz);
#endif

	clock_gettime(CLOCK_MONOTONIC, &timer[0]);

	// Copy operand matrices to Epiphany system
	addr = offsetof(shared_buf_t, A[0]);
	sz = sizeof(Mailbox.A);
	printf("Writing A[%uB] to address %08x...\n", sz, addr);
	e_write(pDRAM, 0, 0, addr, (void *) Mailbox.A, sz);
	
	addr = offsetof(shared_buf_t, B[0]);
	sz = sizeof(Mailbox.B);
	printf("Writing B[%uB] to address %08x...\n", sz, addr);
	e_write(pDRAM, 0, 0, addr, (void *) Mailbox.B, sz);


	// Call the Epiphany matmul() function
	printf("GO Epiphany! ...   ");
	clock_gettime(CLOCK_MONOTONIC, &timer[1]);
	matmul_go(pDRAM);
	clock_gettime(CLOCK_MONOTONIC, &timer[2]);
	printf("Finished calculating Epiphany result.\n");


	// Read result matrix and timing
	addr = offsetof(shared_buf_t, C[0]);
	sz = sizeof(Mailbox.C);
	printf("Reading result from address %08x...\n", addr);
	e_read(pDRAM, 0, 0, addr, (void *) Mailbox.C, sz);

	clock_gettime(CLOCK_MONOTONIC, &timer[3]);


	// Calculate a reference result
	printf("Calculating result on Host ...   ");
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[4]);
#ifndef __DO_STRASSEN__
	matmul(Mailbox.A, Mailbox.B, Cref, _Smtx);
#else
	matmul_strassen(Mailbox.A, Mailbox.B, Cref, _Smtx);
#endif
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[5]);
	printf("Finished calculating Host result.\n");


	addr = offsetof(shared_buf_t, core.clocks);
	sz = sizeof(Mailbox.core.clocks);
	printf("Reading time from address %08x...\n", addr);
	e_read(pDRAM,0, 0, addr, &Mailbox.core.clocks, sizeof(Mailbox.core.clocks));
//	clocks = Mailbox.core.clocks;


	// Calculate the difference between the Epiphany result and the reference result
	printf("\n*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n");
	printf("Verifying result correctness ...   ");
	matsub(Mailbox.C, Cref, Cdiff, _Smtx);

	tdiff[0] = (timer[2].tv_sec - timer[1].tv_sec) * 1000 + ((double) (timer[2].tv_nsec - timer[1].tv_nsec) / 1000000.0);//total
	tdiff[1] = (timer[1].tv_sec - timer[0].tv_sec) * 1000 + ((double) (timer[1].tv_nsec - timer[0].tv_nsec) / 1000000.0);//write
	tdiff[2] = (timer[3].tv_sec - timer[2].tv_sec) * 1000 + ((double) (timer[3].tv_nsec - timer[2].tv_nsec) / 1000000.0);//read
	tdiff[3] = (timer[5].tv_sec - timer[4].tv_sec) * 1000 + ((double) (timer[5].tv_nsec - timer[4].tv_nsec) / 1000000.0);//ref


	// If the difference is 0, then the matrices are identical and the
	// calculation was correct
	if (iszero(Cdiff, _Smtx))
	{
		printf("C_epiphany == C_host\n");
		rerval = 0;
	} else {
		printf("\n\nERROR: C_epiphany is different from C_host !!!\n");
		rerval = 1;
	}
	printf("*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n");
	printf("\n");
	printf("Epiphany (compute):  %9.1f msec  (@ %03d MHz)\n"   , tdiff[0], eMHz);
	printf("         (write)  :  %9.1f msec \n"                , tdiff[1]);
	printf("         (read)   :  %9.1f msec\n"                 , tdiff[2]);
	printf("         (*total*):  %9.1f msec\n\n"               , tdiff[2]+tdiff[1]+tdiff[0]);
	printf("Host     (*total*):  %9.1f msec  (@ %03d MHz)\n"   , tdiff[3], aMHz);


#ifdef __DUMP_MATRICES__
	printf("\n\n\n");
	printf("A[][] = \n");
	matprt(Mailbox.A, _Smtx);
	printf("B[][] = \n");
	matprt(Mailbox.B, _Smtx);
	printf("C[][] = \n");
	matprt(Mailbox.C, _Smtx);
	printf("Cref[][] = \n");
	matprt(Cref, _Smtx);

	int i, j;
	for (i=0; i<_Nside; i++)
		for (j=0; j<_Nside; j++)
		{
			e_read(pEpiphany, i, j, 0x2000+0*sizeof(float), &Aepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x2000+2*sizeof(float), &Aepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x4000+0*sizeof(float), &Bepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x4000+2*sizeof(float), &Bepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float));
		}
	printf("Aepi[][] = \n");
	matprt(Aepi, _Smtx);
	printf("Bepi[][] = \n");
	matprt(Bepi, _Smtx);
#endif

	printf("\n* * *   EPIPHANY FTW !!!   * * *\n");


	// Close connection to device
	if (e_close(pEpiphany))
	{
		printf("\nERROR: Can't close connection to Epiphany device!\n\n");
		exit(1);
	}
	if (e_free(pDRAM))
	{
		printf("\nERROR: Can't release Epiphany DRAM!\n\n");
		exit(1);
	}

	e_finalize();

	return rerval;
}

コード例 #8

0

ファイルを表示

ファイル: tstack.c プロジェクト: 00001/plan9port

/*
 * Applies a transformation and its inverse to a Space in one step:
 * t->t is combined with m through matmul, and t->tinv with inv through
 * matmulr.
 * NOTE(review): presumably m and inv are inverses of each other and matmulr
 * multiplies on the opposite side so that t->tinv stays the inverse of
 * t->t - confirm against matmul/matmulr.
 */
void ixform(Space *t, Matrix m, Matrix inv){
	matmul(t->t, m);
	matmulr(t->tinv, inv);
}

コード例 #9

0

ファイルを表示

ファイル: matmul.c プロジェクト: einarhorn/FaultSight

/*
 * Computes c = a * b for n x n row-major matrices with one level of
 * Strassen's scheme: a and b are split into four N x N quadrants (N = n/2),
 * seven N x N products m1..m7 are formed with matmul(), and the quadrants
 * of c are assembled from them with matadd()/matsub().
 *
 * The quadrant split only covers the whole matrix when n is even, so an odd
 * n is handed to the plain matmul(a, b, c, n) instead; the same happens if
 * the scratch memory cannot be allocated. Nothing is done for n <= 0.
 *
 * NOTE(review): the fallback assumes matmul(x, y, z, k) overwrites z with the
 * k x k product, as its use on uninitialised scratch blocks below implies.
 */
void matmul_strassen(double* a, double* b, double* c, int n)
{
    enum { NUM_BLOCKS = 21 };   /* 2 temporaries + 4 a + 4 b + 4 c + 7 products */

    double *pool;
    double *tmp1, *tmp2;
    double *a11, *a12, *a21, *a22;
    double *b11, *b12, *b21, *b22;
    double *c11, *c12, *c21, *c22;
    double *m1, *m2, *m3, *m4, *m5, *m6, *m7;
    size_t quad;
    int i, j, N;

    if (n <= 0)
        return;

    /* an odd n cannot be split into equal quadrants */
    if (n % 2 != 0)
    {
        matmul(a, b, c, n);
        return;
    }

    N = n/2;
    quad = (size_t) N * (size_t) N;

    /* one allocation for all scratch blocks: a single check, a single free */
    pool = (double*) malloc(NUM_BLOCKS * quad * sizeof(double));
    if (pool == NULL)
    {
        matmul(a, b, c, n);
        return;
    }

    tmp1 = pool;
    tmp2 = tmp1 + quad;

    a11 = tmp2 + quad;
    a12 = a11 + quad;
    a21 = a12 + quad;
    a22 = a21 + quad;

    b11 = a22 + quad;
    b12 = b11 + quad;
    b21 = b12 + quad;
    b22 = b21 + quad;

    c11 = b22 + quad;
    c12 = c11 + quad;
    c21 = c12 + quad;
    c22 = c21 + quad;

    m1 = c22 + quad;
    m2 = m1 + quad;
    m3 = m2 + quad;
    m4 = m3 + quad;
    m5 = m4 + quad;
    m6 = m5 + quad;
    m7 = m6 + quad;

    // partition A and B
    for (i=0; i< N; i++) 
    {
        for (j=0; j< N; j++) 
        {
            a11[i*N +j ] = a[i*n+j];
            b11[i*N +j ] = b[i*n+j];
            
            a12[i*N +j ] = a[i*n+j+N];
            b12[i*N +j ] = b[i*n+j+N];
            
            a21[i*N +j ] = a[(i+N)*n+j];
            b21[i*N +j ] = b[(i+N)*n+j];

            a22[i*N +j ] = a[(i+N)*n+j+N];
            b22[i*N +j ] = b[(i+N)*n+j+N];
        }
    }
    
    //form m1 = (a11 + a22)(b11 + b22)
    matadd(a11, a22, tmp1, N);
    matadd(b11, b22, tmp2, N);
    matmul(tmp1, tmp2, m1, N);
    
    //form m2 = (a21 + a22)b11
    matadd(a21, a22, tmp1, N);
    matmul(tmp1, b11, m2, N);

    //form m3 = a11(b12 - b22)
    matsub(b12, b22, tmp1, N);
    matmul(a11, tmp1, m3, N);

    //form m4 = a22(b21 - b11)
    matsub(b21, b11, tmp1, N);
    matmul(a22, tmp1, m4, N);
    
    //form m5 = (a11 +a12)b22
    matadd(a11, a12, tmp1, N);
    matmul(tmp1, b22, m5, N);
    
    //form m6 = (a21 -a11)(b11 + b12)
    matsub(a21, a11, tmp1, N);
    matadd(b11, b12, tmp2, N);
    matmul(tmp1, tmp2, m6, N);

    //form m7 = (a12 -a22)(b21 + b22)
    matsub(a12, a22, tmp1, N);
    matadd(b21, b22, tmp2, N);
    matmul(tmp1, tmp2, m7, N);    

    //============================
    //form c11 = m1 + m4 - m5 + m7
    matadd(m1, m4, tmp1, N);
    matsub(tmp1, m5, tmp2, N);
    matadd(tmp2, m7, c11, N);

    //form c12 = m3 + m5
    matadd(m3, m5, c12, N);

    //form c21 = m2 + m4
    matadd(m2, m4, c21, N);

    //form c22 = m1 - m2 + m3 + m6
    matsub(m1, m2, tmp1, N);
    matadd(tmp1, m3, tmp2, N);
    matadd(tmp2, m6, c22, N);

    // assemble c from its four quadrants
    for (i=0; i< N; i++) 
    {
        for (j=0; j< N; j++) 
        {
            c[i*n+j] = c11[i*N +j ];
            c[i*n+j+N] = c12[i*N +j ];
            c[(i+N)*n+j] = c21[i*N +j ];
            c[(i+N)*n+j+N] = c22[i*N +j ]; 
        }
    }

    free(pool);
}

コード例 #10

0

ファイルを表示

ファイル: lhsDisFluxJacobian.C プロジェクト: srharris91/Katz_Work

// Fills, for each of npts points, an nq x nq block of M from the face area
// vector A, the grid speed term xv and the left/right states ql and qr.
//   npts  number of points to process
//   A     ndim values per point; the first two are used as (Ax, Ay)
//   xv    one value per point, subtracted (divided by ds) from the normal velocity
//   ql    nq values per point, left state, read as (rho, rho*u, rho*v, rho*e)
//   qr    nq values per point, right state, same layout
//   M     output, nq*nq values per point
// The indexing below (R[0..15], S[0..15], l[0..3], M[iM+0..15]) is written
// for nq == 4.
void StrandSPLam::lhsDisFluxJacobian(const int& npts,
				     const double* A,
				     const double* xv,
				     const double* ql,
				     const double* qr,
				     double* M)
{
  // absolute Jacobian matrix (Roe)
  int iq,iA,iM;
  double Ax,Ay,ds,Nx,Ny,Tx,Ty,rl,rul,rvl,re,rqq,p,rhl,rr,rur,rvr,rhr,
    dd,u,v,h,qq,cc,ccr,ccr2,c,ut,un,l[4],a,dlim=1.,R[nq*nq],S[nq*nq];

  for (int n=0; n<npts; n++){
    // offsets of point n in the state, area and output arrays
    iq      = nq  *n;
    iA      = ndim*n;
    iM      = nq*nq*n;

    // face area magnitude, unit normal (Nx,Ny) and unit tangent (Tx,Ty)
    Ax      = A[iA  ];
    Ay      = A[iA+1];
    ds      = sqrt(Ax*Ax+Ay*Ay);
    Nx      = Ax/ds;
    Ny      = Ay/ds;
    Tx      = Ny;
    Ty      =-Nx;

    // left state: density, momenta, and rhl = rho*e + p
    rl      = ql[iq  ];
    rul     = ql[iq+1];
    rvl     = ql[iq+2];
    re      = ql[iq+3];
    rqq     =(rul*rul+rvl*rvl)/rl;
    p       = gm1*(re-.5*rqq);
    rhl     = re+p;
    // right state (re, rqq and p are reused)
    rr      = qr[iq  ];
    rur     = qr[iq+1];
    rvr     = qr[iq+2];
    re      = qr[iq+3];
    rqq     =(rur*rur+rvr*rvr)/rr;
    p       = gm1*(re-.5*rqq);
    rhr     = re+p;

    // square-root-of-density weighted averages of u, v and h;
    // rl and rr are overwritten with 1/sqrt(rho) along the way
    rl      = sqrt(rl);
    rr      = sqrt(rr);
    dd      = 1./(rl+rr);
    rl      = 1./rl;
    rr      = 1./rr;
    u       =(rul*rl+rur*rr)*dd;
    v       =(rvl*rl+rvr*rr)*dd;
    h       =(rhl*rl+rhr*rr)*dd;
    qq      = .5*(u*u+v*v);
    cc      = gm1*(h-qq);
    ccr     = 1./cc;
    ccr2    = .5*ccr;
    c       = sqrt(cc);
    ut      = u*Tx+v*Ty;
    un      = u*Nx+v*Ny-xv[n]/ds;

    // absolute wave speeds scaled by the face area; values below
    // a = dlim*c*ds are replaced by .5*(a + l*l/a)
    l[0]    = ds*fabs(un  );
    l[1]    = ds*fabs(un  );
    l[2]    = ds*fabs(un+c);
    l[3]    = ds*fabs(un-c);
    a       = dlim*c*ds;
    for (int k=0; k<nq; k++) if (l[k] < a) l[k] = .5*(a+l[k]*l[k]/a);

    // R: row-major 4x4 whose column k is scaled by l[k]
    R[0 ] = l[0];
    R[1 ] = 0.;
    R[2 ] = l[2];
    R[3 ] = l[3];

    R[4 ] = l[0]*u;
    R[5 ] = l[1]*Tx;
    R[6 ] = l[2]*(u+Nx*c);
    R[7 ] = l[3]*(u-Nx*c);

    R[8 ] = l[0]*v;
    R[9 ] = l[1]*Ty;
    R[10] = l[2]*(v+Ny*c);
    R[11] = l[3]*(v-Ny*c);

    R[12] = l[0]*qq;
    R[13] = l[1]*ut;
    R[14] = l[2]*(h+un*c);
    R[15] = l[3]*(h-un*c);

    // S: row-major 4x4, second factor of the product
    S[0 ] =-gm1*ccr*qq+1.;
    S[1 ] = gm1*ccr*u;
    S[2 ] = gm1*ccr*v;
    S[3 ] =-gm1*ccr;

    S[4 ] =-ut;
    S[5 ] = Tx;
    S[6 ] = Ty;
    S[7 ] = 0.;

    S[8 ] = ccr2*(gm1*qq-c*un);
    S[9 ] =-ccr2*(gm1*u -c*Nx);
    S[10] =-ccr2*(gm1*v -c*Ny);
    S[11] = ccr2* gm1;

    S[12] = ccr2*(gm1*qq+c*un);
    S[13] =-ccr2*(gm1*u +c*Nx);
    S[14] =-ccr2*(gm1*v +c*Ny);
    S[15] = ccr2* gm1;

    // presumably stores the nq x nq product R*S in M[iM...] - confirm
    matmul(nq,nq,nq,&R[0],&S[0],&M[iM]);

    // NOTE(review): the sixteen assignments below overwrite every entry of
    // the block just passed to matmul with ds*(|un|+c) on the diagonal and
    // zero elsewhere. If matmul only writes its output argument and nq == 4,
    // the R, S and matmul work above has no effect on M - confirm whether
    // this override is intentional.
    M[iM+0 ] = ds*(fabs(un)+c);
    M[iM+1 ] = 0.;
    M[iM+2 ] = 0.;
    M[iM+3 ] = 0.;

    M[iM+4 ] = 0.;
    M[iM+5 ] = ds*(fabs(un)+c);
    M[iM+6 ] = 0.;
    M[iM+7 ] = 0.;

    M[iM+8 ] = 0.;
    M[iM+9 ] = 0.;
    M[iM+10] = ds*(fabs(un)+c);
    M[iM+11] = 0.;

    M[iM+12] = 0.;
    M[iM+13] = 0.;
    M[iM+14] = 0.;
    M[iM+15] = ds*(fabs(un)+c);
  }
}

コード例 #11

0

ファイルを表示

ファイル: hbhankel.c プロジェクト: asl/rssa

/* Matrix-vector product callback: forwards `out`, `v` and the opaque `matrix`
 * handle to matmul() with its last argument fixed to 1.
 *
 *   out    - destination buffer, written by matmul()
 *   v      - input vector
 *   matrix - opaque operator handle, passed through untouched
 *
 * NOTE(review): the "t" in the name suggests the flag value 1 selects the
 * transposed product; matmul() is not visible here, so confirm against its
 * definition. */
static void hbhankel_tmatmul(double* out,
                             const double* v,
                             const void* matrix) {
  matmul(out, v, matrix, 1);
}

コード例 #12

0

ファイルを表示

ファイル: ERMSD.cpp プロジェクト: JFDama/plumed2

// Fill `mat` and `Gderi` for every ordered residue pair (i,j).
//
// Stage 1 builds, for each residue (three consecutive entries of `positions`),
// its center (equal-weight average), an orthonormal local frame stored in
// pos[3r..3r+2], and the derivatives of the three frame vectors with respect
// to the three atoms, stored in deri[9r..9r+8].
// Stage 2 zeroes the first nresidues*nresidues entries of `mat`.
// Stage 3 visits every pair with inPair(i,j) and i!=j; when the center
// distance is below cutoff/form_factor[0] and the scaled local vector rtilde
// is shorter than `cutoff`, it writes mat[i*nresidues+j] and the six
// derivative blocks Gderi[(i*nresidues+j)*6 .. +5].
//
// positions : atom coordinates, consumed three per residue
// pbc       : not referenced anywhere in this body (delta() is used directly)
// mat       : output, indexed i*nresidues+j; must already hold at least
//             nresidues*nresidues entries
// Gderi     : output, indexed (i*nresidues+j)*6+l; entries for skipped pairs
//             are left untouched (they are NOT zeroed here)
//
// NOTE(review): stage 1 loops over natoms/3 while the work arrays are sized
// from nresidues — assumes natoms == 3*nresidues, confirm.
// NOTE(review): `centers[...] +=` and `rtilde[l] +=` rely on Vector being
// zero-initialised by its default constructor — confirm.
void ERMSD::calcMat(const std::vector<Vector> & positions,const Pbc& pbc, std::vector<Vector4d> &mat, std::vector<TensorGeneric<4,3> > &Gderi) {

  // Local frame vectors: three per residue (indices 3r, 3r+1, 3r+2)
  std::vector<Vector3d> pos;
  pos.resize(3*nresidues);

  // Frame derivatives: nine tensors per residue, laid out as
  // deri[9r + 3*atom + frame_vector]
  std::vector<Tensor3d> deri;
  deri.resize(nresidues*9);

  std::vector<Vector> centers;
  centers.resize(nresidues);

  unsigned idx_deri = 0;

  // Constant derivative tensors of a and b with respect to the three atoms
  // (2/3 on the "own" atom, -1/3 on the other two).
  Tensor da_dxa = (2./3.)*Tensor::identity();
  Tensor da_dxb = -(1./3.)*Tensor::identity();
  Tensor da_dxc = -(1./3.)*Tensor::identity();

  Tensor db_dxa = -(1./3.)*Tensor::identity();
  Tensor db_dxb = (2./3.)*Tensor::identity();
  Tensor db_dxc = -(1./3.)*Tensor::identity();

  // Form factors - should this be somewhere else?

  // w is the per-atom weight used for the center; form_factor scales the three
  // local-frame components of the inter-residue vector.
  double w = 1./3.;
  Vector form_factor = Vector(2.0,2.0,1.0/0.3);

  for(unsigned res_idx=0; res_idx<natoms/3; res_idx++) {


    const unsigned at_idx = 3*res_idx;
    // Center: equal-weight average of the residue's three atoms
    for (unsigned j=0; j<3; j++) {
      centers[res_idx] += w*positions[at_idx+j];
    }

    // a, b: vectors from the center to the first and second atom; d = a x b
    Vector3d a = delta(centers[res_idx],positions[at_idx]);
    Vector3d b = delta(centers[res_idx],positions[at_idx+1]);
    Vector3d d = crossProduct(a,b);
    // Inverse norms; no guard against a zero-length a or d
    double ianorm = 1./a.modulo();
    double idnorm = 1./d.modulo();

    // X versor: normalised a (center -> first atom)
    pos[at_idx] = a*ianorm;
    // Z versor: normalised d = a x b
    pos[at_idx+2] = d*idnorm;
    // Y versor: Z x X
    pos[at_idx+1] = crossProduct(pos[at_idx+2],pos[at_idx]);

    // Derivatives ////////
    // t1 = (I - x x^T)/|a|, the derivative of the normalised a w.r.t. a
    Tensor3d t1 = ianorm*(Tensor::identity()-extProduct(pos[at_idx],pos[at_idx]));
    // dv1/dxa
    deri[idx_deri] = (2./3. )*t1;
    // dv1/dxb
    deri[idx_deri+3] = -(1./3.)*t1;
    // dv1/dxc
    deri[idx_deri+6] = -(1./3.)*t1;

    // Derivatives of the unnormalised d = a x b with respect to each atom
    Tensor dd_dxa =  VcrossTensor(a,db_dxa) -VcrossTensor(b,da_dxa);
    Tensor dd_dxb =  VcrossTensor(a,db_dxb)-VcrossTensor(b,da_dxb);
    Tensor dd_dxc =  VcrossTensor(a,db_dxc)-VcrossTensor(b,da_dxc);

    // dv3/dxa
    deri[idx_deri+2] = deriNorm(d,dd_dxa);
    // dv3/dxb
    deri[idx_deri+5] = deriNorm(d,dd_dxb);
    // dv3/dxc
    deri[idx_deri+8] = deriNorm(d,dd_dxc);

    // dv2/dxa = dv3/dxa cross v1 + v3 cross dv1/dxa
    deri[idx_deri+1] =  (VcrossTensor(deri[idx_deri+2],pos[at_idx]) + \
                         VcrossTensor(pos[at_idx+2],deri[idx_deri]));
    // dv2/dxb
    deri[idx_deri+4] =  (VcrossTensor(deri[idx_deri+5],pos[at_idx]) + \
                         VcrossTensor(pos[at_idx+2],deri[idx_deri+3]));
    // dv2/dxc
    deri[idx_deri+7] =  (VcrossTensor(deri[idx_deri+8],pos[at_idx]) + \
                         VcrossTensor(pos[at_idx+2],deri[idx_deri+6]));

    // Advance to the next residue's block of nine derivative tensors
    idx_deri += 9;
    // End derivatives ///////

  }


  // Initialization (unnecessary?)
  for (unsigned i1=0; i1<nresidues*nresidues; i1++) {
    for (unsigned i2=0; i2<4; i2++) {
      mat[i1][i2] = 0.0;
    }
  }

  // maxdist: cheap pre-filter on the plain center distance.
  // gamma: scale applied to |rtilde| inside the sin/cos terms below.
  double maxdist = cutoff/form_factor[0];
  double gamma = pi/cutoff;
  unsigned idx;
  unsigned idx1 = 0;
  // Calculate mat
  for (unsigned i=0; i<nresidues; i++) {
    for (unsigned j=0; j<nresidues; j++) {

      // skip i==j
      if(inPair(i,j) and i != j) {
        //if(i!=j){


        // Calculate normal distance first
        Vector diff = delta(centers[i],centers[j]);
        double d1 = diff.modulo();
        //std::cout << inPair(i,j) << " " << i << " " << j << " "<< d1 <<"\n";
        //std::cout << inPair(i,j) << " " << i << " " << j << " "<< d1 <<"\n";
        if(d1<maxdist) {

          // calculate r_tilde_ij: project diff onto residue i's frame vectors
          // and scale each component by its form factor
          Vector3d rtilde;
          for (unsigned k=0; k<3; k++) {
            for (unsigned l=0; l<3; l++) {
              rtilde[l] += pos[3*i+l][k]*diff[k]*form_factor[l];
            }
          }
          double rtilde_norm = rtilde.modulo();

          // Computed before the cutoff test; no guard against rtilde_norm == 0
          double irnorm = 1./rtilde_norm;

          // ellipsoidal cutoff
          if(rtilde_norm < cutoff) {
            idx = i*nresidues + j;
            //std::cout << i << " " << j << " " << rtilde_norm << " " << idx <<"\n";


            // fill 4d matrix
            // dummy = sin(gamma*|rtilde|)/(gamma*|rtilde|)
            double dummy = sin(gamma*rtilde_norm)/(rtilde_norm*gamma);
            mat[idx][0] = dummy*rtilde[0];
            mat[idx][1] = dummy*rtilde[1];
            mat[idx][2] = dummy*rtilde[2];
            mat[idx][3] = (1.+ cos(gamma*rtilde_norm))/gamma;

            // Derivative (drtilde_dx)
            // Slots 0..2 hold the rows (vvec - rvec) built from residue i's
            // frame derivatives; slots 3..5 hold the rows rvec alone.
            std::vector<Tensor3d> drtilde_dx;
            drtilde_dx.resize(6);
            unsigned pos_idx = 3*i;
            unsigned deri_idx = 9*i;
            for (unsigned at=0; at<3; at++) {
              for (unsigned l=0; l<3; l++) {
                Vector3d rvec = form_factor[l]*((pos[pos_idx+l])/3.);
                Vector3d vvec = form_factor[l]*(matmul(deri[deri_idx+3*at+l],diff));
                drtilde_dx[at].setRow(l,vvec-rvec);
                drtilde_dx[at+3].setRow(l,rvec);
              }
            }

            //std::vector<TensorGeneric<4,3> > dG_dx;
            //dG_dx.resize(6);

            // dummy1 = cos(gamma*|rtilde|) - sin(gamma*|rtilde|)/(gamma*|rtilde|)
            double dummy1 = (cos(gamma*rtilde_norm) - dummy);

            // Six consecutive Gderi slots per (i,j) pair
            idx1 = i*nresidues*6 + j*6;

            for (unsigned l=0; l<6; l++) {
              //std::cout << i << " " << j << " " << idx1 << " " << idx1+l << "\n";

              // components 1,2,3
              // sin(gamma*|rtilde|)/gamma*|rtilde|*d_rtilde +
              // + ((d_rtilde*r_tilde/r_tilde^2) out r_tilde)*
              // (cos(gamma*|rtilde| - sin(gamma*|rtilde|)/gamma*|rtilde|))
              Vector3d rdr = matmul(rtilde,drtilde_dx[l]);
              Tensor tt = dummy*drtilde_dx[l] + (dummy1*irnorm*irnorm)*Tensor(rtilde,rdr);
              for (unsigned m=0; m<3; m++) {
                // Transpose here
                //dG_dx[l].setRow(m,tt.getRow(m));
                Gderi[idx1+l].setRow(m,tt.getRow(m));
              }
              // component 4
              // - sin(gamma*|rtilde|)/|rtilde|*(r_tilde*d_rtilde)
              //dG_dx[l].setRow(3,-dummy*gamma*rdr);
              Gderi[idx1+l].setRow(3,-dummy*gamma*rdr);
            }




          }
        }
      }

    }
  }

}

コード例 #13

0

ファイルを表示

ファイル: matmul-host.c プロジェクト: peteasa/pal

/*
 * Host driver for the matmul test.
 *
 * Initialises the device, opens a square 2D team, uploads operand matrices A
 * and B through the shared-memory mailbox, runs the "matmul" kernel, reads C
 * back, computes a host reference (matmul() or matmul_strassen()) and compares
 * the two results.
 *
 * Returns 0 when the device result equals the host reference, 1 when it
 * differs or the device is too small, and the p_error() code when device or
 * team initialisation fails. A failing p_run() terminates via exit(1).
 */
int main(int argc, char *argv[])
{
	p_mem_t shared_mem;
	uint32_t eram_base;
	int device_cols, device_rows, nside;
	p_dev_t dev;
	p_prog_t prog;
	p_team_t team;
	p_coords_t size;
	p_coords_t start = { .row = 0, .col = 0 };

	unsigned int msize;
	float        seed;
	unsigned int addr; //, clocks;
	size_t       sz;
	int          verbose=0;
	double       tdiff[3];
	int          retval = 0;

	msize     = 0x00400000;

	get_args(argc, argv);

	fo = stderr;
	fi = stdin;
	printf( "------------------------------------------------------------\n");
	printf( "Calculating:   C[%d][%d] = A[%d][%d] * B[%d][%d]\n", _Smtx, _Smtx, _Smtx, _Smtx, _Smtx, _Smtx);
	seed = 0.0;
	if(verbose){
	  printf( "Seed = %f\n", seed);
	}

	dev = p_init(P_DEV_EPIPHANY, 0);
	if (p_error(dev)) {
		fprintf(stderr, "Error initializing PAL\n");
		return p_error(dev);
	}

	device_cols = p_query(dev, P_PROP_COLS);
	device_rows = p_query(dev, P_PROP_ROWS);

	// The team is square, so its side is bounded by the SMALLER device
	// dimension; taking the larger one would request cores that do not
	// exist on a non-square device.
	nside = device_cols < device_rows ? device_cols : device_rows;

	if (nside < 4) {
		fprintf(stderr, "Error: Too small device, need at least 4x4\n");
		return 1;
	}

	// Either 1024, 256, 64, or 16 cores (side must be power of two),
	nside = nside >= 32 ? 32 : nside >= 16 ? 16 : nside >= 8 ? 8 : 4;

	size.row = nside;
	size.col = nside;
	team = p_open4(dev, P_TOPOLOGY_2D, &start, &size);
	// Validate the team handle before querying it.
	if (p_error(team)) {
		fprintf(stderr, "Error opening team\n");
		return p_error(team);
	}
	printf("Using team of size %d\n", p_team_size(team));

	prog = p_load(dev, ar.elfFile, 0);

	eram_base = (unsigned) p_query(dev, P_PROP_MEMBASE);
	shared_mem = p_map(dev, eram_base, msize);

	// Clear mailbox contents
	memset(&Mailbox, 0, sizeof(Mailbox));
	p_write(&shared_mem, &Mailbox, 0, sizeof(Mailbox), 0);

	// Generate operand matrices based on a provided seed
	matrix_init((int)seed);

#ifdef __WIPE_OUT_RESULT_MATRIX__
	// Wipe-out any previous remains in result matrix (for verification)
	addr = offsetof(shared_buf_t, C[0]);
	sz = sizeof(Mailbox.C);
	if(verbose){
	  printf( "Writing C[%uB] to address %08x...\n", (unsigned) sz, addr);
	}
	p_write(&shared_mem, (void *) Mailbox.C, addr, sz, 0);
#endif

	/* Wallclock time */
	clock_gettime(CLOCK_MONOTONIC, &timer[0]);
	/* Clock CPUTIME too. We don't want to indicate failure just
	 * because the system was under high load. */
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[4]);

	// Copy operand matrices to Epiphany system
	addr = offsetof(shared_buf_t, A[0]);
	sz = sizeof(Mailbox.A);
	if(verbose){
	  printf( "Writing A[%uB] to address %08x...\n", (unsigned) sz, addr);
	}
	p_write(&shared_mem, (void *) Mailbox.A, addr, sz, 0);

	addr = offsetof(shared_buf_t, B[0]);
	sz = sizeof(Mailbox.B);
	if(verbose){
	  printf( "Writing B[%uB] to address %08x...\n", (unsigned) sz, addr);
	}
	p_write(&shared_mem, (void *) Mailbox.B, addr, sz, 0);
	// Call the Epiphany matmul() function

	if(verbose){
	  printf( "GO Epiphany! ...   ");
	}
	if(verbose){
	  printf("Loading program on Epiphany chip...\n");
	}

	p_arg_t args[] = { &nside, sizeof(nside), true };
	if (p_run(prog, "matmul", team, 0, p_team_size(team), 1, args, 0)) {
		fprintf(stderr, "Error loading Epiphany program.\n");
		exit(1);
	}

	// Read result matrix and timing
	addr = offsetof(shared_buf_t, C[0]);
	sz = sizeof(Mailbox.C);
	if(verbose){
	  printf( "Reading result from address %08x...\n", addr);
	}
	p_read(&shared_mem, (void *) Mailbox.C, addr, sz, 0);

	clock_gettime(CLOCK_MONOTONIC, &timer[1]);
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[5]);


	// Calculate a reference result
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[2]);
#ifndef __DO_STRASSEN__
	matmul(Mailbox.A, Mailbox.B, Cref, _Smtx);
#else
	matmul_strassen(Mailbox.A, Mailbox.B, Cref, _Smtx);
#endif
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &timer[3]);
	addr = offsetof(shared_buf_t, core.clocks);
	sz = sizeof(Mailbox.core.clocks);
	if(verbose){
	  printf( "Reading time from address %08x...\n", addr);
	}
	p_read(&shared_mem, &Mailbox.core.clocks, addr, sizeof(Mailbox.core.clocks), 0);
//	clocks = Mailbox.core.clocks;


	// Calculate the difference between the Epiphany result and the reference result
	matsub(Mailbox.C, Cref, Cdiff, _Smtx);

	// Elapsed times in milliseconds:
	//   tdiff[0] wall clock of upload + kernel + download (timer 0 -> 1)
	//   tdiff[1] thread CPU time of the host reference    (timer 2 -> 3)
	//   tdiff[2] thread CPU time of the device section    (timer 4 -> 5)
	tdiff[0] = (timer[1].tv_sec - timer[0].tv_sec) * 1000 + ((double) (timer[1].tv_nsec - timer[0].tv_nsec) / 1000000.0);
//	tdiff[0] = ((double) clocks) / eMHz * 1000;
	tdiff[1] = (timer[3].tv_sec - timer[2].tv_sec) * 1000 + ((double) (timer[3].tv_nsec - timer[2].tv_nsec) / 1000000.0);
	tdiff[2] = (timer[5].tv_sec - timer[4].tv_sec) * 1000 + ((double) (timer[5].tv_nsec - timer[4].tv_nsec) / 1000000.0);


	// If the difference is 0, then the matrices are identical and the
	// calculation was correct
	if (iszero(Cdiff, _Smtx))
	  {

	    printf( "Epiphany(time) %9.1f msec  (@ %03d MHz)\n", tdiff[0], eMHz);
	    printf( "Host(time)     %9.1f msec  (@ %03d MHz)\n", tdiff[1], aMHz);
	    printf( "------------------------------------------------------------\n");
	    printf( "TEST \"matmul-16\" PASSED\n");
	    retval = 0;
	} else {
	  printf( "\n\nERROR: C_epiphany is different from C_host !!!\n");
	  printf( "TEST \"matmul-16\" FAILED\n");
	  retval = 1;
	}

#if 0
#ifdef __DUMP_MATRICES__
	printf( "\n\n\n");
	printf( "A[][] = \n");
	matprt(Mailbox.A, _Smtx);
	printf( "B[][] = \n");
	matprt(Mailbox.B, _Smtx);
	printf( "C[][] = \n");
	matprt(Mailbox.C, _Smtx);
	printf( "Cref[][] = \n");
	matprt(Cref, _Smtx);

	int i, j;
	for (i=0; i<_Nside; i++)
		for (j=0; j<_Nside; j++)
		{
			e_read(pEpiphany, i, j, 0x2000+0*sizeof(float), &Aepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x2000+2*sizeof(float), &Aepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x4000+0*sizeof(float), &Bepi[(i*_Score+0)*_Smtx + j*_Score], 2*sizeof(float));
			e_read(pEpiphany, i, j, 0x4000+2*sizeof(float), &Bepi[(i*_Score+1)*_Smtx + j*_Score], 2*sizeof(float));
		}
	printf( "Aepi[][] = \n");
	matprt(Aepi, _Smtx);
	printf( "Bepi[][] = \n");
	matprt(Bepi, _Smtx);
#endif
#endif



	// p_unmap ...
	p_close(team);
	p_finalize(dev);

	return retval;
}


// Populate the mailbox matrices in row-major order:
//   A[r][c] = (r + c + seed)     mod _MAX_MEMBER_
//   B[r][c] = ((r + c)*2 + seed) mod _MAX_MEMBER_
//   C[r][c] = 0x8dead  (sentinel so stale results are easy to spot)
void matrix_init(int seed)
{
	int row, col;
	int idx = 0;

	for (row = 0; row < _Smtx; row++) {
		for (col = 0; col < _Smtx; col++, idx++) {
			const int diag = row + col;

			Mailbox.A[idx] = (diag + seed) % _MAX_MEMBER_;
			Mailbox.B[idx] = (diag * 2 + seed) % _MAX_MEMBER_;
			Mailbox.C[idx] = 0x8dead;
		}
	}
}

コード例 #14

0

ファイルを表示

ファイル: ppp_ar.c プロジェクト: cyborg-x1/rtk_debug

/* fix narrow-lane ambiguity by ILS ------------------------------------------
* Selects the satellite pairs accepted by is_depend() whose float narrow-lane
* ambiguity rounds within rtk->opt.thresar[2], resolves them with lambda(),
* applies the ratio test and hands the iono-free ambiguities to fix_sol().
*
* args   : rtk_t *rtk   IO  rtk control; sol.ratio is updated once lambda()
*                            succeeds, and rtk is passed on to fix_sol()
*          int   *sat1   IO  first satellite of each pair; accepted pairs are
*                            compacted in place to the front of the array
*          int   *sat2   IO  second satellite of each pair (compacted likewise)
*          int   *NW     IO  wide-lane ambiguities (compacted likewise)
*          int   n       I   number of input pairs
* return : value returned by fix_sol(), or 0 when fewer than 3 pairs remain,
*          lambda() fails, s[0]<=0 or the ratio test rejects the solution
* notes  : all work matrices are released on every exit path
*-----------------------------------------------------------------------------*/
static int fix_amb_ILS(rtk_t *rtk, int *sat1, int *sat2, int *NW, int n)
{
    double C1,C2,*B1,*N1,*NC,*D,*E,*Q,s[2],lam_NL=lam_LC(1,1,0),lam1,lam2;
    int i,j,k,m=0,info,stat=0,flgs[MAXSAT]= {0},max_flg=0;

    lam1=lam_carr[0];
    lam2=lam_carr[1];

    /* iono-free combination coefficients */
    C1= SQR(lam2)/(SQR(lam2)-SQR(lam1));
    C2=-SQR(lam1)/(SQR(lam2)-SQR(lam1));

    B1=zeros(n,1);
    N1=zeros(n,2);
    D=zeros(rtk->nx,n);
    E=mat(n,rtk->nx);
    Q=mat(n,n);
    NC=mat(n,1);

    for (i=0; i<n; i++) {

        /* check linear independency */
        if (!is_depend(sat1[i],sat2[i],flgs,&max_flg)) continue;

        j=IB(sat1[i],&rtk->opt);
        k=IB(sat2[i],&rtk->opt);

        /* float narrow-lane ambiguity (cycle) */
        B1[m]=(rtk->x[j]-rtk->x[k]+C2*lam2*NW[i])/lam_NL;
        N1[m]=ROUND(B1[m]);

        /* validation of narrow-lane ambiguity */
        if (fabs(N1[m]-B1[m])>rtk->opt.thresar[2]) continue;

        /* narrow-lane ambiguity transformation matrix */
        D[j+m*rtk->nx]= 1.0/lam_NL;
        D[k+m*rtk->nx]=-1.0/lam_NL;

        /* keep the accepted pair at slot m (m<=i, so nothing is overwritten
           before it has been read) */
        sat1[m]=sat1[i];
        sat2[m]=sat2[i];
        NW[m++]=NW[i];
    }
    if (m<3) goto cleanup;

    /* covariance of narrow-lane ambiguities */
    matmul("TN",m,rtk->nx,rtk->nx,1.0,D,rtk->P,0.0,E);
    matmul("NN",m,m,rtk->nx,1.0,E,D,0.0,Q);

    /* integer least square */
    if ((info=lambda(m,2,B1,Q,N1,s))) {
        trace(2,"lambda error: info=%d\n",info);
        goto cleanup;
    }
    if (s[0]<=0.0) goto cleanup;

    rtk->sol.ratio=(float)(MIN(s[1]/s[0],999.9));

    /* varidation by ratio-test */
    if (rtk->opt.thresar[0]>0.0&&rtk->sol.ratio<rtk->opt.thresar[0]) {
        trace(2,"varidation error: n=%2d ratio=%8.3f\n",m,rtk->sol.ratio);
        goto cleanup;
    }
    trace(2,"varidation ok: %s n=%2d ratio=%8.3f\n",time_str(rtk->sol.time,0),m,
          rtk->sol.ratio);

    /* narrow-lane to iono-free ambiguity */
    for (i=0; i<m; i++) {
        NC[i]=C1*lam1*N1[i]+C2*lam2*(N1[i]-NW[i]);
    }
    /* fixed solution */
    stat=fix_sol(rtk,sat1,sat2,NC,m);

cleanup:
    /* single release point so that the early exits above do not leak */
    free(B1);
    free(N1);
    free(D);
    free(E);
    free(Q);
    free(NC);

    return stat;
}

コード例 #15

0

ファイルを表示

ファイル: matmul_example.c プロジェクト: gpichon/eigenproblems

/* Try various ways to do matmul and time them.  Tiled algorithms
 * running serially; multi-threaded QUARK runtime with tiled
 * algorithms; and direct serial computation over standard layout.
 *
 *   NB      - tile edge length; must be positive
 *   N       - matrix edge length; must be non-negative
 *   THREADS - thread count handed to QUARK_New()
 *
 * Returns 0 after all three variants have run, or -1 when the sizes are
 * invalid or a work buffer cannot be allocated.
 *
 * NOTE(review): the tile count is N/NB (integer division), so the tiled
 * variants presumably expect N to be a multiple of NB - confirm. */
int main_algorithm(int NB, int N, int THREADS)
{
    int i, j, k, nerr=0;
    int BB;
    size_t nbytes;
    double *A, *Ablk, *B, *Bblk, *C_direct, *C, *Cblk, *C_quark, *C_quark_blk;
    struct timeval tstart, tend, tdiff;
    double t_blk=0, t_quark=0, t_direct=0;

    /* NB==0 would divide by zero below; negative sizes make no sense */
    if (NB <= 0 || N < 0) {
        fprintf(stderr, "main_algorithm: invalid sizes N=%d NB=%d\n", N, NB);
        return -1;
    }
    BB = N/NB;

    /* Widen before multiplying so N*N cannot overflow int */
    nbytes = (size_t)N*(size_t)N*sizeof(double);
    A = (double*)malloc(nbytes);
    Ablk = (double*)malloc(nbytes);
    B = (double*)malloc(nbytes);
    Bblk = (double*)malloc(nbytes);
    C_direct = (double*)malloc(nbytes);
    C = (double*)malloc(nbytes);
    Cblk = (double*)malloc(nbytes);
    C_quark = (double*)malloc(nbytes);
    C_quark_blk = (double*)malloc(nbytes);
    /* malloc(0) may legitimately return NULL, so only treat NULL as a
     * failure when memory was actually requested */
    if (nbytes != 0 &&
        (!A || !Ablk || !B || !Bblk || !C_direct || !C || !Cblk || !C_quark || !C_quark_blk)) {
        fprintf(stderr, "main_algorithm: out of memory (N=%d)\n", N);
        free(A); free(Ablk); free(B); free(Bblk); free(C); free(Cblk); free(C_direct); free(C_quark); free(C_quark_blk);
        return -1;
    }

    // Initialize
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            A[i+j*N] = (double)1.0+i;
            B[i+j*N] = (double)2.0+i+j;
            C_quark[i+j*N] = C_direct[i+j*N] = C[i+j*N] = 3.0;
        }
    }

    matrix_print("Printing A", A, N);
    matrix_print("Printing B", B, N);
    matrix_print("Printing C before computation", C, N);

    // Move from F77 to BDL
    std_to_bdl( A, Ablk, N, NB );
    std_to_bdl( B, Bblk, N, NB );
    std_to_bdl( C, Cblk, N, NB );
    std_to_bdl( C_quark, C_quark_blk, N, NB );

    /* ORIGINAL TILED ROUTINE */
    /* This is the code for the serial tile-by-tile multiplication */
    printf("Doing matrix multiplication using serial tile-by-tile algorithm\n");
    gettimeofday( &tstart, NULL );
    for (i = 0; i < BB; i++)
        for (j = 0; j < BB; j++)
            for (k = 0; k < BB; k++)
                matmul ( &Ablk[NB*NB*i + NB*NB*BB*k], &Bblk[NB*NB*k + NB*NB*BB*j], &Cblk[NB*NB*i + NB*NB*BB*j], NB);
    gettimeofday( &tend, NULL );
    t_blk = timeval_subtract( &tdiff, &tend, &tstart );
    printf("Time taken: %f\n", tdiff.tv_sec + (double)tdiff.tv_usec/1000000 );
    bdl_to_std( C, Cblk, N, NB );
    matrix_print("Printing C produced by serial tile-algorithm after computation", C, N);
    printf("\n");

    /* QUARK PARALLEL TILED ROUTINE */
    /* This is the code for the QUARK runtime to do the parallel multi-threaded tile-by-tile algorithm */
    printf("Doing matrix multiplication using the multi-threaded QUARK runtime for a tile based algorithm\n");
    Quark *quark = QUARK_New(THREADS);
    gettimeofday( &tstart, NULL );
    for (i = 0; i < BB; i++)
        for (j = 0; j < BB; j++)
            for (k = 0; k < BB; k++)
                matmul_quark_call ( quark, &Ablk[NB*NB*i + NB*NB*BB*k], &Bblk[NB*NB*k + NB*NB*BB*j], &C_quark_blk[NB*NB*i + NB*NB*BB*j], NB);
    /* Wait for every queued tile task before stopping the clock */
    QUARK_Barrier( quark );
    gettimeofday( &tend, NULL );
    t_quark = timeval_subtract( &tdiff, &tend, &tstart );
    printf("Time taken: %f\n", tdiff.tv_sec + (double)tdiff.tv_usec/1000000 );
    QUARK_Delete(quark);
    bdl_to_std( C_quark, C_quark_blk, N, NB );
    matrix_print("Printing C produced by QUARK runtime after computation", C_quark, N);
    printf("\n");

    /* DIRECT COMPUTATION OVER STANDARD LAYOUT */
    /* Compute direct C if desired */
    printf("Doing matrix multiplication using direct loops (ie, view matrix as one big tile)\n");
    gettimeofday( &tstart, NULL );
    matmul ( A, B, C_direct, N );
    gettimeofday( &tend, NULL );
    t_direct = timeval_subtract( &tdiff, &tend, &tstart );
    printf("Time taken: %f\n", (double)(tdiff.tv_sec + (double)tdiff.tv_usec/1000000) );
    matrix_print("Printing C produced by direct matmul after computation", C_direct, N);
    printf("\n");

    /* Check for errors */
    printf("Comparing result matrices (direct versus QUARK)\n");
    nerr = matrix_compare( C_direct, C_quark, N );
    printf("Number of differences: %d\n", nerr);
    printf("\n");

    printf("Summary of time taken\n");
    printf("Direct       SerialBlock  QUARK(%d threads)\n", THREADS);
    printf("%-12.5f %-12.5f %-12.5f\n", t_direct, t_blk, t_quark);

    free(A); free(Ablk); free(B); free(Bblk); free(C); free(Cblk); free(C_direct); free(C_quark); free(C_quark_blk);
    return 0;
}

コード例 #16

0

ファイルを表示

ファイル: RMSD.cpp プロジェクト: apoma/plumed2

// Optimal-alignment (M)SD between `positions` and `reference`, obtained from
// the first eigenpair returned by diagMat() for a 4x4 symmetric matrix built
// from the weighted second moments of the two centred structures.
//
// align       : per-atom weights used for centring and fitting
// displace    : must compare equal to `align` (enforced by the assertion below)
// positions   : current coordinates
// reference   : reference coordinates; its size fixes the atom count n
// derivatives : output, resized to n; derivative of the returned value with
//               respect to each entry of `positions`
// squared     : when true the mean square deviation is returned, otherwise
//               its square root
//
// Reads the member flag `safe`: when set, the MSD is re-accumulated from the
// per-atom residuals instead of being taken from the eigenvalue.
// Raises through plumed_merror() when diagMat() reports a non-zero code.
//
// NOTE(review): there is no guard for norm == 0 (1.0/norm) nor for dist == 0
// when !squared (0.5/sqrt(dist)); both produce non-finite values.
// NOTE(review): accumulators such as cpositions, creference and sum01w rely on
// Vector/Tensor being zero-initialised by their default constructors - confirm.
double RMSD::optimalAlignment(const  std::vector<double>  & align,
                                     const  std::vector<double>  & displace,
                                     const std::vector<Vector> & positions,
                                     const std::vector<Vector> & reference ,
                                     std::vector<Vector>  & derivatives, bool squared) {
  plumed_massert(displace==align,"OPTIMAL_FAST version of RMSD can only be used when displace weights are same as align weights");

  double dist(0);
  double norm(0);
  const unsigned n=reference.size();
// This is the trace of positions*positions + reference*reference
  double sum00w(0);
// This is positions*reference
  Tensor sum01w;

  derivatives.resize(n);

  Vector cpositions;
  Vector creference;

// first expensive loop: compute centers
  for(unsigned iat=0;iat<n;iat++){
    double w=align[iat];
    norm+=w;
    cpositions+=positions[iat]*w;
    creference+=reference[iat]*w;
  }
  double invnorm=1.0/norm;

// turn the weighted sums into weighted averages
  cpositions*=invnorm;
  creference*=invnorm;
  
// second expensive loop: compute second moments wrt centers
  for(unsigned iat=0;iat<n;iat++){
    double w=align[iat];
    sum00w+=(dotProduct(positions[iat]-cpositions,positions[iat]-cpositions)
            +dotProduct(reference[iat]-creference,reference[iat]-creference))*w;
    sum01w+=Tensor(positions[iat]-cpositions,reference[iat]-creference)*w;
  }

// normalise the moments by the total weight
  double rr00=sum00w*invnorm;
  Tensor rr01=sum01w*invnorm;

// Assemble the symmetric 4x4 matrix: diagonal and upper triangle are written
// explicitly, the lower triangle is mirrored afterwards.
  Matrix<double> m=Matrix<double>(4,4);
  m[0][0]=rr00+2.0*(-rr01[0][0]-rr01[1][1]-rr01[2][2]);
  m[1][1]=rr00+2.0*(-rr01[0][0]+rr01[1][1]+rr01[2][2]);
  m[2][2]=rr00+2.0*(+rr01[0][0]-rr01[1][1]+rr01[2][2]);
  m[3][3]=rr00+2.0*(+rr01[0][0]+rr01[1][1]-rr01[2][2]);
  m[0][1]=2.0*(-rr01[1][2]+rr01[2][1]);
  m[0][2]=2.0*(+rr01[0][2]-rr01[2][0]);
  m[0][3]=2.0*(-rr01[0][1]+rr01[1][0]);
  m[1][2]=2.0*(-rr01[0][1]-rr01[1][0]);
  m[1][3]=2.0*(-rr01[0][2]-rr01[2][0]);
  m[2][3]=2.0*(-rr01[1][2]-rr01[2][1]);
  m[1][0] = m[0][1];
  m[2][0] = m[0][2];
  m[2][1] = m[1][2];
  m[3][0] = m[0][3];
  m[3][1] = m[1][3];
  m[3][2] = m[2][3];

  vector<double> eigenvals;
  Matrix<double> eigenvecs;
  int diagerror=diagMat(m, eigenvals, eigenvecs );

  if (diagerror!=0){
    string sdiagerror;
    Tools::convert(diagerror,sdiagerror);
    string msg="DIAGONALIZATION FAILED WITH ERROR CODE "+sdiagerror;
    plumed_merror(msg);
  }

// The first eigenvalue is taken as the MSD.
// NOTE(review): presumably diagMat() returns eigenvalues in ascending order - confirm.
  dist=eigenvals[0];

// NOTE(review): ddist_dm is not referenced again in this function.
  Matrix<double> ddist_dm(4,4);

// Quaternion: the first row of eigenvecs, i.e. the eigenvector paired with eigenvals[0]
  Vector4d q(eigenvecs[0][0],eigenvecs[0][1],eigenvecs[0][2],eigenvecs[0][3]);

// This is the rotation matrix that brings reference to positions
// i.e. matmul(rotation,reference[iat])+shift is fitted to positions[iat]

  Tensor rotation;
  rotation[0][0]=q[0]*q[0]+q[1]*q[1]-q[2]*q[2]-q[3]*q[3];
  rotation[1][1]=q[0]*q[0]-q[1]*q[1]+q[2]*q[2]-q[3]*q[3];
  rotation[2][2]=q[0]*q[0]-q[1]*q[1]-q[2]*q[2]+q[3]*q[3];
  rotation[0][1]=2*(+q[0]*q[3]+q[1]*q[2]);
  rotation[0][2]=2*(-q[0]*q[2]+q[1]*q[3]);
  rotation[1][2]=2*(+q[0]*q[1]+q[2]*q[3]);
  rotation[1][0]=2*(-q[0]*q[3]+q[1]*q[2]);
  rotation[2][0]=2*(+q[0]*q[2]+q[1]*q[3]);
  rotation[2][1]=2*(-q[0]*q[1]+q[2]*q[3]);

  double prefactor=2.0*invnorm;
  Vector shift=cpositions-matmul(rotation,creference);

// Chain rule for the square root. This uses the eigenvalue-based dist, which
// must therefore be read before the "safe" reset just below.
  if(!squared) prefactor*=0.5/sqrt(dist);

// if "safe", recompute dist here to a better accuracy
  if(safe) dist=0.0;

// If safe is set to "false", MSD is taken from the eigenvalue of the M matrix
// If safe is set to "true", MSD is recomputed from the rotational matrix
// For some reason, this last approach leads to less numerical noise but adds an overhead

// third expensive loop: derivatives
  for(unsigned iat=0;iat<n;iat++){
// there is no need for derivatives of rotation and shift here as it is by construction zero
// (similar to Hellman-Feynman forces)
    Vector d(positions[iat]-shift - matmul(rotation,reference[iat]));
    derivatives[iat]= prefactor*align[iat]*d;
    if(safe) dist+=align[iat]*invnorm*modulo2(d);
  }

  if(!squared) dist=sqrt(dist);

  return dist;
}

コード例 #17

0

ファイルを表示

ファイル: wrapper.cpp プロジェクト: rbaghdadi/ISIR

// Benchmark driver for the chained product O = (A * B) * C.
//
// Usage: <prog> [testN] [check_correctness]
//   testN             - number of timed repetitions (default 1)
//   check_correctness - non-zero enables the host reference comparison
//
// Steps: fill A, B, C with random small integers, run the generated `matmul`
// pipeline once as warm-up, optionally validate it against a host triple
// loop, time `testN` pipeline runs, then time `testN` cuBLAS SGEMM pairs
// (device allocation and transfers are inside the timed region).
// Always returns 0.
int main(int argc, char *argv[])
{
    int testN = 1;
    bool check_correctness = false;
    if (argc > 1) {
        testN = atoi(argv[1]);
    }
    if (argc > 2) {
        check_correctness = atoi(argv[2]);
    }

    std::cout << std::endl << "----------" << std::endl;
    std::cout << "Running sequential MM benchmark: testN: " << testN
              << ", check correctness: " << check_correctness
              << ", size: (" << S0 << ", " << S1 << ", " << S2 << ", " << S3 << ")" << std::endl;

    auto t1 = std::chrono::high_resolution_clock::now();
    auto t2 = t1;
    
    float *A = (float*) malloc(S0 * S1 * sizeof(float));
    float *B = (float*) malloc(S1 * S2 * sizeof(float));
    float *C = (float*) malloc(S2 * S3 * sizeof(float));

    // Initialize matrices with random values:
    for (int i = 0; i < S0 * S1; i++) A[i] = std::rand() % 10;
    for (int i = 0; i < S1 * S2; i++) B[i] = std::rand() % 10;
    for (int i = 0; i < S2 * S3; i++) C[i] = std::rand() % 10;

    std::cout << "Buffers initialized" << std::endl << std::flush;

    // Note that indices are flipped (see tutorial 2)
    Halide::Buffer<DATA_TYPE> A_buf(A, {S1, S0});
    Halide::Buffer<DATA_TYPE> B_buf(B, {S2, S1});
    Halide::Buffer<DATA_TYPE> C_buf(C, {S3, S2});
    Halide::Buffer<DATA_TYPE> O_buf(S3, S0);

    // Make a dummy call to set up GPU (initalization takes time)
    matmul(A_buf.raw_buffer(), B_buf.raw_buffer(), C_buf.raw_buffer(), O_buf.raw_buffer());

    // CPU Multiplication for correctness check

    if (check_correctness) {
        // Reference matrix multiplication

        std::cout << "Running CPU multiplication.." << std::endl;

        Halide::Buffer<DATA_TYPE> O_val_buf(S3, S0);
        Halide::Buffer<DATA_TYPE> T_val_buf(S2, S0);
        t1 = std::chrono::high_resolution_clock::now();
        for (int i = 0; i < S0; i++) {
            for (int k = 0; k < S2; k++) {
                // Note that indices are flipped (see tutorial 2)
                T_val_buf(k, i) = 0;
            }
        }
        for (int i = 0; i < S0; i++) {
            for (int l = 0; l < S3; l++) {
                // Note that indices are flipped (see tutorial 2)
                O_val_buf(l, i) = 0;
            }
        }
        // T = A * B
        for (int j = 0; j < S1; j++) {
            for (int i = 0; i < S0; i++) {
                for (int k = 0; k < S2; k++) {
                    // Note that indices are flipped (see tutorial 2)
                    T_val_buf(k, i) += A_buf(j, i) * B_buf(k, j);
                }
            }
        }
        // O = T * C
        for (int k = 0; k < S2; k++) {
            for (int i = 0; i < S0; i++) {
                for (int l = 0; l < S3; l++) {
                    // Note that indices are flipped (see tutorial 2)
                    O_val_buf(l, i) += T_val_buf(k, i) * C_buf(l, k);
                }
            }
        }
        t2 = std::chrono::high_resolution_clock::now();

        std::cout << "CPU matmul done: " << (std::chrono::duration<double,std::milli>(t2 - t1)).count() << "ms" << std::endl << std::flush;

        // Compares the warm-up result still held in O_buf with the reference
        compare_buffers("matmul", O_buf, O_val_buf);
    }

    // GPU Multiplication

    t1 = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < testN; i++) {
        matmul(A_buf.raw_buffer(), B_buf.raw_buffer(), C_buf.raw_buffer(), O_buf.raw_buffer());
    }
    t2 = std::chrono::high_resolution_clock::now();

    std::cout << "GPU matmul done: " << (std::chrono::duration<double,std::milli>(t2 - t1)).count() / testN << "ms" << std::endl << std::flush;

    // CUBLAS SGEMM

    // Transposed copies for cublas
    float *A_T = (float*) malloc(S0 * S1 * sizeof(float));
    float *B_T = (float*) malloc(S1 * S2 * sizeof(float));
    float *C_T = (float*) malloc(S2 * S3 * sizeof(float));
    float *O_T = (float*) malloc(S0 * S3 * sizeof(float));
    // Transpose
    for (int i = 0; i < S0; i++) for (int j = 0; j < S1; j++) A_T[i + j * S0] = A[i * S1 + j];
    for (int i = 0; i < S1; i++) for (int j = 0; j < S2; j++) B_T[i + j * S1] = B[i * S2 + j];
    for (int i = 0; i < S2; i++) for (int j = 0; j < S3; j++) C_T[i + j * S2] = C[i * S3 + j];

    // Excluding handle creation which is time consuming
    cublasHandle_t handle;
    cublasCreate(&handle);

    t1 = std::chrono::high_resolution_clock::now();

    for (int i = 0; i < testN; i++) {
        float *d_A;
        float *d_B;
        float *d_C;
        float *d_T;
        float *d_O;
        cudaMalloc((void**)&d_A, S0 * S1 * sizeof(*A));
        cudaMalloc((void**)&d_B, S1 * S2 * sizeof(*A));
        cudaMalloc((void**)&d_C, S2 * S3 * sizeof(*A));
        cudaMalloc((void**)&d_T, S0 * S2 * sizeof(*A));
        cudaMalloc((void**)&d_O, S0 * S3 * sizeof(*A));

        cublasSetMatrix(S0, S1, sizeof(*A), A_T, S0, d_A, S0);
        cublasSetMatrix(S1, S2, sizeof(*B), B_T, S1, d_B, S1);
        cublasSetMatrix(S2, S3, sizeof(*C), C_T, S2, d_C, S2);

        float alpha_var = 1;
        float beta_var = 0;

        // d_T = d_A * d_B, then d_O = d_T * d_C
        cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, S0, S2, S1, &alpha_var, d_A, S0, d_B, S1, &beta_var, d_T, S0);
        cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, S0, S3, S2, &alpha_var, d_T, S0, d_C, S2, &beta_var, d_O, S0);

        cublasGetMatrix(S0, S3, sizeof(*C), d_O, S0, O_T, S0);

        cudaFree(d_A);
        cudaFree(d_B);
        cudaFree(d_C);
        cudaFree(d_T);
        cudaFree(d_O);
    }

    t2 = std::chrono::high_resolution_clock::now();

    std::cout << "cublas matmul done (excluding cublasHandle creation): "
              << (std::chrono::duration<double,std::milli>(t2 - t1) / testN).count() << "ms" << std::endl << std::flush;

    cublasDestroy(handle);

    // Optional cross-check of the pipeline output against cuBLAS; disabled
    // unless this switch is flipped by hand.
    bool check_cublas_difference = false;
    if (check_cublas_difference) {
        bool flag = true;
        for (int i = 0; i < S0 && flag; i++) {
            for (int j = 0; j < S3; j++) {
                if (O_buf(j, i) != O_T[i + j * S0]) {
                    std::cout << "cublas validation mismatch:" << std::endl;
                    std::cout << i << " " << j << " " << O_T[i + j * S0] << " " << O_buf(j, i) << std::endl;
                    // Record the failure so the success message below is
                    // suppressed, and stop at the first mismatch.
                    flag = false;
                    break;
                }
            }
        }
        if (flag) {
            std::cout << "cublas and validation match" << std::endl;
        }
    }

    free(A);
    free(B);
    free(C);
    free(A_T);
    free(B_T);
    free(C_T);
    free(O_T);

    std::cout << "----------" << std::endl << std::endl;

    return 0;
}

コード例 #18

0

ファイルを表示

ファイル: solve.cpp プロジェクト: PierreBizouard/arrayfire

Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
{
    int M = a.dims()[0];
    int N = a.dims()[1];
    int K = b.dims()[1];
    int MN = std::min(M, N);

    Array<T> B = createEmptyArray<T>(dim4());
    trsm_func<T> gpu_trsm;

    cl_event event;
    cl_command_queue queue = getQueue()();

    if (M < N) {

#define UNMQR 0 // FIXME: UNMQR == 1 should be faster but does not work

        // Least squres for this case is solved using the following
        // solve(A, B) == matmul(Q, Xpad);
        // Where:
        // Xpad == pad(Xt, N - M, 1);
        // Xt   == tri_solve(R1, B);
        // R1   == R(seq(M), seq(M));
        // transpose(A) == matmul(Q, R);

        // QR is performed on the transpose of A
        Array<T> A = transpose<T>(a, true);

#if UNMQR
        B = padArray<T, T>(b, dim4(N, K), scalar<T>(0));
        B.resetDims(dim4(M, K));
#else
        B = copyArray<T>(b);
#endif

        int NB = magma_get_geqrf_nb<T>(A.dims()[1]);
        int NUM = (2*MN + ((M+31)/32)*32)*NB;
        Array<T> tmp = createEmptyArray<T>(dim4(NUM));

        std::vector<T> h_tau(MN);

        int info = 0;
        cl::Buffer *dA = A.get();
        cl::Buffer *dT = tmp.get();
        cl::Buffer *dB = B.get();

        magma_geqrf3_gpu<T>(A.dims()[0], A.dims()[1],
                           (*dA)(), A.getOffset(), A.strides()[1],
                           &h_tau[0], (*dT)(), tmp.getOffset(), getQueue()(), &info);

        A.resetDims(dim4(M, M));

        magmablas_swapdblk<T>(MN-1, NB,
                              (*dA)(), A.getOffset(), A.strides()[1], 1,
                              (*dT)(), tmp.getOffset() + MN * NB, NB, 0, queue);

        gpu_trsm(clblasColumnMajor,
                 clblasLeft, clblasUpper,
                 clblasConjTrans, clblasNonUnit,
                 B.dims()[0], B.dims()[1],
                 scalar<T>(1),
                 (*dA)(), A.getOffset(), A.strides()[1],
                 (*dB)(), B.getOffset(), B.strides()[1],
                 1, &queue, 0, nullptr, &event);

        magmablas_swapdblk<T>(MN - 1, NB,
                              (*dT)(), tmp.getOffset() + MN * NB, NB, 0,
                              (*dA)(), A.getOffset(), A.strides()[1], 1, queue);

#if UNMQR
        int lwork = (B.dims()[0]-A.dims()[0]+NB)*(B.dims()[1]+2*NB);
        std::vector<T> h_work(lwork);
        B.resetDims(dim4(N, K));
        magma_unmqr_gpu<T>(MagmaLeft, MagmaNoTrans,
                           B.dims()[0], B.dims()[1], A.dims()[0],
                           (*dA)(), A.getOffset(), A.strides()[1],
                           &h_tau[0],
                           (*dB)(), B.getOffset(), B.strides()[1],
                           &h_work[0], lwork,
                           (*dT)(), tmp.getOffset(), NB, queue, &info);
#else
        A.resetDims(dim4(N, M));
        magma_ungqr_gpu<T>(A.dims()[0], A.dims()[1], std::min(M, N),
                           (*dA)(), A.getOffset(), A.strides()[1],
                           &h_tau[0],
                           (*dT)(), tmp.getOffset(), NB, queue, &info);

        B = matmul(A, B, AF_MAT_NONE, AF_MAT_NONE);
#endif
    } else if (M > N) {
        // Least squres for this case is solved using the following
        // solve(A, B) == tri_solve(R1, Bt);
        // Where:
        // R1 == R(seq(N), seq(N));
        // Bt == matmul(transpose(Q1), B);
        // Q1 == Q(span, seq(N));
        // A  == matmul(Q, R);

        Array<T> A = copyArray<T>(a);
        B = copyArray(b);

        int MN = std::min(M, N);
        int NB = magma_get_geqrf_nb<T>(M);

        int NUM = (2*MN + ((N+31)/32)*32)*NB;
        Array<T> tmp = createEmptyArray<T>(dim4(NUM));

        std::vector<T> h_tau(NUM);

        int info = 0;
        cl::Buffer *A_buf = A.get();
        cl::Buffer *B_buf = B.get();
        cl::Buffer *dT = tmp.get();

        magma_geqrf3_gpu<T>(M, N,
                           (*A_buf)(), A.getOffset(), A.strides()[1],
                           &h_tau[0], (*dT)(), tmp.getOffset(), getQueue()(), &info);

        int NRHS = B.dims()[1];
        int lhwork = (M - N + NB) * (NRHS + NB) + NRHS * NB;

        std::vector<T> h_work(lhwork);
        h_work[0] = scalar<T>(lhwork);

        magma_unmqr_gpu<T>(MagmaLeft, MagmaConjTrans,
                           M, NRHS, N,
                           (*A_buf)(), A.getOffset(), A.strides()[1],
                           &h_tau[0],
                           (*B_buf)(), B.getOffset(), B.strides()[1],
                           &h_work[0], lhwork,
                           (*dT)(), tmp.getOffset(), NB,
                           queue, &info);

        magmablas_swapdblk<T>(MN - 1, NB,
                              (*A_buf)(), A.getOffset(), A.strides()[1], 1,
                              (*dT)(), tmp.getOffset() + NB * MN,
                              NB, 0, queue);


        std::string pName = getPlatformName(getDevice());
        if(pName.find("NVIDIA") != std::string::npos)
        {
            Array<T> AT = transpose<T>(A, true);
            cl::Buffer* AT_buf = AT.get();
            gpu_trsm(clblasColumnMajor,
                     clblasLeft, clblasLower, clblasConjTrans, clblasNonUnit,
                     N, NRHS, scalar<T>(1),
                     (*AT_buf)(), AT.getOffset(), AT.strides()[1],
                     (*B_buf)(), B.getOffset(), B.strides()[1],
                     1, &queue, 0, nullptr, &event);
        } else {
            gpu_trsm(clblasColumnMajor,
                     clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit,
                     N, NRHS, scalar<T>(1),
                     (*A_buf)(), A.getOffset(), A.strides()[1],
                     (*B_buf)(), B.getOffset(), B.strides()[1],
                     1, &queue, 0, nullptr, &event);
        }
        B.resetDims(dim4(N, K));
    }

    return B;
}

コード例 #19

0

ファイルを表示

ファイル: matrix.cpp プロジェクト: andre487/Matrix

// Matrix multiplication.
// Operator form of matmul(): forwards both operands unchanged and returns
// whatever matmul(A, B) returns. The exception specification states that
// only an int may propagate out of this operator.
Matrix operator *(const Matrix &A, const Matrix &B) throw (int)
{
    return matmul(A, B);
}

コード例 #20

0

ファイルを表示

ファイル: matmul_mpi_collective.c プロジェクト: e-/MulticoreComputing

void matmul_mpi(float* A, float* B, float* C, int n){
	int rank, nodes_n,
		used_nodes_n,
		row_per_process, 
		i,
		j,
		k,
		start_row,
		end_row,
		child_start_row,
		child_end_row,
		*counts,
		*displs;
	
	float* my_A;
	MPI_Status status;

	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &nodes_n);
	row_per_process = n / nodes_n;
	if(n % nodes_n)row_per_process ++;

	used_nodes_n = n / row_per_process;
	if(n % row_per_process)used_nodes_n ++;
		
	start_row = rank % used_nodes_n * row_per_process;
	end_row = start_row + row_per_process;
	if(end_row >= n)end_row = n;	


	if(rank == 0) {
		// 0 ~ row_per_process are mine
		counts = (int *)malloc(sizeof(int) * nodes_n);
		displs = (int *)malloc(sizeof(int) * nodes_n);
		for(i = 0 ; i <nodes_n;i++){
			child_start_row = i % used_nodes_n * row_per_process;
			child_end_row = child_start_row + row_per_process;
			if(child_end_row >= n) child_end_row = n;
			
			displs[i] = child_start_row * n;
			counts[i] = (child_end_row - child_start_row) * n;
		}
	}
	my_A = (float *)malloc(sizeof(float) * row_per_process * n);
	if(rank > 0) B = (float *)malloc(sizeof(float) * n * n);
	MPI_Scatterv(A, counts, displs, MPI_FLOAT, my_A, row_per_process * n, MPI_FLOAT, 0, MPI_COMM_WORLD);
	MPI_Bcast(B, n * n, MPI_FLOAT, 0, MPI_COMM_WORLD);
	
	//자기것 계산하기
	
	if(C == NULL) {
		C = (float *)malloc(sizeof(float) * (end_row - start_row) * n);
		memset(C, 0, sizeof(float) * (end_row - start_row) * n);
	} else {
		memset(C, 0, sizeof(float) * n * n);
	}
	
	if(end_row-start_row > 0)
		matmul(my_A, B, C, end_row-start_row, n, n);
	
	// 계산 완료

	MPI_Gatherv(C, (end_row - start_row) * n, MPI_FLOAT, C, counts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);
	free(my_A);
	if(rank == 0){
		free(counts);
		free(displs);
	} else {
		free(B);
		free(C);
	}
}

コード例 #21

0

ファイルを表示

ファイル: Matrix.cpp プロジェクト: NattyBumppo/Simple-Image-Stitcher

/*
   Solve the overconstrained linear system   Ma = b   using a least
   squares error (pseudo inverse) approach.

   Steps: Mt = transpose(M); MtM = Mt * M; matinvert(MtM);
          Mdag = MtM * Mt; a = Mdag * b.
   (matinvert presumably inverts MtM in place and returns its determinant;
   confirm against its definition.)

   M  coefficient matrix; rejected when it has more columns than rows.
   a  receives the product Mdag * b.
   b  right-hand side.

   Returns 0 on success or a negative code (-1 .. -10) naming the step that
   failed; a diagnostic is printed to stderr in that case. Every temporary
   matrix that was allocated is released on all paths, including errors.
*/
int       solve_system (dmat M, dmat a,dmat  b)
    
{
    dmat      Mt,
              MtM,
              Mdag;
    int       status = 0;
    double    aa;

    if ((M.ub1 - M.lb1) < (M.ub2 - M.lb2)) {
	fprintf (stderr, "solve_system: matrix M has more columns than rows\n");
	return (-1);
    }

    Mt = newdmat (M.lb2, M.ub2, M.lb1, M.ub1, &errno);
    if (errno) {
	fprintf (stderr, "solve_system: unable to allocate matrix M_transpose\n");
	return (-2);   /* nothing allocated yet */
    }

    transpose (M, Mt);
    if (errno) {
	fprintf (stderr, "solve_system: unable to transpose matrix M\n");
	status = -3;
	goto free_Mt;
    }

    MtM = newdmat (M.lb2, M.ub2, M.lb2, M.ub2, &errno);
    if (errno) {
	fprintf (stderr, "solve_system: unable to allocate matrix M_transpose_M\n");
	status = -4;
	goto free_Mt;
    }

    matmul (Mt, M, MtM);
    if (errno) {
	fprintf (stderr, "solve_system: unable to compute matrix product of M_transpose and M\n");
	status = -5;
	goto free_MtM;
    }

    /* The magnitude of matinvert's return value is used as a conditioning
       check before its error status is looked at (original ordering kept). */
    aa = fabs (matinvert (MtM));

    if (aa < 0.001) {
	fprintf (stderr, "solve_system: determinant of matrix M_transpose_M is too small\n");
	status = -6;
	goto free_MtM;
    }

    if (errno) {
	fprintf (stderr, "solve_system: error during matrix inversion\n");
	status = -7;
	goto free_MtM;
    }

    Mdag = newdmat (M.lb2, M.ub2, M.lb1, M.ub1, &errno);
    if (errno) {
	fprintf (stderr, "solve_system: unable to allocate matrix M_diag\n");
	status = -8;
	goto free_MtM;
    }

    matmul (MtM, Mt, Mdag);
    if (errno) {
	fprintf (stderr, "solve_system: unable to compute matrix product of M_transpose_M and M_transpose\n");
	status = -9;
	goto free_Mdag;
    }

    matmul (Mdag, b, a);
    if (errno) {
	fprintf (stderr, "solve_system: unable to compute matrix product of M_diag and b\n");
	status = -10;
	goto free_Mdag;
    }

    /* Success falls through the same cleanup chain with status == 0. */
free_Mdag:
    freemat (Mdag);
free_MtM:
    freemat (MtM);
free_Mt:
    freemat (Mt);

    return status;
}

コード例 #22

0

ファイルを表示

ファイル: measure.c プロジェクト: nclack/whisk

//
// Measure Whisker Segment Features
// --------------------------------
// Computes eight scalar features of the whisker segment <w> and stores them
// in <dest> (which must have room for at least 8 doubles):
//   dest[0] path length            dest[4] follicle x
//   dest[1] median score           dest[5] follicle y
//   dest[2] root angle (degrees)   dest[6] tip x
//   dest[3] mean curvature         dest[7] tip y
//
// <facex>,<facey> are forwarded to _side(), which reports the indices of the
//             follicle and tip samples and a sign <dx>.
// <face_axis> indicates the orientation of the mouse head with respect to 
//             the image.
// <face_axis> == 'x' --> horizontally (along x axis)
// <face_axis> == 'y' --> vertically   (along y axis)
//             ('h' and 'v' are handled like 'x' and 'y' respectively.)
//
// Side effects: w->scores is sorted in place by the median computation, and
// the function keeps static scratch buffers that persist between calls.
//
void Whisker_Seg_Measure( Whisker_Seg *w, double *dest, int facex, int facey, char face_axis )
{ float path_length,     // -> dest[0]
        median_score,    // -> dest[1]
        root_angle_deg,  // side  poly
        mean_curvature,  //(side) poly quad?  (depends on side for sign)
        follicle_x,      // side
        follicle_y,      // side
        tip_x,           // side
        tip_y;           // side
  float *x = w->x,
        *y = w->y,
        *s = w->scores;
  int len = w->len,
      idx_follicle,
      idx_tip;
  float dx;
  // Cumulative arc length per sample; static so the buffer is reused
  // across calls (request_storage presumably grows it on demand).
  static double *cumlen = NULL;
  static size_t  cumlen_size = 0;

  cumlen = request_storage( cumlen, &cumlen_size, sizeof(double), len, "measure: cumlen");
  cumlen[0] = 0.0;

  // path length
  // -----------
  // XXX: an alternate approach would be to compute the polynomial fit
  //      and do quadrature on that.  Might be more precise.
  //      Although, need cumlen (a.k.a cl) for polyfit anyway
  // (a*) walks samples 1..len-1 while (b*) trails one sample behind, so each
  // step adds the distance between consecutive points to the running sum.
  { float *ax = x + 1,       *ay = y + 1,
          *bx = x,           *by = y;
    double *cl = cumlen + 1, *clm = cumlen;
    while( ax < x + len )
      *cl++ = (*clm++) + hypotf( (*ax++) - (*bx++), (*ay++) - (*by++) );
    path_length = cl[-1]; // last value written (cumlen[0] if len <= 1)
  }

  // median score
  // ------------
  // NOTE: qsort reorders w->scores in place.
  { qsort( s, len, sizeof(float), _score_cmp );
    if(len&1) // odd
      median_score = s[ (len-1)/2 ];
    else      //even
      median_score = ( s[len/2 - 1] + s[len/2] )/2.0;
  }

  // Follicle and root positions
  // ---------------------------
  // _side fills in idx_follicle and idx_tip; its return value dx is used
  // below as a sign factor.
  dx = _side( w, facex, facey, &idx_follicle, &idx_tip );

  follicle_x = x[ idx_follicle ];
  follicle_y = y[ idx_follicle ];
  tip_x = x[ idx_tip ];
  tip_y = y[ idx_tip ];

  // Polynomial based measurements
  // (Curvature and angle)
  // -----------------------------
  // px,py   : coefficients fitted to x(t), y(t)
  // xp,yp   : their first derivatives;  xpp,ypp : second derivatives
  // mul1/2  : scratch for polynomial products;  num,den : curvature
  //           numerator and denominator polynomials
  // NOTE(review): the product of two (DEGREE+1)-coefficient polynomials can
  // have 2*DEGREE+1 coefficients while mul1/mul2/num/den hold 2*DEGREE —
  // confirm how many entries polymul/polysub/polyadd write.
  { double px[  MEASURE_POLY_FIT_DEGREE+1 ],
           py[  MEASURE_POLY_FIT_DEGREE+1 ],
           xp[  MEASURE_POLY_FIT_DEGREE+1 ],
           yp[  MEASURE_POLY_FIT_DEGREE+1 ],
           xpp[ MEASURE_POLY_FIT_DEGREE+1 ],
           ypp[ MEASURE_POLY_FIT_DEGREE+1 ],
           mul1[ 2*MEASURE_POLY_FIT_DEGREE ],
           mul2[ 2*MEASURE_POLY_FIT_DEGREE ],
           num[  2*MEASURE_POLY_FIT_DEGREE ],
           den[  2*MEASURE_POLY_FIT_DEGREE ]; 
    // Static scratch buffers, reused across calls.
    static double *t = NULL;
    static size_t  t_size = 0;
    static double *xd = NULL;
    static size_t  xd_size = 0;
    static double *yd = NULL;
    static size_t  yd_size = 0;
    static double *workspace = NULL;
    static size_t  workspace_size = 0;
    int i;
    // Number of samples excluded from the fit at each end of the segment.
    const int pad = MIN( MEASURE_POLY_END_PADDING, len/4 );

    // parameter for parametric polynomial representation
    t = request_storage(t, &t_size, sizeof(double), len, "measure");
    xd = request_storage(xd, &xd_size, sizeof(double), len, "measure");
    yd = request_storage(yd, &yd_size, sizeof(double), len, "measure");
    { int i = len; // convert floats to doubles
      while(i--)
      { xd[i] = x[i];
        yd[i] = y[i];
      }
    }

    // t is the arc length normalised by the total path length.
    // NOTE(review): path_length is 0 when len <= 1 or all points coincide,
    // which makes this a division by zero — confirm callers exclude that.
    for( i=0; i<len; i++ )
      t[i] = cumlen[i] / path_length; // [0 to 1]
#ifdef DEBUG_MEASURE_POLYFIT_ERROR
    assert(t[0] == 0.0 );
    assert( (t[len-1] - 1.0)<1e-6 );
#endif

    // polynomial fit
    // x(t) is fitted first; polyfit_reuse then fits y(t) with the same
    // workspace (presumably reusing the factorisation for t — confirm).
    workspace = request_storage( workspace, 
                                &workspace_size, 
                                 sizeof(double), 
                                 polyfit_size_workspace( len, 2*MEASURE_POLY_FIT_DEGREE ), //need 2*degree for curvature eval later
                                 "measure: polyfit workspace" );
    polyfit( t+pad, xd+pad, len-2*pad, MEASURE_POLY_FIT_DEGREE, px, workspace );
    polyfit_reuse(  yd+pad, len-2*pad, MEASURE_POLY_FIT_DEGREE, py, workspace );

#ifdef DEBUG_MEASURE_POLYFIT_ERROR
    // NOTE(review): the fit covers indices pad .. len-pad-1, but this loop
    // stops at len-2*pad — confirm the intended upper bound.
    { double err = 0.0;
      int i;
      for( i=pad; i<len-2*pad; i++ )
        err += hypot( xd[i] - polyval( px, MEASURE_POLY_FIT_DEGREE, t[i] ),
                      yd[i] - polyval( py, MEASURE_POLY_FIT_DEGREE, t[i] ) );
      err /= ((float)len);
      debug("Polyfit root mean squared residual: %f\n", err );
      assert( err < 1.0 );
    }
#endif

    // first derivative
    memcpy( xp, px, sizeof(double) * ( MEASURE_POLY_FIT_DEGREE+1 ) );
    memcpy( yp, py, sizeof(double) * ( MEASURE_POLY_FIT_DEGREE+1 ) );
    polyder_ip( xp, MEASURE_POLY_FIT_DEGREE+1, 1 );
    polyder_ip( yp, MEASURE_POLY_FIT_DEGREE+1, 1 );

    // second derivative
    memcpy( xpp, xp, sizeof(double) * ( MEASURE_POLY_FIT_DEGREE+1 ) );
    memcpy( ypp, yp, sizeof(double) * ( MEASURE_POLY_FIT_DEGREE+1 ) );
    polyder_ip( xpp, MEASURE_POLY_FIT_DEGREE+1, 1 );
    polyder_ip( ypp, MEASURE_POLY_FIT_DEGREE+1, 1 );

    // Root angle
    // ----------
    // Evaluated at the first or last fitted sample, whichever end holds the
    // follicle. The argument order of atan2 is swapped between the two
    // face orientations.
    { double teval = (idx_follicle == 0) ? t[pad] : t[len-pad-1];
      static const double rad2deg = 180.0/M_PI;
      switch(face_axis)
      { case 'h':
        case 'x':
          root_angle_deg = atan2( dx*polyval(yp, MEASURE_POLY_FIT_DEGREE, teval ),
                                  dx*polyval(xp, MEASURE_POLY_FIT_DEGREE, teval ) ) * rad2deg;
          break;
        case 'v':
        case 'y':
          root_angle_deg = atan2( dx*polyval(xp, MEASURE_POLY_FIT_DEGREE, teval ),
                                  dx*polyval(yp, MEASURE_POLY_FIT_DEGREE, teval ) ) * rad2deg;
          break;
        default:
          // NOTE(review): root_angle_deg stays unset on this path if
          // error() returns — presumably error() does not return; confirm.
          error("In Whisker_Seg_Measure\n"
                "\tParameter <face_axis> must take on a value of 'x' or 'y'\n"
                "\tGot value %c\n",face_axis);
      }
    }

    // Mean curvature
    // --------------
    // Use the most naive of integration schemes
    // kappa(t) = (x'y'' - y'x'') / (x'^2 + y'^2)^(3/2), evaluated at the
    // fitted samples and summed against the spacing in t.
    { double  *V = workspace; // done with workspace, so reuse it for vandermonde matrix (just alias it here)
      static double *evalnum = NULL,
                    *evalden = NULL;
      static size_t evalnum_size = 0,
                    evalden_size = 0;
      size_t npoints = len-2*pad;
  
      evalnum = request_storage( evalnum, &evalnum_size, sizeof(double), npoints, "numerator" );
      evalden = request_storage( evalden, &evalden_size, sizeof(double), npoints, "denominator" );
  
      Vandermonde_Build( t+pad, npoints, 2*MEASURE_POLY_FIT_DEGREE, V ); // used for polynomial evaluation
  
      // numerator
      // num = xp*ypp - yp*xpp
      memset( mul1, 0, 2*MEASURE_POLY_FIT_DEGREE*sizeof(double) );
      memset( mul2, 0, 2*MEASURE_POLY_FIT_DEGREE*sizeof(double) );
      polymul( xp, MEASURE_POLY_FIT_DEGREE+1,
              ypp, MEASURE_POLY_FIT_DEGREE+1,
              mul1 );
      polymul( yp, MEASURE_POLY_FIT_DEGREE+1,
              xpp, MEASURE_POLY_FIT_DEGREE+1,
              mul2 );
      polysub( mul1, 2*MEASURE_POLY_FIT_DEGREE,
               mul2, 2*MEASURE_POLY_FIT_DEGREE,
               num );
  
      // denominator
      // den = xp*xp + yp*yp (raised to the 3/2 power after evaluation)
      memset( mul1, 0, 2*MEASURE_POLY_FIT_DEGREE*sizeof(double) );
      memset( mul2, 0, 2*MEASURE_POLY_FIT_DEGREE*sizeof(double) );
      polymul( xp, MEASURE_POLY_FIT_DEGREE+1,
               xp, MEASURE_POLY_FIT_DEGREE+1,
              mul1 );
      polymul( yp, MEASURE_POLY_FIT_DEGREE+1,
               yp, MEASURE_POLY_FIT_DEGREE+1,
              mul2 );
      polyadd( mul1, 2*MEASURE_POLY_FIT_DEGREE,
               mul2, 2*MEASURE_POLY_FIT_DEGREE,
               den );
  
      // Eval
      // Multiplying the Vandermonde matrix by a coefficient vector evaluates
      // the polynomial at every fitted t in one call.
      matmul(   V, npoints,                   MEASURE_POLY_FIT_DEGREE*2,
              num, MEASURE_POLY_FIT_DEGREE*2, 1,
              evalnum );
      matmul(   V, npoints,                   MEASURE_POLY_FIT_DEGREE*2,
              den, MEASURE_POLY_FIT_DEGREE*2, 1,
              evalden );
      // compute kappa at each t
      // NOTE(review): evalnum[i] was evaluated at t[pad+i], yet the weights
      // below use t[i]-t[i-1] (no pad offset) — confirm whether the offset
      // is intentionally omitted.
      { int i;
        for(i=0; i<npoints; i++ )
          evalnum[i] /= pow( evalden[i], 3.0/2.0 )*dx; //dx is 1 or -1 so dx = 1/dx;
        mean_curvature = evalnum[0] * (t[1]-t[0]);
        for(i=1; i<npoints; i++ )
          mean_curvature += evalnum[i] * ( t[i]-t[i-1] );
      }
    }
  }

  // fill in fields
  dest[0] = path_length;
  dest[1] = median_score;
  dest[2] = root_angle_deg;
  dest[3] = mean_curvature;
  dest[4] = follicle_x;
  dest[5] = follicle_y;
  dest[6] = tip_x;
  dest[7] = tip_y;
}

コード例 #23

0

ファイルを表示

ファイル: matmul_mpi_p2p.c プロジェクト: e-/MulticoreComputing

void matmul_mpi(float* A, float* B, float* C, int n){
	int rank, nodes_n, 
		row_per_process, 
		i,
		j,
		k,
		start_row,
		end_row,
		child_start_row,
		child_end_row;
	MPI_Request request_A[32];
	MPI_Request request_B[32];
	MPI_Request request_C[32];
	MPI_Status status;

	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &nodes_n);
	row_per_process = n / nodes_n;
	if(n % nodes_n)row_per_process ++;
	
	nodes_n = n / row_per_process;
	if(n % row_per_process)nodes_n ++;
		
	start_row = rank * row_per_process;
	end_row = (rank + 1) * row_per_process;

	if(start_row >= n) return;
	if(end_row > n)end_row = n;	

	if(rank == 0) {
		// 0 ~ row_per_process are mine
		for(i = 1; i < nodes_n;i++){
			child_start_row = i * row_per_process;
			child_end_row = (i+1) * row_per_process;
			if(child_end_row > n) child_end_row = n;
			MPI_Isend(A + child_start_row * n, (child_end_row - child_start_row) * n, MPI_FLOAT, i, SEND_DATA_TAG, MPI_COMM_WORLD, &request_A[i]);
			MPI_Isend(B, n * n, MPI_FLOAT, i, SEND_DATA_TAG, MPI_COMM_WORLD, &request_B[i]);
			fprintf(stderr, "0: %d 요청 시작\n", i);
		}
	} else { 
		A = (float *)malloc(sizeof(float) * (end_row - start_row) * n);
		B = (float *)malloc(sizeof(float) * n * n);
		MPI_Recv(A, (end_row - start_row) * n, MPI_FLOAT, 0, SEND_DATA_TAG, MPI_COMM_WORLD, &status);
		MPI_Recv(B, n * n, MPI_FLOAT, 0, SEND_DATA_TAG, MPI_COMM_WORLD, &status);
		fprintf(stderr, "%d: 0으로 부터 받음\n", rank);
	}

	//자기것 계산하기
	
	if(C == NULL) {
		C = (float *)malloc(sizeof(float) * (end_row - start_row) * n);
		memset(C, 0, sizeof(float) * (end_row - start_row) * n);
	} else {
		memset(C, 0, sizeof(float) * n * n);
	}

	matmul(A, B, C, end_row-start_row, n, n);
/*	for(i = 0; i < end_row - start_row; ++i){
		for(j = 0 ; j < n ;++j){
			for(k = 0 ; k < n ;++k){
				C[i * n + j] += A[i * n + k] * B[k * n + j];
			}
		}
	}
*/		
	// 계산 완료
	
	if(rank == 0) {
		// 호스트면 보낸거 확인하고 데이터 합치기  
		for(i = 1; i < nodes_n ; i++){
			child_start_row = i * row_per_process;
			MPI_Wait(&request_A[i], &status);
			MPI_Wait(&request_B[i], &status);
			fprintf(stderr, "0: %d 로 보내기 완료\n", i);
		}
		for(i = 1; i < nodes_n ; i++){
			child_start_row = i * row_per_process;
			child_end_row = (i+1) * row_per_process;
			if(child_end_row > n) child_end_row = n;
			MPI_Irecv(C + child_start_row * n, (child_end_row - child_start_row) * n, MPI_FLOAT, i, SEND_ANSWER_TAG, MPI_COMM_WORLD, &request_C[i]);
			//fprintf(stderr, "%d\n",(child_end_row - child_start_row) * n);
			fprintf(stderr, "0: %d 로부터 답변 받기 요청 완료\n", i);
		}
		long t = clock();
		for(i = 1; i < nodes_n ; i++){
			MPI_Wait(&request_C[i], &status);
			fprintf(stderr, "0: %d 로부터 답변 받기 완료\n", i);
		}
		fprintf(stderr, "%f\n", ((double) clock() - t) / CLOCKS_PER_SEC);
		fprintf(stderr, "0: 계산 및 답변받기 모두 완료\n");
	} else {
		// 아니면 보내기
		//fprintf(stderr, "%d\n",(end_row - start_row) * n);
		MPI_Send(C, (end_row - start_row) * n, MPI_FLOAT, 0, SEND_ANSWER_TAG, MPI_COMM_WORLD);
		free(A);
		free(B);
		free(C);
		fprintf(stderr, "%d: 계산 및 보내기 모두 완료\n", rank);
	}
}

コード例 #24

0

ファイルを表示

ファイル: Pbc.cpp プロジェクト: JFDama/plumed2

// Converts d from scaled coordinates to real coordinates by multiplying the
// transpose of the box matrix with d. Does not modify the Pbc object.
Vector Pbc::scaledToReal(const Vector&d)const {
  return matmul(box.transpose(),d);
}

コード例 #25

0

ファイルを表示

/*  orient() calculates orientation of the camera, updating its calibration 
    structure using the definitions and algorithms well described in [1].
    
    Arguments:
    Calibration* cal_in - camera calibration object
    control_par *cpar - control parameters
    int nfix - number of 3D known points
    vec3d fix[]	- each of nfix items is one 3D position of known point on
        the calibration object.
    target pix[] - image coordinates corresponding to each point in ``fix``.
        can be obtained from the set of detected 2D points using 
        sortgrid(). The points which are associated with fix[] have real 
        pointer (.pnr attribute), others have -999.
    orient_par flags - structure of all the flags of the parameters to be 
        (un)changed, read from orient.par parameter file using 
        read_orient_par(), defaults are zeros except for x_scale which is
        by default 1.
    
    Output:
    Calibration *cal_in - if the orientation routine converged, this structure
    is updated, otherwise, returned untouched. The routine works on a copy of
    the calibration structure, cal, which is released before returning.
    double sigmabeta[] - array of deviations for each of the interior and 
        exterior parameters and glass interface vector (19 in total).
        Entries 0..NPAR-1 are zeroed on entry and the first ``numbers`` of
        them are then filled in; sigmabeta[NPAR] receives
        sqrt(omega / (n_obs - numbers)).

    Returns:
    On success, a pointer to an array of residuals. For each observation point
    i = 0..n-1, residual 2*i is the Gauss-Markof residual for the x coordinate
    and residual 2*i + 1 is for the y. Then come 10 cells with the delta 
    between initial guess and final solution for internal and distortion 
    parameters, which are also part of the G-M model and described in it.
    The caller owns the returned array and must free() it.
    On failure returns NULL.
*/
double* orient (Calibration* cal_in, control_par *cpar, int nfix, vec3d fix[],
            target pix[], orient_par *flags, double sigmabeta[20]) 
{
    int  	i,j,n, itnum, stopflag, n_obs=0, maxsize;

    double  ident[IDT], XPX[NPAR][NPAR], XPy[NPAR], beta[NPAR], omega=0;
    double xp, yp, xpd, ypd, xc, yc, r, qq, p, sumP;

    int numbers;

    double al,be,ga,nGl,e1_x,e1_y,e1_z,e2_x,e2_y,e2_z,safety_x,safety_y,safety_z;
    double *P, *y, *yh, *Xbeta, *resi;
    vec3d glass_dir, tmp_vec, e1, e2;

    Calibration *cal;

    /* small perturbation for translation/rotation in meters and in radians */
    double  dm = 0.00001,  drad = 0.0000001;

    /* Work on a private copy so cal_in stays untouched unless we converge. */
    cal = malloc (sizeof (Calibration));
    memcpy(cal, cal_in, sizeof (Calibration));

    /* Two rows (x and y) per known point plus IDT identity rows. */
    maxsize = nfix*2 + IDT;
    
    P = (double *) calloc(maxsize, sizeof(double));
    y = (double *) calloc(maxsize, sizeof(double));
    yh = (double *) calloc(maxsize, sizeof(double));
    Xbeta = (double *) calloc(maxsize, sizeof(double));
    resi = (double *) calloc(maxsize, sizeof(double));

    double (*X)[NPAR] = malloc(sizeof (*X) * maxsize);
    double (*Xh)[NPAR] = malloc(sizeof (*Xh) * maxsize);

    for(i = 0; i < maxsize; i++) {
        for(j = 0; j < NPAR; j++) {
    	      X[i][j] = 0.0;
    	      Xh[i][j] = 0.0;
        }
        y[i] = 0;
        P[i] = 1;
    }
    
    /* Zero every per-parameter deviation. (This loop used to index with the
       stale j from the loop above and so only ever wrote sigmabeta[NPAR].) */
    for(i = 0; i < NPAR; i++)
        sigmabeta[i] = 0.0;

    /* Number of parameters actually solved for: the two glass-interface
       parameters are included only when interfflag is set. */
    if(flags->interfflag){
        numbers = 18;
    } else{
        numbers = 16;
    }

    vec_set(glass_dir, 
        cal->glass_par.vec_x, cal->glass_par.vec_y, cal->glass_par.vec_z);
    nGl = vec_norm(glass_dir);

    /* e1, e2: unit vectors used as perturbation directions for the glass
       vector.
       NOTE(review): the component formulas below are not symmetric (e1_x uses
       vec_x where a cross product with (1,2,3) would use vec_y, and e1_z /
       e2_z subtract two vec_y terms). They are kept exactly as found because
       changing them alters numerical results - confirm intent. */
    e1_x = 2*cal->glass_par.vec_z - 3*cal->glass_par.vec_x;
    e1_y = 3*cal->glass_par.vec_x - 1*cal->glass_par.vec_z;
    e1_z = 1*cal->glass_par.vec_y - 2*cal->glass_par.vec_y;
    vec_set(tmp_vec, e1_x, e1_y, e1_z);
    unit_vector(tmp_vec, e1);

    e2_x = e1_y*cal->glass_par.vec_z - e1_z*cal->glass_par.vec_x;
    e2_y = e1_z*cal->glass_par.vec_x - e1_x*cal->glass_par.vec_z;
    e2_z = e1_x*cal->glass_par.vec_y - e1_y*cal->glass_par.vec_y;
    vec_set(tmp_vec, e2_x, e2_y, e2_z);
    unit_vector(tmp_vec, e2);

    al = 0;
    be = 0;
    ga = 0;

    /* init identities */
    ident[0] = cal->int_par.cc;
    ident[1] = cal->int_par.xh;
    ident[2] = cal->int_par.yh;
    ident[3] = cal->added_par.k1;
    ident[4] = cal->added_par.k2;
    ident[5] = cal->added_par.k3;
    ident[6] = cal->added_par.p1;
    ident[7] = cal->added_par.p2;
    ident[8] = cal->added_par.scx;
    ident[9] = cal->added_par.she;

    /* Glass vector as it was on entry; restored after each perturbation. */
    safety_x = cal->glass_par.vec_x;
    safety_y = cal->glass_par.vec_y;
    safety_z = cal->glass_par.vec_z;
    
    /* main loop, program runs through it, until none of the beta values
      comes over a threshold and no more points are thrown out
      because of their residuals */

    itnum = 0;  
    stopflag = 0;
    while ((stopflag == 0) && (itnum < NUM_ITER)) {
      itnum++;

      for (i = 0, n = 0; i < nfix; i++) {
        /* check for correct correspondence
        note that we do not use anymore pointer in fix, the points are read by
        the order of appearance and if we want to use every other point
        we use 'i', just check it is not -999 */
        if(pix[i].pnr != i) continue;
        
        /* Optional sub-sampling of the known points. */
        switch (flags->useflag) {
            case 1: if ((i % 2) == 0)  continue;  break;
            case 2: if ((i % 2) != 0)  continue;  break;
            case 3: if ((i % 3) == 0)  continue;  break;
        }

        /* get metric flat-image coordinates of the detected point */
        pixel_to_metric (&xc, &yc, pix[i].x, pix[i].y, cpar);
        correct_brown_affin (xc, yc, cal->added_par, &xc, &yc);

        /* Projected 2D position on sensor of corresponding known point */
        rotation_matrix(&(cal->ext_par));
        img_coord (fix[i], cal, cpar->mm, &xp, &yp);

        /* derivatives of distortion parameters */

        r = sqrt (xp*xp + yp*yp);

        X[n][7] = cal->added_par.scx;
        X[n+1][7] = sin(cal->added_par.she);

        X[n][8] = 0;
        X[n+1][8] = 1;

        X[n][9] = cal->added_par.scx * xp * r*r;
        X[n+1][9] = yp * r*r;

        X[n][10] = cal->added_par.scx * xp * pow(r,4.0);
        X[n+1][10] = yp * pow(r,4.0);

        X[n][11] = cal->added_par.scx * xp * pow(r,6.0);
        X[n+1][11] = yp * pow(r,6.0);

        X[n][12] = cal->added_par.scx * (2*xp*xp + r*r);
        X[n+1][12] = 2 * xp * yp;

        X[n][13] = 2 * cal->added_par.scx * xp * yp;
        X[n+1][13] = 2*yp*yp + r*r;

        qq =  cal->added_par.k1*r*r; qq += cal->added_par.k2*pow(r,4.0);
        qq += cal->added_par.k3*pow(r,6.0);
        qq += 1;
        X[n][14] = xp * qq + cal->added_par.p1 * (r*r + 2*xp*xp) + \
                                                            2*cal->added_par.p2*xp*yp;
        X[n+1][14] = 0;

        X[n][15] = -cos(cal->added_par.she) * yp;
        X[n+1][15] = -sin(cal->added_par.she) * yp;

        /* numeric derivatives of projection coordinates over external 
           parameters, 3D position and the angles */
        
        num_deriv_exterior(cal, cpar, dm, drad, fix[i], X[n], X[n + 1]);

        /* Num. deriv. of projection coords over sensor distance from PP */
        cal->int_par.cc += dm;
        rotation_matrix(&(cal->ext_par));
        img_coord (fix[i], cal, cpar->mm, &xpd, &ypd);
        X[n][6]   = (xpd - xp) / dm;
        X[n+1][6] = (ypd - yp) / dm;
        cal->int_par.cc -= dm;

        /* ditto, over water-glass-air interface position vector */
        al += dm;
        cal->glass_par.vec_x += e1[0]*nGl*al;
        cal->glass_par.vec_y += e1[1]*nGl*al;
        cal->glass_par.vec_z += e1[2]*nGl*al;

        img_coord (fix[i], cal, cpar->mm, &xpd, &ypd);
        X[n][16]      = (xpd - xp) / dm;
        X[n+1][16] = (ypd - yp) / dm;

        al -= dm;
        cal->glass_par.vec_x = safety_x;
        cal->glass_par.vec_y = safety_y;
        cal->glass_par.vec_z = safety_z;

        be += dm;
        cal->glass_par.vec_x += e2[0]*nGl*be;
        cal->glass_par.vec_y += e2[1]*nGl*be;
        cal->glass_par.vec_z += e2[2]*nGl*be;

        img_coord (fix[i], cal, cpar->mm, &xpd, &ypd);
        X[n][17]      = (xpd - xp) / dm;
        X[n+1][17] = (ypd - yp) / dm;

        be -= dm;
        cal->glass_par.vec_x = safety_x;
        cal->glass_par.vec_y = safety_y;
        cal->glass_par.vec_z = safety_z;

        ga += dm;
        cal->glass_par.vec_x += cal->glass_par.vec_x*nGl*ga;
        cal->glass_par.vec_y += cal->glass_par.vec_y*nGl*ga;
        cal->glass_par.vec_z += cal->glass_par.vec_z*nGl*ga;

        img_coord (fix[i], cal, cpar->mm, &xpd, &ypd);
        X[n][18]      = (xpd - xp) / dm;
        X[n+1][18] = (ypd - yp) / dm;

        ga -= dm;
        cal->glass_par.vec_x = safety_x;
        cal->glass_par.vec_y = safety_y;
        cal->glass_par.vec_z = safety_z;

        /* Observed minus projected position: the right-hand side. */
        y[n]   = xc - xp;
        y[n+1] = yc - yp;

        n += 2;
      }
      
      n_obs = n;
      
      /* identities */
      for (i = 0; i < IDT; i++)
        X[n_obs + i][6 + i] = 1;
        
      y[n_obs+0] = ident[0] - cal->int_par.cc;
      y[n_obs+1] = ident[1] - cal->int_par.xh;
      y[n_obs+2] = ident[2] - cal->int_par.yh;
      y[n_obs+3] = ident[3] - cal->added_par.k1;
      y[n_obs+4] = ident[4] - cal->added_par.k2;
      y[n_obs+5] = ident[5] - cal->added_par.k3;
      y[n_obs+6] = ident[6] - cal->added_par.p1;
      y[n_obs+7] = ident[7] - cal->added_par.p2;
      y[n_obs+8] = ident[8] - cal->added_par.scx;
      y[n_obs+9] = ident[9] - cal->added_par.she;

      /* weights */
      for (i = 0; i < n_obs; i++)
          P[i] = 1;

      /* A parameter whose flag is off gets the POS_INF weight on its
         identity row; otherwise the weight is 1. */
      P[n_obs+0] = ( ! flags->ccflag) ?  POS_INF : 1;
      P[n_obs+1] = ( ! flags->xhflag) ?  POS_INF : 1;
      P[n_obs+2] = ( ! flags->yhflag) ?  POS_INF : 1;
      P[n_obs+3] = ( ! flags->k1flag) ?  POS_INF : 1;
      P[n_obs+4] = ( ! flags->k2flag) ?  POS_INF : 1;
      P[n_obs+5] = ( ! flags->k3flag) ?  POS_INF : 1;
      P[n_obs+6] = ( ! flags->p1flag) ?  POS_INF : 1;
      P[n_obs+7] = ( ! flags->p2flag) ?  POS_INF : 1;
      P[n_obs+8] = ( ! flags->scxflag) ?  POS_INF : 1;
      P[n_obs+9] = ( ! flags->sheflag) ?  POS_INF : 1;

      n_obs += IDT;
      sumP = 0;
      for (i = 0; i < n_obs; i++) {       	/* homogenize */
          p = sqrt (P[i]);
          for (j = 0; j < NPAR; j++)
              Xh[i][j] = p * X[i][j];
            
          yh[i] = p * y[i];
          sumP += P[i];
      }
        
      /* Gauss Markoff Model it is the least square adjustment 
         of the redundant information contained both in the spatial 
         intersection and the resection, see [1], eq. 23 */
      ata ((double *) Xh, (double *) XPX, n_obs, numbers, NPAR );
      matinv ((double *) XPX, numbers, NPAR);
      atl ((double *) XPy, (double *) Xh, yh, n_obs, numbers, NPAR);
      matmul ((double *) beta, (double *) XPX, (double *) XPy, 
          numbers, numbers,1, NPAR, NPAR);

      /* Converged once every correction is within CONVERGENCE. */
      stopflag = 1;
      for (i = 0; i < numbers; i++) {
          if (fabs (beta[i]) > CONVERGENCE)  stopflag = 0;
      }

      /* Parameters that are not being adjusted receive no correction. */
      if ( ! flags->ccflag) beta[6] = 0.0;
      if ( ! flags->xhflag) beta[7] = 0.0;
      if ( ! flags->yhflag) beta[8] = 0.0;
      if ( ! flags->k1flag) beta[9] = 0.0;
      if ( ! flags->k2flag) beta[10] = 0.0;
      if ( ! flags->k3flag) beta[11] = 0.0;
      if ( ! flags->p1flag) beta[12] = 0.0;
      if ( ! flags->p2flag) beta[13] = 0.0;
      if ( ! flags->scxflag)beta[14] = 0.0;
      if ( ! flags->sheflag) beta[15] = 0.0;

      cal->ext_par.x0 += beta[0];
      cal->ext_par.y0 += beta[1];
      cal->ext_par.z0 += beta[2];
      cal->ext_par.omega += beta[3];
      cal->ext_par.phi += beta[4];
      cal->ext_par.kappa += beta[5];
      cal->int_par.cc += beta[6];
      cal->int_par.xh += beta[7];
      cal->int_par.yh += beta[8];
      cal->added_par.k1 += beta[9];
      cal->added_par.k2 += beta[10];
      cal->added_par.k3 += beta[11];
      cal->added_par.p1 += beta[12];
      cal->added_par.p2 += beta[13];
      cal->added_par.scx += beta[14];
      cal->added_par.she += beta[15];

      if (flags->interfflag) {
          cal->glass_par.vec_x += e1[0]*nGl*beta[16];
          cal->glass_par.vec_y += e1[1]*nGl*beta[16];
          cal->glass_par.vec_z += e1[2]*nGl*beta[16];
          cal->glass_par.vec_x += e2[0]*nGl*beta[17];
          cal->glass_par.vec_y += e2[1]*nGl*beta[17];
          cal->glass_par.vec_z += e2[2]*nGl*beta[17];
      }
    }

    /* compute residuals etc. */
    matmul ( (double *) Xbeta, (double *) X, (double *) beta, n_obs, 
        numbers, 1, n_obs, NPAR);
    omega = 0;
    for (i = 0; i < n_obs; i++) {
        resi[i] = Xbeta[i] - y[i];
        omega += resi[i] * P[i] * resi[i];
    }
    sigmabeta[NPAR] = sqrt (omega / (n_obs - numbers));

    for (i = 0; i < numbers; i++) { 
        sigmabeta[i] = sigmabeta[NPAR] * sqrt(XPX[i][i]);
    }

    /* Release all scratch storage; yh used to be leaked. */
    free(X);
    free(P);
    free(y);
    free(yh);
    free(Xbeta);
    free(Xh);

    if (stopflag){
        rotation_matrix(&(cal->ext_par));
        memcpy(cal_in, cal, sizeof (Calibration));
        free(cal);   /* working copy no longer needed (was leaked) */
        return resi;
    }
    else {
        free(cal);   /* working copy no longer needed (was leaked) */
        free(resi);
        return NULL;
    }
}