C++ (Cpp) mmult 예제들

예제 #1

0

파일 보기

파일: Mstrtoul.c 프로젝트: 8l/csolve

/*
 * mstrtoul: strtoul-style conversion of the text in s, written in base b,
 * into the multi-precision integer a.
 *
 *   a - destination; set to 0 and then built up one digit at a time as
 *       a = a * b + digit
 *   s - input text; conversion starts at s[0]
 *   p - if not NULL, *p receives the address of the first character that was
 *       not consumed
 *   b - radix; only characters accepted by isxdigit() are treated as digits,
 *       so radices above 16 gain no additional digit characters
 *
 * Conversion stops at the first character that is not a valid digit in
 * base b.  (Previously a hex digit that was out of range for the base was
 * silently skipped and parsing carried on, and for b > 10 one letter too
 * many was accepted.)
 */
mstrtoul(MINT *a, char *s, char **p, short int b)
{
	MINT	y, base;
	int	c, digit;
	short	qy;
	int	i;

	mset(0,a);
	MSET(b,&base);
	/* y is a one-word MINT whose storage is qy, the digit being added */
	y.len	= 1;
	y.val	= &qy;

	for (i = 0; ; i++) {
		/* ctype functions require a value representable as unsigned char */
		c = (unsigned char)s[i];
		if (!isxdigit(c)) break;
		if (isupper(c)) c = c - 'A' + 'a';
		if (c >= '0' && c <= '9') digit = c - '0';
		else digit = c - 'a' + 10;
		/* a hex digit that does not exist in this base ends the number */
		if (digit >= b) break;
		qy = digit;
		mmult(a,&base,a);
		if (qy != 0) madd(a,&y,a);
	}
	/* i indexes the first character that was not consumed */
	if (p!=NULL) (*p)=(char *)s+i;
}

예제 #2

0

파일 보기

파일: cholesky.c 프로젝트: rafat2/codelite

/*
 * rchold: in-place elimination over the N x N block that starts at A and has
 * row stride `stride`, processed one leading row at a time.
 *
 * For each step the first row (right of the diagonal) is divided by the
 * diagonal entry, U22 is filled by mmult()/scale() from that row, and the
 * upper triangle of the trailing (N-1) x (N-1) block is reduced by U22.
 * U22 is caller-provided scratch; it is indexed up to (N-1)*(N-1) elements.
 *
 * NOTE(review): mmult(A+1,A+1,U22,N-1,1,N-1) presumably forms the outer
 * product of the scaled row with itself and scale() presumably multiplies
 * U22 by the pivot in place - confirm against their definitions.
 */
static void rchold(double *A,int N, int stride, double *U22) {
	int row, col, dst_base, src_base, sub;
	double pivot;

	/* iterative form of the original tail recursion; stops at a 1 x 1 block */
	while (N != 1) {
		sub = N - 1;
		pivot = A[0];
		for (col = 1; col < N; ++col) {
			A[col] /= pivot;
		}
		mmult(A+1,A+1,U22,sub,1,sub);
		scale(U22,sub,sub,pivot);
		for (row = 0; row < sub; ++row) {
			/* first element of trailing-block row `row`, and of U22 row `row` */
			dst_base = stride + 1 + row * stride;
			src_base = row * sub;
			for (col = row; col < sub; col++) {
				A[col + dst_base] -= U22[col + src_base];
			}
		}

		/* continue with the trailing block */
		A += stride + 1;
		N = sub;
	}
}

예제 #3

0

파일 보기

파일: geometry.c 프로젝트: BanditCat/CopyrightYoMamma

/*
 * mrotate: fill a 4x4 axis/angle rotation matrix for axis v and angle a,
 * then combine it with *ma through mmult( ma, &rot ).
 * NOTE(review): the formula is the standard one for a unit-length axis;
 * v is not normalised here - confirm callers pass a unit vector.
 */
void mrotate( mat* ma, const triple* v, f32 a ){
  f32 cosA = vcos( a );
  f32 sinA = vsin( a );
  f32 omc = 1 - cosA;   /* "one minus cosine" */
  static mat rot;

  /* homogeneous row and column: no translation, no projection */
  rot.m[ 0 ][ 3 ] = 0;
  rot.m[ 1 ][ 3 ] = 0;
  rot.m[ 2 ][ 3 ] = 0;
  rot.m[ 3 ][ 0 ] = 0;
  rot.m[ 3 ][ 1 ] = 0;
  rot.m[ 3 ][ 2 ] = 0;
  rot.m[ 3 ][ 3 ] = 1;

  /* diagonal of the 3x3 rotation part */
  rot.m[ 0 ][ 0 ] = omc * v->x * v->x + cosA;
  rot.m[ 1 ][ 1 ] = omc * v->y * v->y + cosA;
  rot.m[ 2 ][ 2 ] = omc * v->z * v->z + cosA;

  /* off-diagonal pairs: same symmetric term, opposite sine term */
  rot.m[ 0 ][ 1 ] = omc * v->x * v->y + sinA * v->z;
  rot.m[ 1 ][ 0 ] = omc * v->x * v->y - sinA * v->z;

  rot.m[ 0 ][ 2 ] = omc * v->x * v->z - sinA * v->y;
  rot.m[ 2 ][ 0 ] = omc * v->x * v->z + sinA * v->y;

  rot.m[ 1 ][ 2 ] = omc * v->y * v->z + sinA * v->x;
  rot.m[ 2 ][ 1 ] = omc * v->y * v->z - sinA * v->x;

  mmult( ma, &rot );
}

예제 #4

0

파일 보기

파일: canonical_expected.hpp 프로젝트: cadarso/mps

  static inline const typename Tensor::elt_t
  do_string_order(const iTEBD<Tensor> &psi,
		  const Tensor &Opi, int i, const Tensor &Opmiddle,
		  const Tensor &Opj, int j)
  {
    if (i == j) {
      return expected(psi, mmult(Opi, Opj), i);
    } else if (i > j) {
      return do_string_order(psi, Opj, j, Opmiddle, Opi, i);
    } else if (!psi.is_canonical()) {
      return do_string_order(psi.canonical_form(), Opi, i, Opmiddle, Opj, j);
    } else {
      j = j - i;
      i = i & 1;
      j = j + i;
      Tensor v1 = psi.left_boundary(0);
      Tensor v2 = v1;
      const Tensor none;
      const Tensor *op;
      for (int site = 0; (site <= j) || !(site & 1); ++site) {
        if (site == i)
          op = &Opi;
        else if (site == j)
          op = &Opj;
        else if (site > i && site < j)
          op = &Opmiddle;
        else
          op = &none;
        v1 = propagate_right(v1, psi.combined_matrix(site), *op);
        v2 = propagate_right(v2, psi.combined_matrix(site));
      }
      return trace(v1) / trace(v2);
    }
  }

예제 #5

0

파일 보기

파일: time_mult.cpp 프로젝트: divakarvi/Book-SPCA

/*
 * Times repeated calls of mmult() on dim x dim matrices filled with ones and
 * returns 2*dim^3 divided by the median of the measured clk.toc() values
 * (flops per cycle, assuming toc() reports cycles).
 *
 * flag selects where the multiply runs: HOST and AUTO call mmult() directly
 * (AUTO brackets the run with mkl_mic_enable()/mkl_mic_disable()), MIC wraps
 * the call in an offload pragma.
 */
double time_mult(long dim, enum mult_flag_enum flag){
	const int count = 10;
	const long nelem = dim*dim;
	const bool automatic = (flag == AUTO);

	double *a = new double[nelem];
	double *b = new double[nelem];
	double *c = new double[nelem];

#pragma omp parallel for
	for(long i=0; i < nelem; i++){
		c[i] = 1;
		b[i] = 1;
		a[i] = 1;
	}

	TimeStamp clk;
	StatVector stats(count);

	if(automatic)
		mkl_mic_enable();

	for(int trial=0; trial < count; trial++){
		clk.tic();
		switch(flag){
		case MIC:
			/* the pragma applies to the single statement that follows it */
#pragma offload target(mic)						\
	in(a:length(dim*dim) align(64) alloc_if(1) free_if(1))		\
	in(b:length(dim*dim) align(64) alloc_if(1) free_if(1))		\
	inout(c: length(dim*dim) align(64) alloc_if(1) free_if(1))
			mmult(a, b, c, dim);
			break;
		case HOST:
		case AUTO:
			mmult(a, b, c, dim);
			break;
		}
		double cycles = clk.toc();
		stats.insert(cycles);
	}

	if(automatic)
		mkl_mic_disable();

	delete[] a;
	delete[] b;
	delete[] c;

	return 2.0*dim*dim*dim/stats.median();
}

예제 #6

0

파일 보기

파일: ftconfig.c 프로젝트: lindemann09/pyForceDAQ

short GetMatrix(Calibration *cal, float *result) {
// Calculates a working matrix based on the basic matrix,
// basic tool transform, user tool transform, and user units,
// and stores in result.
//
// result is addressed as rows of MAX_GAUGES floats; rows 0..NumAxes-1 and
// columns 0..NumGauges-1 are written.
//
// Returns 0 on success, 1 if a tool transform could not be built (TTM()
// reported an error), 2 if a unit conversion factor evaluated to zero.
	float UserTTM[6][6];           // the User tool transform matrix 
    float BasicTTM[6][6];          // basic (built-in) tool transform matrix
	float result1[6][MAX_GAUGES];  // temporary intermediate result
	float FConv, TConv;            // unit conversion factors
	unsigned short i, j;           // loop variables
	unsigned short NumGauges=cal->rt.NumChannels-1;  // number of strain gages
	short sts;                     // return value 
	const float *basic;            // flat view of the basic matrix

	if (cal->rt.NumAxes==6) {
		sts=TTM(cal->BasicTransform,BasicTTM,cal->ForceUnits,cal->TorqueUnits);
		if (sts!=0) return 1;      // error in tool transform units
		sts=TTM(cal->cfg.UserTransform,UserTTM,cal->ForceUnits,cal->TorqueUnits);
		if (sts!=0) return 1;      // error in tool transform units
		mmult(*BasicTTM,6,6,6,
			*cal->BasicMatrix,NumGauges,MAX_GAUGES,
			*result1,MAX_GAUGES);
		mmult(*UserTTM,6,6,6,                // compute working matrix
			*result1,NumGauges,MAX_GAUGES,
			result,MAX_GAUGES);
	} else {
		// No transforms allowed except for 6-axis transducers: the working
		// matrix starts as a copy of the basic matrix.  The copy is done
		// element by element; assigning to the local pointer `result`
		// (as was done before) left the caller's buffer unwritten and made
		// the unit scaling below modify cal->BasicMatrix itself.
		basic=*cal->BasicMatrix;
		for(i=0;i<cal->rt.NumAxes;i++) {
			for(j=0;j<NumGauges;j++) {
				result[i*MAX_GAUGES+j] = basic[i*MAX_GAUGES+j];
			}
		}
	}
	//Apply units change
	FConv = ForceConv(cal->cfg.ForceUnits) / ForceConv(cal->ForceUnits);
	TConv = TorqueConv(cal->cfg.TorqueUnits) / TorqueConv(cal->TorqueUnits);
	for(i=0;i<cal->rt.NumAxes;i++) {
		for(j=0;j<NumGauges;j++) {
			// axes whose name starts with 'F' are forces, all others torques
			if ((cal->AxisNames[i])[0]=='F') {
				result[i*MAX_GAUGES+j] = result[i*MAX_GAUGES+j] * FConv;
				if (FConv==0) return 2;
			}
			else {
				result[i*MAX_GAUGES+j] = result[i*MAX_GAUGES+j] * TConv;
				if (TConv==0) return 2;
			}
		}
	}
	return 0;
} // GetMatrix()

예제 #7

0

파일 보기

파일: geometry.c 프로젝트: BanditCat/CopyrightYoMamma

/*
 * mtranslate: build a 4x4 identity matrix with ( x, y, z ) stored in row 3,
 * columns 0..2, and combine it with *ma through mmult( ma, &xlat ).
 */
void mtranslate( mat* ma, f32 x, f32 y, f32 z ){
  static mat xlat;
  u32 row, col;

  /* identity: ones on the diagonal, zeros elsewhere */
  for( row = 0; row < 4; ++row ){
    for( col = 0; col < 4; ++col ){
      xlat.m[ row ][ col ] = ( row == col ) ? 1 : 0;
    }
  }

  /* translation components */
  xlat.m[ 3 ][ 0 ] = x;
  xlat.m[ 3 ][ 1 ] = y;
  xlat.m[ 3 ][ 2 ] = z;

  mmult( ma, &xlat );
}

예제 #8

0

파일 보기

파일: aufgabe5.2.cpp 프로젝트: florianm93/studium

// Multiplies a fixed 3x4 matrix by a fixed 4x1 column and prints the product.
int main() {
        int lhs[3][4] = {
                {1, 2, 3, 4},
                {2, 6, 1, 8},
                {5, 1, 2, 1}
        };
        int rhs[4][1] = {
                {1},
                {9},
                {11},
                {5}
        };

        Array2d<int> A(lhs);
        Array2d<int> B(rhs);

        Array2d<int> C = mmult(A,B);
        printArray(C);

        return 0;
}

예제 #9

0

파일 보기

파일: unrolling.c 프로젝트: recosfero/Par_Comp

/*
 * Benchmark driver: fills A (SIZE_M x SIZE_N) and B (SIZE_N x SIZE_K), times
 * the plain mmult() and cblas_dgemm() on them, prints one CSV line to stderr
 * and then runs the unrolled variants through executeUnloopMethod().
 *
 * NOTE(review): the CSV columns are labelled "gflops" but are computed as
 * 1.0e-6*nflop/time; whether that is GFLOP/s depends on the unit returned by
 * MYTIMESTAMP - confirm.
 */
int main(int argc, char* argv[])
{
    int i, j;
    double tstart, tstop;
    double nflop;
    double tmmult, tdgemm;

    for( i=0; i<SIZE_M; i++ ) {
	    for( j=0; j<SIZE_N; j++ ) {
	      A[i][j]=(double)(i)+(double)(j);
	    }
    }
    
    for( i=0; i<SIZE_N; i++ ) {
	    for( j=0; j<SIZE_K; j++ ) {
	      B[i][j]=(double)(i)+(double)(j);
	    }
    }

    nflop = 2.0*(double)SIZE_M*(double)SIZE_N*(double)SIZE_K;

    MYTIMESTAMP(tstart);
    mmult( A, B, C);
    MYTIMESTAMP(tstop);

    tmmult = tstop-tstart;

    /*
     * C (SIZE_M x SIZE_K) = A (SIZE_M x SIZE_N) * B (SIZE_N x SIZE_K).
     * cblas_dgemm takes (rows of C, columns of C, inner dimension) and, for
     * row-major storage, each leading dimension is that matrix's column
     * count.  The earlier argument order was only valid when all three
     * sizes are equal.
     */
    MYTIMESTAMP(tstart);
    cblas_dgemm(CblasRowMajor,  
		CblasNoTrans, CblasNoTrans, SIZE_M, SIZE_K, SIZE_N,  
		1.0, (const double*)A, SIZE_N, (const double*)B, 
		SIZE_K, 0.0, (double*)C, SIZE_K);  
    MYTIMESTAMP(tstop);

    tdgemm = tstop-tstart;

    fprintf(stderr, "#M,N,K,tmmult,tdgemm,gflops_mmult,gflops_dgemm\n");
    fprintf(stderr, "%d,%d,%d,%f,%f,%f,%f\n",
	    SIZE_M, SIZE_N, SIZE_K, tmmult, tdgemm, 
	    1.0e-6*nflop/tmmult,
	    1.0e-6*nflop/tdgemm);
    
    executeUnloopMethod(nflop, mmult_unroll4, "Unloop 4");
    executeUnloopMethod(nflop, mmult_unroll8, "Unloop 8");
    executeUnloopMethod(nflop, mmult_unroll16, "Unloop 16");
    executeUnloopMethod(nflop, mmult_unroll24, "Unloop 24");
    executeUnloopMethod(nflop, mmult_unroll28, "Unloop 28");
    executeUnloopMethod(nflop, mmult_unroll222, "Unloop 222");
    return 0;
}

예제 #10

0

파일 보기

파일: Mgcd.c 프로젝트: 8l/csolve

/*
 * minvert: extended-Euclid style loop over a and b that leaves its result
 * in c.
 *
 * The loop repeatedly divides `hi` by `lo` (starting from hi = b, lo = a)
 * while tracking a coefficient pair (coef, coef_prev).  When the loop ends,
 * c is set to 1 if mcmp(&unity, &hi) is non-zero, otherwise to coef_prev,
 * replaced by b - coef_prev when the number of steps is even.
 *
 * NOTE(review): mdiv() presumably yields quotient and remainder in its 3rd
 * and 4th arguments, and mmove() presumably transfers a value - confirm
 * against the MINT library.
 */
FN minvert(MINT *a, MINT *b, MINT *c)
{
	static MINT unity;              /* the constant 1, built on first call */
	static int unity_pending = 1;
	MINT hi, lo, rem, quot, coef, coef_prev;
	int steps;

	if (unity_pending) {
		unity_pending = 0;
		MSET(1,&unity);
	}

	MINIT(&hi);
	MINIT(&lo);
	MINIT(&rem);
	MINIT(&quot);
	MINIT(&coef_prev);
	MSET (1,&coef);

	mcopy(b, &hi);
	mcopy(a, &lo);

	/*
	 * Loop invariant (as stated by the original author):
	 *
	 * lo = -1^steps * coef * a  mod b
	 */
	for (steps = 0; mtest(&lo) != 0; steps++) {
		mdiv(&hi, &lo, &quot, &rem);
		/* hi is free after the division; reuse it to hold the old coef */
		mcopy(&coef, &hi);
		mmult(&quot, &coef, &coef);
		madd(&coef, &coef_prev, &coef);
		/* rotate: old coef -> coef_prev, lo -> hi, rem -> lo */
		mmove(&hi, &coef_prev);
		mmove(&lo, &hi);
		mmove(&rem, &lo);
	}

	if (mcmp(&unity,&hi) != 0) {
		mcopy(&unity,c);
	} else {
		mmove(&coef_prev, c);
		if ((steps & 01) == 0) {
			msub(b, c, c);
		}
	}

	MFREE(&hi);
	MFREE(&lo);
	MFREE(&rem);
	MFREE(&quot);
	MFREE(&coef_prev);
	MFREE(&coef);
}

예제 #11

0

파일 보기

파일: ftrt.c 프로젝트: yournjell/Catheter

// Converts raw gauge voltages into result[] using coefs->working_matrix.
// Every channel except the last is first bias-corrected; when tempcomp is
// TRUE the reading is also passed through TempComp() together with the last
// channel's voltage, and TCbias_vector is used instead of bias_vector.
void RTConvertToFT(RTCoefs *coefs, float voltages[],float result[],BOOL tempcomp) {
	float corrected[MAX_GAUGES];   // bias-corrected gauge readings
	unsigned short g;

	if (tempcomp==TRUE) {
		// temperature-compensated path
		for (g=0; g<coefs->NumChannels-1; g++) {
			corrected[g]=TempComp(coefs,voltages[g],voltages[coefs->NumChannels-1],g) - coefs->TCbias_vector[g];
		}
	} else {
		// plain bias removal
		for (g=0; g<coefs->NumChannels-1; g++) {
			corrected[g]=voltages[g]-coefs->bias_vector[g];
		}
	}

	// result = working_matrix * corrected, written as a single column
	mmult(*coefs->working_matrix,coefs->NumAxes,(unsigned short)(coefs->NumChannels-1),MAX_GAUGES,
		corrected,1,1,
		result,1);
}

예제 #12

0

파일 보기

파일: MultiplyMatrices.c 프로젝트: ktakashi/sagittarius-ffi-helper

/*
 * Multiplies two SIZE x SIZE matrices n times (n = argv[1] when exactly one
 * argument is given, otherwise 1) and prints four sample entries of the
 * product.
 */
int main(int argc, char *argv[]) {
    int reps = 1;
    int iter;
    int **m1, **m2, **mm;

    if (argc == 2) {
        reps = atoi(argv[1]);
    }

    m1 = mkmatrix(SIZE, SIZE);
    m2 = mkmatrix(SIZE, SIZE);
    mm = mkmatrix(SIZE, SIZE);

    iter = 0;
    while (iter < reps) {
        mm = mmult(SIZE, SIZE, m1, m2, mm);
        iter++;
    }
    printf("%d %d %d %d\n", mm[0][0], mm[2][3], mm[3][2], mm[4][4]);

    freematrix(SIZE, m1);
    freematrix(SIZE, m2);
    freematrix(SIZE, mm);
    return 0;
}

예제 #13

0

파일 보기

파일: 039.c 프로젝트: tiehuis/euler

/*
 * calc_triple: writes three triples to dst[0], dst[1] and dst[2], each a
 * fixed integer linear combination of the components of *src.
 */
void calc_triple(triple *dst, triple *src)
{
/* row (x, y, z) applied to the components of t */
#define mmult(t, x, y, z) ((x)*(t)->a + (y)*(t)->b + (z)*(t)->c)

    /* first triple */
    dst[0].a = mmult(src,  1, -2, 2);
    dst[0].b = mmult(src,  2, -1, 2);
    dst[0].c = mmult(src,  2, -2, 3);

    /* second triple */
    dst[1].a = mmult(src,  1,  2, 2);
    dst[1].b = mmult(src,  2,  1, 2);
    dst[1].c = mmult(src,  2,  2, 3);

    /* third triple */
    dst[2].a = mmult(src, -1,  2, 2);
    dst[2].b = mmult(src, -2,  1, 2);
    dst[2].c = mmult(src, -2,  2, 3);

#undef mmult
}

예제 #14

0

파일 보기

파일: geometry.c 프로젝트: BanditCat/CopyrightYoMamma

/*
 * mscale: build a 4x4 diagonal matrix with ( xs, ys, zs, 1 ) on the diagonal
 * and combine it with *ma through mmult( ma, &scl ).
 */
void mscale( mat* ma, f32 xs, f32 ys, f32 zs ){
  static mat scl;
  u32 row, col;

  for( row = 0; row < 4; ++row ){
    /* clear the row, then drop in its diagonal entry */
    for( col = 0; col < 4; ++col )
      scl.m[ row ][ col ] = 0;

    switch( row ){
    case 0:
      scl.m[ row ][ row ] = xs;
      break;
    case 1:
      scl.m[ row ][ row ] = ys;
      break;
    case 3:
      scl.m[ row ][ row ] = 1;
      break;
    default:
      scl.m[ row ][ row ] = zs;
      break;
    }
  }
  mmult( ma, &scl );
}

예제 #15

0

파일 보기

파일: mmult.c 프로젝트: enee351/enee351.github.io

/*
 * Demo: builds a fixed 3x3 matrix A and 3x2 matrix B, prints both, prints
 * their product from mmult(), then releases all three matrices.
 */
int main() {
	/* initial contents, row by row */
	static const int a_init[3][3] = {
		{0, 1, 2},
		{3, 4, 5},
		{1, 1, 1}
	};
	static const int b_init[3][2] = {
		{0, 1},
		{2, 3},
		{4, 5}
	};
	int r, c;

	Matrix *A = init_matrix(3, 3);
	Matrix *B = init_matrix(3, 2);

	for (r = 0; r < 3; r++) {
		for (c = 0; c < 3; c++) {
			(A->values)[r][c] = a_init[r][c];
		}
		for (c = 0; c < 2; c++) {
			(B->values)[r][c] = b_init[r][c];
		}
	}

	print_matrix(A);
	print_matrix(B);

	Matrix *C = mmult(A, B);
	print_matrix(C);

	destroy_matrix(A);
	destroy_matrix(B);
	destroy_matrix(C);

	return 0;
}

예제 #16

0

파일 보기

파일: nompmatrix.c 프로젝트: charlesxu90/Speedup-program-using-OpenMP-and-OpenACC-on-CUDA-and-Coprocessor

/*
 * Usage: prog I J K
 *
 * Allocates A (I*K elements), B (K*J elements) and C (I*J elements), fills
 * A and B, calls mmult(A, B, C, I, J, K) and prints the three matrices.
 * Exits with status 1 (printing "fail") when arguments are missing, a
 * dimension is not positive, or an allocation fails.
 */
int main(int argc, char* argv[]) 
{
  if (argc < 4) {
    fprintf(stderr, "fail");
    exit(1);
  }

  int i, j, k;
  int I = atoi(argv[1]);
  int J = atoi(argv[2]);
  int K = atoi(argv[3]);
  if (I <= 0 || J <= 0 || K <= 0) {
    fprintf(stderr, "fail");
    exit(1);
  }

  /* A is indexed as A[i * K + k], so it needs I*K elements (it was
     previously allocated with J*I, which overflows whenever K > J). */
  double *A = calloc((size_t)I * (size_t)K, sizeof(double));
  double *B = calloc((size_t)K * (size_t)J, sizeof(double));
  double *C = calloc((size_t)I * (size_t)J, sizeof(double));
  if (A == NULL || B == NULL || C == NULL) {
    fprintf(stderr, "fail");
    free(A);
    free(B);
    free(C);
    exit(1);
  }

  for (i = 0; i <  I; i++)
    for (k = 0; k < K; k++)
      A[i * K + k] = i + k;
  /* NOTE(review): B is filled with index j * K + k but printed below with
     index J * k + j; which layout mmult() expects is not visible here -
     confirm against its definition. */
  for (k = 0; k <  K; k++)
    for (j = 0; j < J; j++)
      B[j * K + k] = j + k;
  
  mmult(A, B, C, I, J, K);

  for (i = 0; i <  I; i++) {
    for (k = 0; k < K; k++)
      printf("%2g " , A[i * K + k]);
    printf("\n");
  }
  for (k = 0; k <  K; k++) {
    for (j = 0; j < J; j++)
      printf("%2g " , B[J * k + j]);
    printf("\n");
  }
  for (i = 0; i <  I; i++) {
    for (j = 0; j < J; j++)
      printf("%2g " , C[i * J + j]);
    printf("\n");
  }

  free(A);
  free(B);
  free(C);
  return 0;
}

예제 #17

0

파일 보기

파일: unrolling.c 프로젝트: recosfero/Par_Comp

/*
 * compare: recomputes the reference product with mmult(A,B,E) and checks D
 * against it element by element.  Prints "FAILED:<row>, <col>" for the first
 * entry that differs by more than 10e-3 and returns, otherwise prints
 * "PASSED".
 *
 * NOTE(review): D and E are declared with SIZE_N columns while the column
 * loop runs to SIZE_K; this is only consistent when SIZE_N == SIZE_K -
 * confirm.
 */
void compare(double D[SIZE_M][SIZE_N])
{
  const double tol = 10e-3;
  double E[SIZE_M][SIZE_N];
  int row, col;

  mmult(A,B,E);

  for (row = 0; row < SIZE_M; row++) {
    for (col = 0; col < SIZE_K; col++) {
      if (fabs(D[row][col] - E[row][col]) > tol) {
        fprintf(stderr,"FAILED:%d, %d \n", row,col);
        return;
      }
    }
  }

  fprintf(stderr,"PASSED\n");
}

예제 #18

0

파일 보기

파일: cholesky.c 프로젝트: rafat2/codelite

/*
 * rcholu: recursive in-place factorisation of the N x N block that starts
 * at A (row stride `stride`), one pivot per recursion level.
 *
 * At each level the pivot A[0] is replaced by its square root, the rest of
 * the first row is divided by it, and the upper triangle of the trailing
 * (N-1) x (N-1) block is reduced by U22, which mmult() fills from that row.
 * U22 is caller-provided scratch, indexed up to (N-1)*(N-1) elements.
 *
 * Returns 0 on success and -1 as soon as a pivot is not strictly positive.
 * A zero (or NaN) pivot used to slip through the `< 0` test on the
 * recursive path and caused a division by zero; it is now rejected exactly
 * like in the N == 1 case.
 */
static int rcholu(double *A,int N, int stride, double *U22) {
	int sc;
	int j,i,u,w;
	double u11;
	
	if (N == 1) {
		if (A[0] > 0) {
			A[0] = sqrt(A[0]);
			return 0;
		}
		return -1;
	}

	/* written as !(x > 0) so that NaN is rejected too */
	if (!(A[0] > 0)) {
		return -1;
	}
	u11 = sqrt(A[0]);
	A[0] = u11;
	for (j = 1; j < N;++j) {
		A[j] /= u11;
	}
	/* NOTE(review): presumably U22 = outer product of the scaled row with
	   itself - confirm against mmult() */
	mmult(A+1,A+1,U22,N-1,1,N-1);
	for (i = 0; i < N-1; ++i) {
		u = stride + 1+ i * stride;   /* start of trailing-block row i */
		w = i * (N-1);                /* start of U22 row i */
		for(j = i; j < N-1;j++) {
			A[j + u] -= U22[j + w];
		}
	}

	sc = rcholu(A+stride+1,N-1,stride,U22);
	if (sc == -1) {
		return -1;
	}

	return sc;
}

예제 #19

0

파일 보기

파일: canonical_expected.hpp 프로젝트: cadarso/mps

 /*
  * For every site in [0, N) stores in output.at(site) the ratio of two
  * traces: a chain carrying operators (Opi at site 0, Opmiddle afterwards,
  * closed at the current site with Opj, or with mmult(Opi,Opj) at site 0)
  * over the same chain without operators.  On even sites both chains are
  * extended by one extra site before the traces are taken.
  * A non-canonical psi is retried on psi.canonical_form().
  */
 static inline const Tensor
 do_string_order_many(const iTEBD<Tensor> &psi,
                      const Tensor &Opi, const Tensor &Opmiddle,
                      const Tensor &Opj, int N)
 {
   if (!psi.is_canonical())
     return do_string_order_many(psi.canonical_form(), Opi, Opmiddle, Opj, N);

   Tensor chain = psi.left_boundary(0);   // operator chain, open at the right
   Tensor norm = chain;                   // operator-free chain
   Tensor output(N);
   Tensor norm_ahead;                     // norm already advanced one site
   for (int site = 0; site < N; ++site) {
     const Tensor &here = psi.combined_matrix(site);
     Tensor closed = propagate_right(chain, here, site? Opj : mmult(Opi,Opj));

     // Reuse the look-ahead computed on the previous (even) site if any.
     if (norm_ahead.size())
       norm = norm_ahead;
     else
       norm = propagate_right(norm, here);

     if (site & 1) {
       norm_ahead = Tensor();
       output.at(site) = trace(closed) / trace(norm);
     } else {
       const Tensor &next = psi.combined_matrix(site+1);
       norm_ahead = propagate_right(norm, next);
       closed = propagate_right(closed, next);
       output.at(site) = trace(closed) / trace(norm_ahead);
     }

     // Extend the open chain through this site for the next iteration.
     chain = propagate_right(chain, here, site ? Opmiddle : Opi);
   }
   return output;
 }

예제 #20

0

파일 보기

파일: Msqrt.c 프로젝트: 8l/csolve

/*
 * msqrt: iterative integer square root of a.  The final estimate is moved
 * into b and a - estimate^2 is stored in r; the function returns r->len
 * (0 for a zero-length argument, where both outputs are set to 0).
 * A negative a->len is reported through mpfatal().
 *
 * NOTE(review): mdiv() presumably yields quotient and remainder in its 3rd
 * and 4th arguments, and valloc() presumably allocates est.len words into
 * est.val - confirm against the MINT library.
 */
msqrt(MINT *a, MINT *b, MINT *r)
{
	MINT est, next, rem;
	register int alen, j;

	MINIT(&est);
	MINIT(&next);
	MINIT(&rem);
	alen = a->len;

	if (alen<0) mpfatal("msqrt: neg arg");
	if (alen==0) {
		mset(0,b);
		mset(0,r);
		return(0);
	}

	/* initial estimate: about half as many words as a, only top word set */
	est.len = (alen & 01) ? (1+alen)/2 : 1 + alen/2;
	valloc(est.val,est.len);
	for (j=0; j<est.len; j++) est.val[j]=0;
	est.val[est.len-1] = (alen & 01) ? 0400 : 1;

	/* next = (est + a/est) / 2, repeated until it stops decreasing */
	while (1) {
		mdiv(a,&est,&next,&rem);
		madd(&est,&next,&next);
		mshiftr(&next,1);
		if (mcmp(&est,&next) > 0) {
			mmove(&next,&est);
			continue;
		}
		break;
	}

	/* keep a copy of the root in next; est is squared to form r */
	mcopy(&est,&next);
	mmult(&est,&est,&est);
	msub(a,&est,r);
	MFREE(&est);
	MMOVEFREE(&next,b);
	MFREE(&rem);
	return(r->len);
}

예제 #21

0

파일 보기

파일: s3_block_vect.c 프로젝트: SahanGH/psi4public

/*
** S3_BLOCK_VECT()
**
** Calculate a block of the sigma3 vector in equation (9c) of
** Olsen, Roos, et al.  For diagonal blocks of sigma.
**
** currently assumes that (ij|ij)'s have not been halved
** Try to get the Olsen vector version working....again!!!!
**
** Arguments as used in the body:
**   alplist, Ja_list, nas  passed through to form_ilist()
**   betlist, nbs           array of nbs string records walked by the Ib loop
**   C                      source coefficients, read as C[L[I]][J]
**   S                      accumulated into as S[R[I]][Ib_idx]
**   tei                    integral array, addressed through ioff[]
**   cnbs                   number of columns gathered from C / length of F
**   Ia_list, Ja_list       also used as symmetry labels (see below)
**   Jb_list                selects the excitation lists inside each Ib record
**   Cprime, F, V           caller-provided scratch
**   Sgn, L, R              lists filled by form_ilist(); ilen entries are used
*/
void s3_block_vect(struct stringwr *alplist, struct stringwr *betlist,
      double **C, double **S, double *tei, int nas, int nbs, int cnbs,
      int Ia_list, int Ja_list, int Jb_list, double **Cprime, double *F, 
      double *V, double *Sgn, int *L, int *R)
{
   struct stringwr *Ib;
   unsigned int Ib_ex;
   int ij, k, l, kl, I, J, RI;
   double tval, *CprimeI0, *CI0;
   int Ja_sym, Ia_sym;
   int ilen, Ib_idx, Jbcnt, *Iaij, *Ibij, *orbsym, norbs;
   unsigned int *Ibridx;
   signed char *Ibsgn;
   double *Tptr;

   norbs = CalcInfo.num_ci_orbs;
   /* orbital symmetries, skipping the first num_fzc_orbs entries */
   orbsym = CalcInfo.orbsym + CalcInfo.num_fzc_orbs;

   /* assume fci for now */
   /* i.e. the list indices double as symmetry labels */
   Ia_sym = Ia_list;
   Ja_sym = Ja_list;

   /* loop over k, l */
   for (k=0; k<norbs; k++) {
       for (l=0; l<=k; l++) {
           /* skip orbital pairs whose combined symmetry label is non-zero */
           if ((orbsym[k] ^ orbsym[l] ^ Ja_sym ^ Ia_sym) != 0) continue;
           kl = ioff[k] + l;
           ilen = form_ilist(alplist, Ja_list, nas, kl, L, R, Sgn);
           
           /* nothing to do when form_ilist() reports an empty list */
           if (!ilen) continue;

           /* integrals for this kl are read as Tptr[ij] below */
           Tptr = tei + ioff[kl];

           /* gather operation */
           /* Cprime[I][J] = C[L[I]][J] * Sgn[I] for I < ilen, J < cnbs */
           for (I=0; I<ilen; I++) {
               CprimeI0 = Cprime[I];
               CI0 = C[L[I]];
               tval = Sgn[I];
               for (J=0; J<cnbs; J++) {
                   CprimeI0[J] = CI0[J] * tval;
               }
           }

           /* loop over Ib */
           for (Ib=betlist, Ib_idx=0; Ib_idx<nbs; Ib_idx++, Ib++) {

              zero_arr(F, cnbs);
           
               /* loop over excitations E^b_{ij} from |B(I_b)> */
               Jbcnt = Ib->cnt[Jb_list];
               Ibridx = Ib->ridx[Jb_list];
               Ibsgn = Ib->sgn[Jb_list];
               Ibij = Ib->ij[Jb_list];
               
               /* The walk stops at the first entry whose ij exceeds kl; the
                  assignment inside the condition consumes that entry.
                  NOTE(review): Ib_ex is unsigned and Jbcnt is int, so the
                  comparison is carried out in unsigned arithmetic. */
               for (Ib_ex=0; Ib_ex < Jbcnt && (ij = *Ibij++)<=kl; Ib_ex++) {
                   J = *Ibridx++;
                   tval = *Ibsgn++;
                   /* the ij == kl contribution is halved */
                   if (ij == kl) tval *= 0.5;
                   F[J] += tval * Tptr[ij];
               }

               /* NOTE(review): presumably V = Cprime * F, an ilen x cnbs
                  matrix times a length-cnbs vector - confirm against
                  mmult() */
               mmult(Cprime, 0, &F, 1, &V, 1, ilen, cnbs, 1, 0);

               /* scatter: add V[I] into row R[I] of S, column Ib_idx */
               for (I=0; I<ilen; I++) {
                   RI = R[I];
                   S[RI][Ib_idx] += V[I];
               }

           } /* end loop over Ib */

       } /* end loop over l */
   } /* end loop over k */
}

예제 #22

0

파일 보기

파일: cholesky.c 프로젝트: rafat2/codelite

/*
 * rbcholu: blocked, recursive version of rcholu() for the N x N block that
 * starts at A (row stride `stride`), using blocks of BLOCKSIZE rows.
 *
 * Blocks no larger than BLOCKSIZE are handed straight to rcholu().
 * Otherwise:
 *   1. the leading bs x bs block is factored in place by rcholu();
 *   2. the first bs rows of A are copied, transposed, into UT;
 *   3. for each of the Nb = N - bs trailing columns a triangular system
 *      with the transposed leading block is solved by forward substitution;
 *      the solution is written to the first bs rows of A and to U12T;
 *   4. UT is overwritten through mmult(U12T,U12,UT,Nb,bs,Nb) and subtracted
 *      from the upper triangle of the trailing Nb x Nb block;
 *   5. the trailing block is processed recursively.
 *
 * UB and UT are caller-provided scratch buffers (UT is indexed up to
 * N*bs and Nb*Nb elements here).
 *
 * Returns the result of the last factorisation (0 on success) and -1 when a
 * factorisation fails or scratch memory cannot be allocated.  The failure
 * of the leading-block factorisation used to be ignored.
 */
static int rbcholu(double *A,int N, int stride, double *UB, double *UT) {
	int bs,bb,i,j,Nb,t,k,u,v,w,sc;
	double *b,*x,*U12,*U12T;
	double sum;
	
	bs = (int) BLOCKSIZE;
	bb = bs*bs;
	
	if (N <= BLOCKSIZE) {
		return rcholu(A,N,stride,UB);
	}

	/* U11: stop immediately if the leading block cannot be factored */
	if (rcholu(A,bs,stride,UB) == -1) {
		return -1;
	}

	Nb = N - bs;
	x = (double*) malloc(sizeof(double) * bs);
	b = (double*) malloc(sizeof(double) * bs);
	U12T = (double*) malloc(sizeof(double) * Nb * bs);
	U12 = (double*) malloc(sizeof(double) * Nb * bs);
	if (x == NULL || b == NULL || U12T == NULL || U12 == NULL) {
		free(x);
		free(b);
		free(U12T);
		free(U12);
		return -1;
	}
	
	/* UT[j*bs + i] = A[i*stride + j]: first bs rows of A, transposed */
	for (i =0; i < bs;++i) {
		t = i *stride;
		u = 0;
		for(j = 0; j < N;++j) {
			UT[u+i] = A[j+t];
			u += bs;
		}
	}
	
	for(k = 0; k < Nb;++k) {
		u = k * bs;
		/* right-hand side: column bs+k of the first bs rows */
		for(i = 0; i < bs;++i) {
			b[i] = UT[bb+u+i];
			x[i] = 0.;
		}
		/* forward substitution with the transposed leading block */
		for (i = 0; i < bs;++i) {
			t = i*bs;
			sum = 0;
			for (j = 0; j < i;++j) {
				sum += UT[t+j] * x[j];
			}
			x[i] = (b[i] - sum) / UT[t+i];
		}
		/* store the solution down column bs+k of A and as row k of U12T */
		v = bs + k;
		for(i = 0; i < bs;++i) {
			A[v] = x[i];
			U12T[u+i] = x[i];
			v += stride;
		}
	}
	
	mtranspose(U12T,Nb,bs,U12);
	mmult(U12T,U12,UT,Nb,bs,Nb);
	free(U12T);
	free(U12);
	free(b);
	free(x);
	/* reduce the upper triangle of the trailing Nb x Nb block */
	for (i = 0; i < Nb; ++i) {
		u = bs * stride + bs + i * stride;
		w = i * Nb;
		for(j = i; j < Nb;j++) {
			A[j + u] -= UT[j + w];
		}
	}
	
	sc = rbcholu(A + bs * stride + bs,Nb,stride,UB,UT);
	if (sc == -1) {
		return -1;
	}
	
	return sc;
}

예제 #23

0

파일 보기

파일: dsa.c 프로젝트: dburger/archive

/*=============================================================================
Function main

Purpose:  this is the main entry point of the program.  It verifies the DSA
          computations that Dr. Peterson showed us and then I use DSA on some
          text of my own.
          
Parameters:
          not used
          
Returns:  0 after the demonstration has run, 1 if memory for the hash
          could not be allocated.  The program exits with status 0 from
          inside main if the chosen myk fails the relative-primality check.

Notes:    mmult/mexp are called here as (operand, operand, result, modulus);
          madd/msub as (accumulator, operand, modulus).
=============================================================================*/
int main(int argc, char *argv[]) {

  UL y[32], r[32], qminus1[32], fminus1[32], pminus1[32], temp1[32], temp2[32],
    one[32], kprime[32], s[32], w[32], u1[32],
    u2[32], v[32], mykprime[32];

  UL hash[32];
  struct hash *texthash;

  /* make a one to work with */
  zeroUL(one,32);
  one[31] = 1;

  /* first demonstrate that Dr. Peterson's example works as shown */

  printf("=========================================================\n");
  printf("Verifying Dr. Peterson's results:\n\n");

  mexp(g,x,y,p); /* here we compute public key y */

  printf("Public key y is:\n");
  fprintUL(stdout,y,32);

  /* and now determine r */
  mexp(g,k,r,p);
  mmult(one,r,r,q); /* multiplying by one reduces r modulo q */

  printf("r is:\n");
  fprintUL(stdout,r,32);

  /* now calculate kprime */
  copyUL(q,qminus1,32);
  copyUL(f,fminus1,32);
  copyUL(p,pminus1,32);
  msub(qminus1,one,p);
  msub(fminus1,one,p);
  msub(pminus1,one,p);

  mmult(qminus1,fminus1,temp1,p);

  msub(temp1,one,p);

  mexp(k,temp1,kprime,q);

  printf("k' is:\n");
  fprintUL(stdout,kprime,32);

  /* now we get the s */
  mmult(x,r,temp1,q);
  madd(temp1,h,q);

  mmult(kprime,temp1,s,q);

  printf("s is:\n");
  fprintUL(stdout,s,32);

  /* calculate w=sprime */
  mmult(qminus1,fminus1,temp1,p);
  msub(temp1,one,p);
  
  mexp(s,temp1,w,q);

  printf("w is:\n");
  fprintUL(stdout,w,32);

  /* calculate u1 */
  mmult(h,w,u1,q);

  printf("u1 is:\n");
  fprintUL(stdout,u1,32);

  /* calculate u2 */
  mmult(r,w,u2,q);

  printf("u2 is:\n");
  fprintUL(stdout,u2,32);

  /* now calculate v */
  mexp(g,u1,temp1,p);
  mexp(y,u2,temp2,p);
  mmult(temp1,temp2,temp1,p);

  mmult(one,temp1,v,q);
                     
  printf("v is:\n");
  fprintUL(stdout,v,32);

  if (mcmp(r,v)==0)
    printf("v==r, Signature verifies!\n");
  else
    printf("v!=r, Signature does not verify!\n");
  printf("=========================================================\n\n");

  printf("Now working on some text of my own, I will sign the hash.\n\n");

  /* first get the hash we are going to sign */
  texthash = (struct hash *)malloc(sizeof(struct hash));
  if (texthash == NULL) {
    fprintf(stderr,"Out of memory while hashing the text.\n");
    return 1;
  }

  sha1Mem(stuff,strlen(stuff),texthash);

  /* the five hash words go into the last five words of the 32-word number */
  zeroUL(hash,32);
  hash[31] = texthash->H4;
  hash[30] = texthash->H3;
  hash[29] = texthash->H2;
  hash[28] = texthash->H1;
  hash[27] = texthash->H0;

  /* the words have been copied out; the struct is no longer needed */
  free(texthash);
  texthash = NULL;

  printf("Hash that we will sign:\n");
  fprintUL(stdout,hash,32);

  printf("Determining if chosen myk is relatively prime to q...\n");
  
  /* myk^(q-1) mod q must be one */
  mexp(myk,qminus1,temp1,q);

  if (mcmp(temp1,one)==0)
    printf("myk is relatively prime to q!\n\n");
  else {
    /* adjacent literals instead of a backslash continuation, which put the
       next line's indentation into the printed message */
    printf("Chosen myk is not relatively prime to q, please "
           "choose another myk.\n");
    exit(0);
  }

  mexp(g,x,y,p); /* here we compute public key y */

  printf("Public key y is:\n");
  fprintUL(stdout,y,32);

  /* and now determine r */
  mexp(g,myk,r,p);
  mmult(one,r,r,q);

  printf("r is:\n");
  fprintUL(stdout,r,32);

  /* calculate mykprime */
  copyUL(qminus1,temp1,32);
  msub(temp1,one,q);
  mexp(myk,temp1,mykprime,q);

  /* calculate s */
  mmult(x,r,temp1,q);
  madd(temp1,hash,q);

  mmult(mykprime,temp1,s,q);

  printf("s is:\n");
  fprintUL(stdout,s,32);

  /* calculate w=sprime */
  mmult(qminus1,fminus1,temp1,p);
  msub(temp1,one,p);
  
  mexp(s,temp1,w,q);

  printf("w is:\n");
  fprintUL(stdout,w,32);

  /* calculate u1 */
  mmult(hash,w,u1,q);

  printf("u1 is:\n");
  fprintUL(stdout,u1,32);

  /* calculate u2 */
  mmult(r,w,u2,q);

  printf("u2 is:\n");
  fprintUL(stdout,u2,32);

  /* now calculate v */
  mexp(g,u1,temp1,p);
  mexp(y,u2,temp2,p);
  mmult(temp1,temp2,temp1,p);

  mmult(one,temp1,v,q);
                     
  printf("v is:\n");
  fprintUL(stdout,v,32);

  if (mcmp(r,v)==0)
    printf("v==r, Signature verifies!\n");
  else
    printf("v!=r, Signature does not verify!\n");

  return 0;
}

예제 #24

0

파일 보기

파일: main.c 프로젝트: rafat2/codelite

/*
 * Demo driver for the Householder / QR / eigenvalue routines:
 *   1. house() + housemat() on a 4-vector, then P*x via mmult();
 *   2. qr_house() / getQR_house() on a 3x3 matrix, then Q*R via mmult();
 *   3. eig() on a second 3x3 matrix, printing real and imaginary parts.
 * Returns 0 on success, 1 if a work array cannot be allocated.
 */
int main(int argc, char **argv)
{
	int i,N,row,col;
	double *P,*bvec,*R,*Q;
	double b;
	double x[4] = {3,1,5,1};
	double v[4] = {0,0,0,0};
	double AA[9] = {12,-51,4,6,167,-68,-4,24,-41};
	double A[9] = {1,5,7,3,0,6,4,3,1};
	/* three eigenvalues; the initializers previously listed four values */
	double eigre[3] = {0,0,0};
	double eigim[3] = {0,0,0};

	N = 4;
	b = 1.;
	row = 3;
	col = 3;
	P = (double*) malloc(sizeof(double) * N * N);
	bvec = (double*) malloc(sizeof(double) * col);
	R = (double*) malloc(sizeof(double) * col *col);
	Q = (double*) malloc(sizeof(double) * row * col);
	if (P == NULL || bvec == NULL || R == NULL || Q == NULL) {
		fprintf(stderr, "out of memory\n");
		free(P);
		free(bvec);
		free(R);
		free(Q);
		return 1;
	}

	printf("b %lf \n", b);
	b = house(x,N,v);
	printf("beta %lf \n",b);
	
	for(i =0;i < N;++i) {
		printf("v %lf \n",v[i]);
	}
	housemat(v,N,b,P);
	mdisplay(P,N,N);
	
	/* v = P * x (N x N times N x 1) */
	mmult(P,x,v,N,N,1);
	mdisplay(v,N,1);
	
	
	qr_house(AA,row,col,bvec);
	mdisplay(AA,row,col);
	getQR_house(AA,row,col,bvec,Q,R);
	mdisplay(R,col,col);
	mdisplay(Q,row,col);
	
	/* AA = Q * R, overwriting the factored matrix */
	mmult(Q,R,AA,row,col,col);
	mdisplay(AA,row,col);

	eig(A,3,eigre,eigim);
	
	for(i=0; i < 3;++i) {
		printf("e%d %g %g \n",i,eigre[i],eigim[i]);
	}
	
	free(P);
	free(bvec);
	free(R);
	free(Q);
	
	return 0;
}

예제 #25

0

파일 보기

파일: transform.c 프로젝트: kasicass/glsim

/*
 * Transforms every buffered vertex of the render state into its processed
 * form: clip-space position, eye-space position, front/back colours (lit or
 * copied from the vertex colour, then clamped to [0, 1]) and transformed
 * texture coordinates.
 */
void
__glcore_transform_vertices (GLcontext *g)
{
    GLrenderstate *rs = g->renderstate;
    GL_vertex *in = rs->verts;
    GL_procvert *out = rs->procverts;
    int idx;

    GL_float modelview[4][4];
    GL_float projection[4][4];
    GL_float texture[4][4];
    GL_float composite[4][4];
    GL_float invmodelview[4][4];

    /* snapshot the tops of the three matrix stacks */
    minit(modelview, g->trans.modelview[g->trans.modelviewdepth]);
    minit(projection, g->trans.projection[g->trans.projectiondepth]);
    minit(texture, g->trans.texture[g->trans.texturedepth]);
    /* composite = projection * modelview; invmodelview is used for normals */
    mmult(composite, projection, modelview);
    minvtrans(invmodelview, modelview);

    for (idx = 0; idx < rs->nverts; idx++) {
        GL_vertex *src = &in[idx];
        GL_procvert *dst = &out[idx];

        /* clip-space and eye-space positions */
        mmultv(dst->position, composite, src->position);
        mmultv(dst->eyeposition, modelview, src->position);

        if (!g->lighting.lighting) {
            /* unlit: both faces take the vertex colour */
            vcopy(dst->frontcolor, src->color);
            vcopy(dst->backcolor, src->color);
        }
        else {
            GL_float objnormal[4];
            GL_float normal[4];

            /* object-space normal, with a 4th component derived from the
               position when its w is non-zero */
            vcopy(objnormal, src->normal);
            objnormal[3] = 0.0f;
            if (src->position[3] != 0.0f) {
                objnormal[3] = -vdot3(objnormal, src->position);
                objnormal[3] /= src->position[3];
            }

            /* eye-space normal */
            mmultv(normal, invmodelview, objnormal);
            if (g->current.normalize)
                vnorm3(normal, normal);

            /* front face */
            compute_lighting(g, dst->frontcolor,
                             dst->eyeposition, normal,
                             &src->frontmaterial);

            /* back face uses the flipped normal */
            if (g->lighting.lightmodeltwoside) {
                vscale(normal, normal, -1.0f);
                compute_lighting(g, dst->backcolor,
                                 dst->eyeposition, normal,
                                 &src->backmaterial);
            }
        }
        vclamp(dst->frontcolor, dst->frontcolor, 0.0f, 1.0f);
        vclamp(dst->backcolor, dst->backcolor, 0.0f, 1.0f);

        /* texture coordinates (no coordinate generation) */
        mmultv(dst->texcoord, texture, src->texcoord);
    }
}

예제 #26

0

파일 보기

파일: s3_vector.c 프로젝트: SahanGH/psi4public

/*
** calc_sigma3(): Calculate the sigma3 vector in equation (9c) of
**    Olsen, Roos, et. al.
**
** Modified 4/8/94 to make C and s one-dimensional
** Modified 11/18/94 for virtual scatter/gather method
** Warning to C neophytes: C is case-sensitive! (e.g. s is not S)
**
** Arguments as used in the body:
**   slist   string records; passed to form_ilist() and walked by the Ib loop
**   C       source coefficients, read as C[LI][Jb_idx]
**   s       accumulated into as s[RI][Ib_idx]
**   tei     integral array, addressed through ioff[]
**   nas     size of the cached L0/R0/S0/V buffers and row count of Cprime
**   nbs     size of the cached F0 buffer and column count of Cprime
**   bfora, bfirst  not referenced in this function
**
** The scratch buffers are function-static and allocated on the first call
** only, using the nas/nbs of that call; they are never freed here.
*/
void calc_sigma3(struct stringwr *slist, double **C, double **s,
      double *tei, int nas, int nbs, 
      unsigned int *bfora, unsigned int *bfirst)
{

   struct stringwr *Ib ;
   int Ia_sym, Ja_sym, Ib_sym, Jb_sym, Ib_idx, Jb_idx;
   int Ib_offset, Ib_end, Jb_end, *Ibij;
   int k, l, ij, kl, klsym;
   int ioffk, Inum, I;
   unsigned int Jbcnt, *Ibridx, Ib_ex;
   signed char *Ibsgn;
   double Jb_sgn;

   /* L0/R0/S0 are the cached list buffers; L/R/S are cursors into them */
   static unsigned int *L, LI, *L0 = NULL;
   static unsigned int *R, RI, *R0 = NULL;
   static int *S, *S0 = NULL;  /* hmmm... */
   double tsgn;
   static double *F0 = NULL;
   static double *V = NULL;
   static double **Cprime = NULL;
   double *Tptr;
   int *orbsym;

   /* orbital symmetries, skipping the first num_fzc_orbs entries */
   orbsym = CalcInfo.orbsym + CalcInfo.num_fzc_orbs;

   /* one-time allocation of the cached scratch space */
   if (F0 == NULL) F0 = init_array(nbs);
   if (Cprime == NULL) {
      Cprime = init_matrix(nas, nbs);
      }

   if (V == NULL) V = init_array(nas);
   if (L0 == NULL) L0 = (unsigned int *) malloc(nas * sizeof(unsigned int));
   if (R0 == NULL) R0 = (unsigned int *) malloc(nas * sizeof(unsigned int));
   if (S0 == NULL) S0 = (int *) malloc (nas * sizeof(int));

   /* set up list L(I), R(I), and S(I) */
   for (Ia_sym=0; Ia_sym < CalcInfo.nirreps; Ia_sym++) {
      for (Ja_sym=0; Ja_sym < CalcInfo.nirreps; Ja_sym++) {
         /* beta symmetry paired with Ja_sym, and the count stored for it */
         Jb_sym = CalcInfo.ref_sym ^ Ja_sym;
         Jb_end = CalcInfo.bsymnum[Jb_sym];
 
         for (k=0; k<CalcInfo.num_ci_orbs; k++) {
            ioffk = ioff[k];
            /* kl runs alongside l, starting at ioff[k] */
            for (l=0,kl=ioffk; l<=k; l++,kl++) {
            klsym = orbsym[k] ^ orbsym[l]; 
            /* integrals for this kl are read as Tptr[ij] below */
            Tptr = tei + ioff[kl];
            Inum = form_ilist(Ia_sym, slist, L0, R0, S0, Ja_sym, klsym, kl);
            /* nothing to do when form_ilist() reports an empty list */
            if (!Inum) continue;

            /* gathering operation to form C' */
            /* Cprime[I][Jb] = C[L0[I]][Jb] * S0[I] for I < Inum, Jb < Jb_end */
            for (I=0,L=L0,S=S0; I<Inum; I++) {
               LI = *L++;
               tsgn = *S++;               
               for (Jb_idx=0; Jb_idx < Jb_end; Jb_idx++) {
                  Cprime[I][Jb_idx] = C[LI][Jb_idx] * tsgn;
                  }
               }

            /* loop over Ib */
            Ib_sym = CalcInfo.ref_sym ^ Ia_sym;
            Ib_offset = CalcInfo.bsymst[Ib_sym];
            Ib_end = CalcInfo.bsymnum[Ib_sym];
            for (Ib_idx=0,Ib=slist+Ib_offset; Ib_idx<Ib_end; Ib_idx++,Ib++) {
               zero_arr(F0, nbs);
                
               /* loop over excitations E^{b}_{ij} from |B(I_b)> */
               Jbcnt = Ib->cnt[Jb_sym][klsym];
               Ibridx = Ib->ridx[Jb_sym][klsym];
               Ibsgn = Ib->sgn[Jb_sym][klsym];
               Ibij = Ib->ij[Jb_sym][klsym];
 
               /* The walk stops at the first entry whose ij exceeds kl; the
                  assignment inside the condition consumes that entry. */
               for (Ib_ex=0; Ib_ex < Jbcnt && (ij = *Ibij++)<=kl; Ib_ex++) {
                  Jb_idx = *Ibridx++;
                  Jb_sgn = (double) *Ibsgn++;
                  F0[Jb_idx] += Jb_sgn * Tptr[ij];
                  }

               /* V(I) = \Sum{J_b} F(J_b) * C'(I, J_b) */
               mmult(Cprime, 0, &F0, 1, &V, 1, Inum, Jb_end, 1, 0);

               /* vectorized scattering */
               /* s[R0[I]][Ib_idx] += V[I] for I < Inum */
               R = R0;
               for (I=0,R=R0; I<Inum; I++) {
                  RI = *R++;
                  s[RI][Ib_idx] += V[I];
                  }

               } /* end loop over Ib */

            } /* end loop over l */
         } /* end loop over k */
      } /* end loop over Ja_sym */
   } /* end loop over Ia_sym */

}

예제 #27

0

파일 보기

파일: newton1d.c 프로젝트: rafat2/codelite

/*
 * lnsrch: backtracking line search along direction p from the point xi.
 *
 *   funcpt  - objective, called as funcpt(point, N)
 *   xi      - starting point (N values)
 *   jac     - gradient at xi (N values); jac . p is the initial slope
 *   p       - search direction (N values); not modified
 *   N       - problem dimension
 *   maxstep - not used by this routine
 *   dx      - per-component scale; 1/|dx[i]| is the floor used for |xi[i]|
 *             in the relative step length
 *   stol    - step tolerance; the smallest step tried is stol / rell
 *   x       - output: xi + lambda * p for the accepted lambda, or a copy of
 *             xi when no acceptable step was found
 *
 * Returns 0 when a step with sufficient decrease
 * (f(x) <= f(xi) + 1e-4 * lambda * slope) was found, and 1 when lambda fell
 * below its minimum or scratch memory could not be allocated.
 *
 * Fixes relative to the previous version:
 *   - p is no longer rescaled in place on every pass, so the trial point
 *     really is xi + lambda * p instead of a product of all earlier lambdas;
 *   - the first (quadratic) backtrack uses -slope / (2 * (f - fi - slope));
 *   - the cubic coefficients are divided by (lambda - lambdaprev) rather
 *     than multiplied by it;
 *   - a non-finite trial step falls back to 0.1 * lambda instead of
 *     looping forever.
 */
int lnsrch(double (*funcpt)(double *,int),double *xi,double *jac,double *p,int N,double maxstep,double * dx,double stol,double *x) {
	int retval,i;
	double alpha,lambda,lambdamin,funcf,funci,lambdaprev,lambdatemp,funcprev;
	double lambda2,lambdaprev2,ll,den,rell;
	double slopei[1],temp1[4],temp2[2],ab[2];
	double *rcheck;
	
	rcheck = (double*) malloc(sizeof(double) *N);
	if (rcheck == NULL) {
		for (i = 0; i < N;++i) {
			x[i] = xi[i];
		}
		return 1;
	}
	retval = 100;
	alpha = 1e-04;
	lambda = 1.0;
	/* both are assigned before first use; initialised for definiteness */
	lambdaprev = lambda;
	funcprev = 0.0;
	
	/* initial slope: jac (1 x N) times p (N x 1) */
	mmult(jac,p,slopei,1,N,1);
	
	/* relative length of p, component by component */
	for(i = 0; i < N;++i) {
		if (fabs(xi[i]) > 1.0 /fabs(dx[i])) {
			den = fabs(xi[i]);
		} else {
			den = 1.0 /fabs(dx[i]);
		}
		rcheck[i] = p[i]/den;
	}
	
	rell = array_max_abs(rcheck,N);
	free(rcheck);
	
	lambdamin = stol/rell;
	
	/* the value at the starting point does not change between passes */
	funci = funcpt(xi,N);
	
	while (retval > 1) {
		for (i = 0; i < N;++i) {
			x[i] = xi[i] + lambda * p[i];
		}
		funcf = funcpt(x,N);
		
		if (funcf <= funci + alpha *lambda *slopei[0]) {
			retval = 0;
		} else if (lambda < lambdamin) {
			retval = 1;
			for (i = 0; i < N;++i) {
				x[i] = xi[i];
			}
		} else {
			if (lambda == 1.0) {
				/* first backtrack: minimiser of the quadratic model */
				lambdatemp = - slopei[0] / (2.0 * (funcf - funci - slopei[0]));
			} else {
				/* later backtracks: cubic model through the last two trials */
				lambda2 = lambda * lambda;
				lambdaprev2 = lambdaprev * lambdaprev;
				ll = lambda - lambdaprev;
				temp1[0] = 1.0 / lambda2; temp1[1] = -1.0 /lambdaprev2;
				temp1[2] = - lambdaprev / lambda2; temp1[3] = lambda /lambdaprev2;
				temp2[0] = funcf - funci - lambda * slopei[0];
				temp2[1] = funcprev - funci - lambdaprev * slopei[0];
				mmult(temp1,temp2,ab,2,2,1);
				scale(ab,1,2,1.0/ll);
				if (ab[0] == 0.0) {
					lambdatemp = - slopei[0] / (2.0 * ab[1]);
				} else {
					lambdatemp = (-ab[1] + sqrt( ab[1] * ab[1] - 3.0 * ab[0] *slopei[0]))/ (3.0 * ab[0]);
				}
				
				if (lambdatemp > 0.5 * lambda) {
					lambdatemp = 0.5 * lambda;
				}
			}
			lambdaprev = lambda;
			funcprev = funcf;
			/* never shrink below a tenth of the current step; written as
			   !(a > b) so that a NaN trial step also takes this branch */
			if (!(lambdatemp > 0.1 * lambda)) {
				lambda = 0.1 * lambda;
			} else {
				lambda = lambdatemp;
			}
		}
	
	}
	
	return retval;
}

예제 #28

0

파일 보기

파일: Main.c 프로젝트: DDCSF/repa

// Matrix-multiply benchmark driver.
//
// Recognised arguments:
//   -random <width> <height>   create a random input matrix (needed twice)
//   -out <file>                write the product to <file>
//   -dumpinput                 also write both inputs as text files
//
// Anything else, or not exactly two input matrices, ends in badUsage().
int main(int argc, char** argv)
{
	Matrix*	inputs[2];
	int 	numInputs	= 0;
	int	wantInputDump	= 0;
	char*	productPath	= 0;

	// Walk the argument list; each recognised option consumes its own words.
	int a = 1;
	while (a < argc) {
		const char* opt = argv[a];

		if (strcmp(opt, "-random") == 0 && a + 2 < argc && numInputs < 2) {
			int width	= 0;
			int height	= 0;

			a++;
			if(sscanf(argv[a++], "%d", &width) != 1) {
				printf("mmult: can't parse matrix width\n");
				exit(1);
			}

			if(sscanf(argv[a++], "%d", &height) != 1) {
				printf("mmult: can't parse matrix height\n");
				exit(1);
			}

			inputs[numInputs++] = newRandomMatrix (width, height);
			continue;
		}

		if (strcmp(opt, "-out") == 0 && a + 1 < argc) {
			a++;
			productPath = argv[a++];
			continue;
		}

		if (strcmp(opt, "-dumpinput") == 0) {
			a++;
			wantInputDump = 1;
			continue;
		}

		badUsage();
	}

	if (numInputs != 2)
		badUsage();

	// Destination takes its width from the second input and its height
	// from the first.
	Matrix* product	= newZeroMatrix (inputs[1]->width, inputs[0]->height);

	// Only the multiplication itself is timed.
	struct benchtime *bt = bench_begin();

	mmult(product, inputs[0], inputs[1]);

	bench_done(bt);

	// Write out the input matrices as files, if we were asked for them.
	if (wantInputDump) {
		char name[80];

		snprintf(name, 80, "input1-%dx%d.mat", inputs[0]->width, inputs[0]->height);
		writeMatrixAsTextFile(name, inputs[0]);

		snprintf(name, 80, "input2-%dx%d.mat", inputs[1]->width, inputs[1]->height);
		writeMatrixAsTextFile(name, inputs[1]);
	}

	if (productPath != 0)
		writeMatrixAsTextFile(productPath, product);

	// Dump checksum
	printf("sum = %f\n", sumMatrix(product));
}