예제 #1
0
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
AssembleElemSolverAlgorithm::execute()
{
  stk::mesh::BulkData & bulk_data = realm_.bulk_data();

  // set any data
  const size_t activeKernelsSize = activeKernels_.size();
  for ( size_t i = 0; i < activeKernelsSize; ++i )
    activeKernels_[i]->setup(*realm_.timeIntegrator_);

  run_algorithm(bulk_data, [&](SharedMemData& smdata)
  {
      set_zero(smdata.simdrhs.data(), smdata.simdrhs.size());
      set_zero(smdata.simdlhs.data(), smdata.simdlhs.size());

      // call supplemental; gathers happen inside the elem_execute method
      for ( size_t i = 0; i < activeKernelsSize; ++i )
        activeKernels_[i]->execute( smdata.simdlhs, smdata.simdrhs, smdata.simdPrereqData );

      for(int simdElemIndex=0; simdElemIndex<smdata.numSimdElems; ++simdElemIndex) {
        extract_vector_lane(smdata.simdrhs, simdElemIndex, smdata.rhs);
        extract_vector_lane(smdata.simdlhs, simdElemIndex, smdata.lhs);
        apply_coeff(nodesPerEntity_, smdata.elemNodes[simdElemIndex],
                    smdata.scratchIds, smdata.sortPermutation, smdata.rhs, smdata.lhs, __FILE__);
      }
  });
}
예제 #2
0
/* Puts every field of the decoder state that is touched here back to its
 * start-up value. */
void init_decod_ld8a(struct dec_state_t * state)
{
  /* exc points PIT_MAX + L_INTERPOL samples into old_exc */
  state->exc = state->old_exc + PIT_MAX + L_INTERPOL;

  /* clear the history buffers */
  set_zero(state->old_exc, PIT_MAX+L_INTERPOL);
  set_zero(state->mem_syn, M);

  /* scalar decoder parameters */
  state->sharp      = SHARPMIN;
  state->old_t0     = 60;
  state->gain_code  = (F)0.0;
  state->gain_pitch = (F)0.0;

  /* sub-module resets */
  lsp_decw_reset(&state->lsp_s);
  init_exc_err(state->cng_s.exc_err); // ?

  /* start the LSP history from the reset table */
  copy(lsp_reset, state->lsp_old, M);

  /* for G.729B */
  state->seed_fer  = 21845;
  state->past_ftyp = 1;
  state->seed      = INIT_SEED;
  state->sid_sav   = (F)0.;
  init_lsfq_noise(&state->cng_s.lsfq_s); // ?

  gain_past_reset(&state->gain_s);

  /* bad LSF indicator starts cleared */
  state->bad_lsf = 0;
}
예제 #3
0
double chiral_condensate()
{
 complex double q1, q2;
 int i0 = 0;
 q1 = 0.0 + I*0.0;
 q2 = 0.0 + I*0.0;

 //To calculate D^{-1}(x,x), we invert solve the equation D R  = S
 //Where the source S is only nonzero at x and for different spinor components
 //The required number of sources is the number of spinor components
 // Source 1
 set_zero(S0);
 S0[i0].s1 = 1.0 + I*0.0;
 gam5D_wilson(S, S0);
 cg(R1, S, ITER_MAX, DELTACG, &gam5D_SQR_wilson); //Inverting the Dirac operator on source 1
 q1 += R1[i0].s1;
 // Source 2
 set_zero(S0);
 S0[i0].s2 = 1.0 + I*0.0;
 gam5D_wilson(S, S0);
 cg(R2, S, ITER_MAX, DELTACG, &gam5D_SQR_wilson); //Inverting the Dirac operator on source 2
 q2 += R2[i0].s2;
 
 if(fabs(cimag(q1 - q2))>sqrt(DELTACG))
 {
  printf("\n Imaginary part of chiral condensate detected!!! \n"); 
 };
 //q1 and q2 are the diagonal components (11 and 22) of the propagator
 //q1 - q2 is tr(gamma_5 D^-1)
 return creal(q1 - q2);
}
예제 #4
0
 void ARingZZGMP::syzygy(const ElementType& a, const ElementType& b,
                      ElementType& x, ElementType& y) const
 {
   M2_ASSERT(!is_zero(b));
   // First check the special cases a = 0, b = 1, -1.  Other cases: use gcd.
   if (is_zero(a))
     {
       set_from_long(x, 1);
       set_zero(y);
       return;
     }
   if (mpz_cmp_ui(&b,1) == 0)
     {
       set_from_long(x, 1);
       negate(y, a);
       return;
     }
   if (mpz_cmp_si(&b,-1) == 0)
     {
       set_from_long(x, 1);
       set(y, a);
       return;
     }
   elem g;
   init(g);
   mpz_gcd(&g,&a,&b);
   divide(y,a,g);
   divide(x,b,g);
   if (mpz_sgn(&x) > 0)
     negate(y,y);
   else
     negate(x,x);
   clear(g);
 }
예제 #5
0
 // Stores a in result when is_unit(a) holds; otherwise result is zeroed.
 void invert(ElementType& result, const ElementType& a) const
 {
   if (!is_unit(a))
     {
       set_zero(result);
       return;
     }
   set(result, a);
 }
예제 #6
0
파일: main.C 프로젝트: waterret/Qlattice
void displaySpinPropagator4d()
{
  TIMER("displaySpinPropagator4d");
  // qlat::Coordinate total_site(16, 16, 16, 32);
  qlat::Coordinate total_site(4, 4, 4, 8);
  qlat::Geometry geo;
  geo.init(total_site, 1);
  qlat::DisplayInfo(cname, fname, "geo =\n%s\n", qlat::show(geo).c_str());
  std::array<double, qlat::DIMN> momtwist;
  momtwist[0] = 0.0;
  momtwist[1] = 0.0;
  momtwist[2] = 0.0;
  momtwist[3] = 0.0;
  const double mass = 0.1;
  qlat::SpinPropagator4d prop;
  prop.init(geo);
  set_zero(prop);
  qlat::Coordinate xgsrc(0, 0, 0, 0);
  qlat::Coordinate xlsrc = geo.coordinate_l_from_g(xgsrc);
  if (geo.is_local(xlsrc)) {
    qlat::set_unit(prop.get_elem(xlsrc));
  }
  qlat::prop_spin_propagator4d(prop, mass, momtwist);
  qlat::Coordinate xgsnk(0, 0, 0, 0);
  qlat::Coordinate xlsnk = geo.coordinate_l_from_g(xgsnk);
  qlat::DisplayInfo(cname, fname, "xgsnk = %s .\n", qlat::show(xgsnk).c_str());
  if (geo.is_local(xlsnk)) {
    qlat::Display(cname, fname, "prop[xgsnk] =\n%s\n",
                  qlat::show(prop.get_elem(xlsnk)).c_str());
  }
}
예제 #7
0
파일: ddr.c 프로젝트: eagle860/bare
/*
 * Programs the memory controller registers (timing, memory configuration,
 * direct commands) and then spins until the status register reports ready.
 *
 * Returns 0 once the controller reports ready. The function only returns
 * after the ready poll succeeds, so there is no failure code.
 */
int ddr_init( void )
{
	// tell dramc to configure				
	set_val( P1MEMCCMD, 0x4 );

	// set refresh period	
	set_val( P1REFRESH, nstoclk(7800) );

	// set timing para		
	set_val( P1CASLAT, ( 3 << 1 ) );  
	set_val( P1T_DQSS, 0x1 );	// 0.75 - 1.25
	set_val( P1T_MRD, 0x2 );
	set_val( P1T_RAS, nstoclk(45) );
	set_val( P1T_RC, nstoclk(68) );		

	unsigned int trcd = nstoclk( 23 );
	set_val( P1T_RCD, trcd | (( trcd - 3 ) << 3 ) );
	unsigned int trfc = nstoclk( 80 );
	set_val( P1T_RFC, trfc | ( ( trfc-3 ) << 5 ) );   
	unsigned int trp = nstoclk( 23 );
	set_val( P1T_RP, trp | ( ( trp - 3 ) << 3 ) ); 
	set_val( P1T_RRD, nstoclk(15) );
	set_val( P1T_WR, nstoclk(15) );
	set_val( P1T_WTR, 0x7 );
	set_val( P1T_XP, 0x2 );
	set_val( P1T_XSR, nstoclk(120) );
	set_val( P1T_ESR, nstoclk(120) );
	
	// set mem cfg 
	set_nbit( P1MEMCFG, 0, 3, 0x2 );  /* 10 column address */

	/* set_nbit: clear `len` bits starting at bit position `bit`, then set those bits to `val` */
	
	set_nbit( P1MEMCFG, 3, 3, 0x3 );  /* 14 row address */
	set_zero( P1MEMCFG, 6 );		  /* A10/AP */
	set_nbit( P1MEMCFG, 15, 3, 0x2 ); /* Burst 4 */
	
	set_nbit( P1MEMCFG2, 0, 4, 0x5 );
	set_2bit( P1MEMCFG2, 6, 0x1 );		/* 32 bit */
	set_nbit( P1MEMCFG2, 8, 3, 0x3 );	/* Mobile DDR SDRAM */
	set_2bit( P1MEMCFG2, 11, 0x1 );

	set_one( P1_chip_0_cfg, 16 );		/* Bank-Row-Column organization */

	// memory init
	set_val( P1DIRECTCMD, 0xc0000 ); // NOP
	set_val( P1DIRECTCMD, 0x000 );	// precharge
	set_val( P1DIRECTCMD, 0x40000 );// auto refresh
	set_val( P1DIRECTCMD, 0x40000 );// auto refresh
	set_val( P1DIRECTCMD, 0xa0000 ); // EMRS
	set_val( P1DIRECTCMD, 0x80032 ); // MRS

	set_val( MEM_SYS_CFG, 0x0 );
					
	// set dramc to "go" status	
	set_val( P1MEMCCMD, 0x000 );

	// wait ready: poll until the low two status bits read 0x1
	while( !(( read_val( P1MEMSTAT ) & 0x3 ) == 0x1));

	// The function is declared int; falling off the end would leave the
	// caller with an indeterminate value.
	return 0;
}
예제 #8
0
파일: fir~.c 프로젝트: CNMAT/CNMAT-Externs
/*
 * Creates a new fir object: allocates it from fir_class, sets it up with one
 * signal inlet and one signal outlet, records the length n + 1 and clears
 * both the coefficient buffer and the f_ff buffer.
 *
 * Returns the new object.
 */
void *fir_new(long n)
{
	t_fir *x = (t_fir *)newobject(fir_class);
	t_float *coefs = x->f_coefs;
	t_float *ff = x->f_ff;
	
	dsp_setup((t_pxobject *)x, 1);
	outlet_new((t_object *)x, "signal");
	
	/* NOTE(review): n is stored unchecked; presumably f_length must not
	   exceed MAXSIZE, the size both buffers are cleared to -- confirm. */
	x->f_length = n + 1;
	set_zero(coefs, MAXSIZE);
	set_zero(ff, MAXSIZE);
	return (x);
}
예제 #9
0
// Resizes vec to d1 elements and applies set_zero to each of them.
void make_vector_zeroes( container & vec, const typename container::size_type & d1)
{
	vec.resize(d1);
	for(auto && element : vec)
		set_zero(element);
}
예제 #10
0
파일: aring-qq-gmp.hpp 프로젝트: pzinn/M2
 // Writes the inverse of a into result and returns true when is_unit(a)
 // holds; otherwise zeroes result and returns false.
 bool invert(ElementType& result,const ElementType& a) const {
     const bool invertible = is_unit(a);
     if (invertible)
         mpq_inv(&result, &a);
     else
         set_zero(result);
     return invertible;
 }
예제 #11
0
/*--------------------------------------------------------------------------
 * init_decod_ld8k - Initialization of variables for the decoder section.
 *--------------------------------------------------------------------------
 */
void init_decod_ld8k(void)
{
    /* exc points PIT_MAX + L_INTERPOL samples into old_exc */
    exc = old_exc + PIT_MAX + L_INTERPOL;

    /* clear the excitation history and the synthesis memory */
    set_zero(old_exc,PIT_MAX + L_INTERPOL);
    set_zero(mem_syn, M);

    /* scalar decoder parameters */
    sharp      = SHARPMIN;
    old_t0     = 60;
    gain_code  = (F)0.;
    gain_pitch = (F)0.;

    lsp_decw_reset();
}
예제 #12
0
 // Computes result = a^n.
 // A zero base always yields zero. For a negative exponent the base is
 // inverted first and then raised to |n|.
 void power(ElementType& result, const ElementType& a, int n) const
 {
   if (is_zero(a))
     set_zero(result);
   else if (n < 0)
     {
       // Take |n| in unsigned arithmetic: the expression -n is signed
       // overflow (undefined behaviour) when n == INT_MIN.
       const unsigned long magnitude = 0UL - static_cast<unsigned long>(n);
       invert(result, a);
       fq_zech_pow_ui(&result, &result, magnitude, mContext);
     }
   else
     fq_zech_pow_ui(&result, &a, n, mContext);
 }
예제 #13
0
// Draws the sudoku screen: the title (vertical and horizontal), the double
// outer frame, the 9x9 grid of 20-pixel cells and the red 3x3 overlay.
// Always returns 0.
int drw_sudoku()
{
	// Text buffer for the cell label. The original wrote through an
	// uninitialised char pointer, which is undefined behaviour.
	char label[16];

	int i,j,row=0,col=0;
	int b[9][9];

	// NOTE(review): col is never reset per row, so row==col holds only for
	// the very first cell; b[0][0] becomes 3 and every other entry -1.
	// Presumably the diagonal was intended -- confirm before changing.
	for(i=0;i<9;i++)
	{
		for(j=0;j<9;j++)
		{
			if(row==col) b[i][j]=3;
			else	b[i][j]=-1;
			++col;
		}
		++row;
	}

	// Title, once vertically and once horizontally.
	setcolor(2);  //G
	settextstyle(DEFAULT_FONT,VERT_DIR,4);
	outtextxy(110,100,"SU-DOKU");
	settextstyle(DEFAULT_FONT,HORIZ_DIR,4);
	outtextxy(130,60,"SU-DOKU");

	// Outer frame: red rectangle with a white one inside it.
	setcolor(4);  //R
	rectangle(154,119,346,311);
	setcolor(15); //W
	rectangle(157,122,343,308);

	// 1*1 grid.
	// NOTE(review): row is never reset per column here either, so the
	// row==col branch runs only for the first cell and b is only read at
	// [0][0] -- confirm intent.
	row=0;col=0;
	settextstyle(DEFAULT_FONT,HORIZ_DIR,1);
	for(i=160;i<=320;i=i+20)
	{
		for(j=125;j<=285;j=j+20)
		{
			rectangle(i,j,i+20,j+20);
			if(row==col)
			{
				set_zero(i+3,i+17,j+3,j+17,11);
				setcolor(5);
				sprintf(label,"%d",b[row][col]);
				outtextxy(i+7,j+7,label);
			}
			setcolor(15);
			++row;
		}
		++col;
	}

	// 3*3 grid.
	setcolor(4);  //R
	for(i=160;i<=320;i=i+60)
	{
		for(j=125;j<=285;j=j+60)
			rectangle(i,j,i+60,j+60);
	}
	return 0;
}
예제 #14
0
/*--------------------------------------------------------------------------
* init_decod_ld8c - Initialization of variables for the decoder section.
*--------------------------------------------------------------------------
*/
void init_decod_ld8c(void)
{
    /* exc points PIT_MAX + L_INTERPOL samples into old_exc */
    exc = old_exc + PIT_MAX + L_INTERPOL;

    /* clear the excitation history and the synthesis memory */
    set_zero(old_exc, PIT_MAX+L_INTERPOL);
    set_zero(mem_syn, M_BWD);

    /* pitch / gain parameters */
    sharp        = SHARPMIN;
    prev_t0      = 60;
    prev_t0_frac = 0;
    gain_code    = (F)0.;
    gain_pitch   = (F)0.;

    lsp_decw_resete(freq_prev, prev_lsp, &prev_ma);

    /* filter memories: all zero except a leading 1 */
    set_zero(A_bwd_mem, M_BWDP1);
    A_bwd_mem[0]   = (F)1.;
    set_zero(A_t_bwd_mem, M_BWDP1);
    A_t_bwd_mem[0] = (F)1.;

    prev_voicing = 0;
    prev_bfi     = 0;
    prev_lp_mode = 0;
    c_fe         = (F)0.;
    c_int        = (F)1.1;       /* Filter interpolation parameter */

    set_zero(prev_filter, M_BWDP1);
    prev_filter[0] = (F)1.;

    prev_pitch = 30;
    stat_pitch = 0;

    set_zero(old_A_bwd, M_BWDP1);
    old_A_bwd[0] = (F)1.;
    set_zero(rexp, M_BWDP1);
    set_zero(old_rc_bwd, 2);

    gain_pit_mem = (F)0.;
    gain_cod_mem = (F)0.;
    c_muting     = (F)1.;
    count_bfi    = 0;
    stat_bwd     = 0;

    /* for G.729B */
    seed_fer  = (INT16)21845;
    past_ftyp = 3;
    seed      = INIT_SEED;
    sid_sav   = (FLOAT)0.;
    init_lsfq_noise();
}
예제 #15
0
	// Makes vec the same length as other_vec, every element being a copy
	// of one element that was passed through set_zero. An empty other_vec
	// empties vec.
	vector_zeroer(container & vec, const other_container & other_vec)
	{
		if(other_vec.empty())
		{
			vec.resize(0);
			return;
		}

		// Zero a single element, then grow the vector with copies of it.
		vec.resize(1);
		set_zero(vec.front());
		vec.resize(other_vec.size(),vec.front());
	}
// Head/tail overload with bounds: replaces the head step size by a tenth of
// the head range (v_max - v_min) when it is not positive or not smaller than
// that range, then recurses on the tails.
void fix_step_sigmas( T & v_sigmas, const T & v_min, const T & v_max )
{
	typedef typename std::decay<decltype(v_sigmas.get_head())>::type value_type;

	value_type zero;
	set_zero(zero);

	// A step is rejected when it is at least as large as the range. This
	// matches the indexed overload; the previous `<=` rejected every step
	// that fitted inside the range.
	if((v_sigmas.get_head() <= zero) || (v_sigmas.get_head() >= v_max.get_head() - v_min.get_head() ))
		v_sigmas.get_head() = (v_max.get_head() - v_min.get_head())/10.;

	fix_step_sigmas(v_sigmas.get_tail(),v_min.get_tail(),v_max.get_tail());

	return;
}
// Head/tail overload without bounds: a head step size that is not positive
// is replaced by 1 (converted with units_cast), then the tails are handled
// recursively.
void fix_step_sigmas( T & v_sigmas )
{
	typedef typename std::decay<decltype(v_sigmas.get_head())>::type value_type;

	value_type zero;
	set_zero(zero);

	auto && head_sigma = v_sigmas.get_head();
	if(head_sigma <= zero)
	{
		head_sigma = units_cast<value_type>(1.);
	}

	fix_step_sigmas(v_sigmas.get_tail());
}
void fix_step_sigmas( const T & v_sigmas )
{
	typedef typename std::decay<decltype(v_sigmas[0])>::type value_type;

	value_type zero;
	set_zero(zero);

	for(int_type i=0; i<ssize(v_sigmas); ++i)
	{
		if( v_sigmas[i]<=zero )
			v_sigmas[i] = units_cast<value_type>(1.);
	}

	return;
}
// Indexed overload with bounds: v_sigmas is first brought to the length of
// v_min (all elements zeroed if it had to be resized); afterwards every step
// that is not positive or not smaller than its range v_max[i] - v_min[i] is
// replaced by a tenth of that range.
void fix_step_sigmas( T & v_sigmas, const T & v_min, const T & v_max )
{
	assert(ssize(v_min)==ssize(v_max));

	typedef typename std::decay<decltype(v_sigmas[0])>::type value_type;

	value_type zero;
	set_zero(zero);

	// Wrong length: resize and start from all-zero steps.
	if(ssize(v_sigmas) != ssize(v_min))
	{
		v_sigmas.resize(ssize(v_min));
		for(auto it = v_sigmas.begin(); it != v_sigmas.end(); ++it)
		{
			set_zero(*it);
		}
	}

	for(int_type idx=0; idx<ssize(v_sigmas); ++idx)
	{
		const bool not_positive = (v_sigmas[idx]<=zero);
		if( not_positive || (v_sigmas[idx] >= v_max[idx]-v_min[idx]))
		{
			v_sigmas[idx] = (v_max[idx]-v_min[idx])/10.;
		}
	}
}
예제 #20
0
파일: MemsetTest.cpp 프로젝트: ghub/NVprSDK
// For every count/alignment pair: zero the buffer, fill `count` 32-bit words
// starting PAD + alignment words in, then run compare32 over the words
// before, inside and after the filled span.
// NOTE(review): the compare32 results are not checked here and `reporter`
// is unused -- presumably compare32 reports failures itself; confirm.
static void test_32(skiatest::Reporter* reporter) {
    uint32_t buffer[TOTAL];

    for (int count = 0; count < MAX_COUNT; ++count) {
        for (int alignment = 0; alignment < MAX_ALIGNMENT; ++alignment) {
            uint32_t* const target = buffer + (PAD + alignment);

            set_zero(buffer, sizeof(buffer));
            sk_memset32(target, VALUE32, count);

            // leading pad, filled span, trailing remainder
            compare32(buffer, 0, PAD + alignment);
            compare32(target, VALUE32, count);
            compare32(target + count, 0, TOTAL - count - PAD - alignment);
        }
    }
}
예제 #21
0
파일: test.c 프로젝트: majorana/QED3
/*
 * Writes the matrix obtained by applying fermion_fp to each unit basis
 * vector: real parts go to "fmat_real.dat", imaginary parts to
 * "fmat_imag.dat". Each basis vector produces one line of GRIDPOINTS values.
 *
 * If either file cannot be opened, the error is reported with perror and the
 * function returns without writing that file.
 */
void fprint_fermion_mat() {
	int i, j;
	complex double x;
	complex double basis[GRIDPOINTS];
	complex double out[GRIDPOINTS];
	complex double temp[GRIDPOINTS];
	FILE *fp;
	
	fp = fopen("fmat_real.dat", "w");
	if (fp == NULL)
	{
		/* fprintf/fclose on a NULL stream is undefined behaviour */
		perror("fmat_real.dat");
		return;
	}
	
	printf("\n Output fermion determinant...\n");
	set_zero(basis);
	for(i = 0; i<GRIDPOINTS; i++) 
	{
		/* basis is the i-th unit vector for the duration of this pass */
		basis[i] = 1.0;
		fermion_fp(out, temp, basis);
		for(j = 0; j < GRIDPOINTS-1; j++)
		{
			x = out[j];
			fprintf(fp, "%f  ", creal(x));
		}
		fprintf(fp, "%f\n", creal(out[GRIDPOINTS-1]));
		basis[i] = 0.0;
	}
	fclose(fp);

	fp = fopen("fmat_imag.dat", "w");
	if (fp == NULL)
	{
		perror("fmat_imag.dat");
		return;
	}

	for(i = 0; i<GRIDPOINTS; i++) 
	{
		basis[i] = 1.0;
		fermion_fp(out, temp, basis);
		for(j = 0; j < GRIDPOINTS-1; j++)
		{
			x = out[j];
			fprintf(fp, "%f  ", cimag(x));
		}
		fprintf(fp, "%f\n", cimag(out[GRIDPOINTS-1]));
		basis[i] = 0.0;
	}
	fclose(fp);
}
예제 #22
0
/*---------------------------------------------------------------------------*
 * Function  vad_init                                                                                                            *
 * ~~~~~~~~~~~~~~~~~~                                                                                                            *
 *                                                                                                                                                       *
 * -> Initialization of variables for voice activity detection                           *
 *                                                                                                                                                       *
*---------------------------------------------------------------------------*/
void vad_init(struct vad_state_t * state)
{
    /* running means start at zero */
    set_zero(state->MeanLSF, M);
    state->MeanSE  = (F)0.0;
    state->MeanSLE = (F)0.0;
    state->MeanE   = (F)0.0;
    state->MeanSZC = (F)0.0;

    /* counters start at zero */
    state->count_sil    = 0;
    state->count_update = 0;
    state->count_ext    = 0;
    state->less_count   = 0;

    /* remaining VAD parameters */
    state->flag = 1;
    state->Min  = FLT_MAX_G729;
}
예제 #23
0
// For every count/alignment pair: zero the buffer, fill `count` 16-bit words
// starting PAD + alignment words in, then assert via compare16 on the words
// before, inside and after the filled span.
static void test_16(skiatest::Reporter* reporter) {
    uint16_t buffer[TOTAL];

    for (int count = 0; count < MAX_COUNT; ++count) {
        for (int alignment = 0; alignment < MAX_ALIGNMENT; ++alignment) {
            uint16_t* base = buffer + (PAD + alignment);

            set_zero(buffer, sizeof(buffer));
            sk_memset16(base, VALUE16, count);

            // leading pad, filled span, trailing remainder
            REPORTER_ASSERT(reporter,
                            compare16(buffer, 0, PAD + alignment) &&
                            compare16(base, VALUE16, count) &&
                            compare16(base + count, 0, TOTAL - count - PAD - alignment));
        }
    }
}
예제 #24
0
파일: vad.c 프로젝트: ZF0085/onionphone
/*---------------------------------------------------------------------------*
 * Function  vad_init                                                                                                            *
 * ~~~~~~~~~~~~~~~~~~                                                                                                            *
 *                                                                                                                                                       *
 * -> Initialization of variables for voice activity detection                           *
 *                                                                                                                                                       *
*---------------------------------------------------------------------------*/
void vad_init(void)
{
    /* running means start at zero */
    set_zero(MeanLSF, M);
    MeanSE  = (float) 0.0;
    MeanSLE = (float) 0.0;
    MeanE   = (float) 0.0;
    MeanSZC = (float) 0.0;

    /* counters start at zero */
    count_sil    = 0;
    count_update = 0;
    count_ext    = 0;
    less_count   = 0;

    /* remaining VAD parameters */
    flag = 1;
    Min  = FLT_MAX_G729;
}
예제 #25
0
파일: fir~.c 프로젝트: CNMAT/CNMAT-Externs
/*
 * Replaces the filter coefficients: clears the whole coefficient buffer,
 * then stores the float and long atoms among the first MIN(MAXSIZE, ac)
 * arguments back to back, longs converted to float. Atoms of any other
 * type are skipped. f_length is left unchanged.
 */
void fir_set(t_fir *x, Symbol *s, int ac, Atom *av)
{
	int i;
	int j = 0;	/* number of coefficients stored so far */
	int m = MIN(MAXSIZE, ac);
	t_float *coefs = x->f_coefs;

	set_zero(coefs, MAXSIZE);
	for (i = 0; i < m; i++) {
		switch (av[i].a_type) {
		case A_FLOAT:
			coefs[j++] = av[i].a_w.w_float;
			break;
		case A_LONG:
			coefs[j++] = (float)av[i].a_w.w_long;
			break;
		default:
			break;
		}
	}
	//x->f_length = j;
}
예제 #26
0
	// Stores the threshold, stream pointer and output buffer in the shared
	// context fields, zeroes the direct-current vector and returns whatever
	// reset() returns.
	bool init ( PSORA_RADIO_RX_STREAM pRxStream, UCHAR* output, uint out_size ) 
	{
		// CF_11CCA: power threshold
		CF_11CCA::cca_pwr_threshold() = 1000*1000*4;

		// CF_RxStream: stream pointer, touched flag cleared
		CF_RxStream::rxstream_pointer() = pRxStream;
		CF_RxStream::rxstream_touched() = 0;

		// CF_VecDC: direct-current vector starts at zero
		set_zero(CF_VecDC::direct_current());

		// CF_RxFrameBuffer: caller-supplied output buffer and its size
		CF_RxFrameBuffer::rx_frame_buf() = output;
		CF_RxFrameBuffer::rx_frame_buf_size() = out_size;

		return reset ();
	}
예제 #27
0
  // Computes result = a^n for an mpz exponent.
  // A zero base always yields zero. For a negative exponent the base is
  // inverted and n is negated in place for the duration of the call; its
  // original sign is restored before returning.
  void power_mpz(ElementType& result, const ElementType& a, mpz_ptr n) const
  {
    if (is_zero(a))
      {
        set_zero(result);
        return;
      }

    const bool negative_exponent = (mpz_sgn(n) < 0);
    if (!negative_exponent)
      copy(result, a);
    else
      {
        // work with |n| and 1/a
        mpz_neg(n, n);
        invert(result, a);
      }

    fmpz_t exponent;
    fmpz_init_set_readonly(exponent, n);
    fq_zech_pow(&result, &result, exponent, mContext);
    fmpz_clear_readonly(exponent);

    // put the caller's exponent back the way it was
    if (negative_exponent) mpz_neg(n, n);
  }
예제 #28
0
/* Multiplies two random 4x4 matrices with the plain routine (timed) and then
 * with the Strassen routine, printing the operands and both results. */
int main()
{
    matrix_t mat_a, mat_b, mat_c;
    struct timeval start_time, end_time;

    /* operands and an empty result matrix */
    random_matrix(&mat_a, 4);
    random_matrix(&mat_b, 4);
    null_matrix(&mat_c, 4);

    print_matrix(mat_a);
    printf("\n");
    print_matrix(mat_b);
    printf("\n");
    print_matrix(mat_c);

    /* plain multiplication, timed */
    gettimeofday(&start_time, 0);
    matrix_multiplication(mat_a, mat_b, mat_c);
    gettimeofday(&end_time, 0);

    printf("Normal Multiplication\n");
    print_matrix(mat_c);
    print_time_taken(start_time, end_time);

    /* Strassen multiplication into a cleared result */
    mat_c = set_zero(mat_c);
    mat_c = matrix_multiplication_strassen(mat_a, mat_b, mat_c, 2);

    printf("Strassen Multiplication\n");
    print_matrix(mat_c);
}
예제 #29
0
파일: main.cpp 프로젝트: alieed/hermes
int main() 
{
  // Time measurement.
  TimePeriod cpu_time;
  cpu_time.tick();

  // Create space, set Dirichlet BC, enumerate basis functions.
  Space* space = new Space(A, B, NELEM, DIR_BC_LEFT, DIR_BC_RIGHT, P_INIT, NEQ);
  int ndof = Space::get_num_dofs(space);
  info("ndof: %d", ndof);

  // Initialize the weak formulation.
  WeakForm wf;
  wf.add_matrix_form(jacobian);
  wf.add_vector_form(residual);

  // Initialize the FE problem.
  bool is_linear = false;
  DiscreteProblem *dp = new DiscreteProblem(&wf, space, is_linear);

  // Set zero initial condition.
  double *coeff_vec = new double[ndof];
  set_zero(coeff_vec, ndof);

  // Set up the solver, matrix, and rhs according to the solver selection.
  SparseMatrix* matrix = create_matrix(matrix_solver);
  Vector* rhs = create_vector(matrix_solver);
  Solver* solver = create_linear_solver(matrix_solver, matrix, rhs);

  int it = 1;
  bool success = false;
  while (1) 
  {
    // Obtain the number of degrees of freedom.
    int ndof = Space::get_num_dofs(space);

    // Assemble the Jacobian matrix and residual vector.
    dp->assemble(coeff_vec, matrix, rhs);

    // Calculate the l2-norm of residual vector.
    double res_l2_norm = get_l2_norm(rhs);

    // Info for user.
    info("---- Newton iter %d, ndof %d, res. l2 norm %g", it, Space::get_num_dofs(space), res_l2_norm);

    // If l2 norm of the residual vector is within tolerance, then quit.
    // NOTE: at least one full iteration forced
    //       here because sometimes the initial
    //       residual on fine mesh is too small.
    if(res_l2_norm < NEWTON_TOL && it > 1) break;

    // Multiply the residual vector with -1 since the matrix 
    // equation reads J(Y^n) \deltaY^{n+1} = -F(Y^n).
    for(int i=0; i<ndof; i++) rhs->set(i, -rhs->get(i));

    // Solve the linear system.
    if(!(success = solver->solve()))
      error ("Matrix solver failed.\n");

    // Add \deltaY^{n+1} to Y^n.
    for (int i = 0; i < ndof; i++) coeff_vec[i] += solver->get_solution()[i];

    // If the maximum number of iteration has been reached, then quit.
    if (it >= NEWTON_MAX_ITER) error ("Newton method did not converge.");
    
    it++;
  }

  info("Total running time: %g s", cpu_time.accumulated());

  // Test variable.
  info("ndof = %d.", Space::get_num_dofs(space));
  if (success)
  {
    info("Success!");
    return ERROR_SUCCESS;
  }
  else
  {
    info("Failure!");
    return ERROR_FAILURE;
  }
}
예제 #30
0
파일: shader.cpp 프로젝트: Lamorna/engine
/*
==================
==================
*/
// Adds a lighting term to the vertex colours of a batch of triangles.
//
// positions / colour are indexed [triangle][vertex]; the first n_triangles
// entries are loaded, processed in __m128 registers and the colours are
// written back into `colour`.
//
// NOTE(review): the arrays hold four triangles, so n_triangles is presumably
// expected to be at most 4 -- confirm against callers.
// NOTE(review): X, Y, Z, W and R, A are index constants defined elsewhere;
// the arithmetic and comparison operators on __m128 / __m128i are presumably
// project-provided overloads.
void Vertex_Lighting(

	const __int32 n_triangles,
	const vertex_light_manager_& vertex_light_manager,
	const float4_ positions[4][3],
	float4_ colour[4][3]

) {

	static const float r_screen_scale_x = 1.0f / screen_scale_x;
	static const float r_screen_scale_y = 1.0f / screen_scale_y;
	const __m128 attenuation_factor = set_all(800.0f);
	const __m128 specular_scale = set_all(100.0f);
	const __m128 diffuse_scale = set_all(20.0f);

	const __m128 zero = set_all(0.0f);
	const __m128 one = set_all(1.0f);

	__m128 r_screen_scale[2];
	r_screen_scale[X] = set_all(r_screen_scale_x);
	r_screen_scale[Y] = set_all(r_screen_scale_y);
	__m128 screen_shift[2];
	screen_shift[X] = set_all(screen_shift_x);
	screen_shift[Y] = set_all(screen_shift_y);

	__m128 clip_space_position[3][4];
	__m128 vertex_colour[3][4];

	// Load each vertex of every triangle, transpose, and undo the screen
	// shift/scale; the reciprocal of Z is stored as the Z component.
	for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {

		// NOTE(review): only the first n_triangles entries are written before
		// Transpose reads the array, so the remaining entries are
		// uninitialised when n_triangles < 4 -- confirm this is acceptable.
		__m128 vertex_position[4];
		for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) {
			vertex_position[i_triangle] = load_u(positions[i_triangle][i_vertex].f);
			vertex_colour[i_vertex][i_triangle] = load_u(colour[i_triangle][i_vertex].f);
		}
		Transpose(vertex_position);
		Transpose(vertex_colour[i_vertex]);

		__m128 depth = reciprocal(vertex_position[Z]);
		clip_space_position[i_vertex][X] = ((vertex_position[X] - screen_shift[X]) * r_screen_scale[X]) * depth;
		clip_space_position[i_vertex][Y] = ((vertex_position[Y] - screen_shift[Y]) * r_screen_scale[Y]) * depth;
		clip_space_position[i_vertex][Z] = depth;
	}

	// Edge vectors from vertex 0 to vertices 1 and 2.
	__m128 a[3];
	a[X] = clip_space_position[1][X] - clip_space_position[0][X];
	a[Y] = clip_space_position[1][Y] - clip_space_position[0][Y];
	a[Z] = clip_space_position[1][Z] - clip_space_position[0][Z];

	__m128 b[3];
	b[X] = clip_space_position[2][X] - clip_space_position[0][X];
	b[Y] = clip_space_position[2][Y] - clip_space_position[0][Y];
	b[Z] = clip_space_position[2][Z] - clip_space_position[0][Z];


	// Face normal = a x b, scaled by the approximate reciprocal square root
	// of its squared length.
	__m128 normal[4];
	normal[X] = (a[Y] * b[Z]) - (a[Z] * b[Y]);
	normal[Y] = (a[Z] * b[X]) - (a[X] * b[Z]);
	normal[Z] = (a[X] * b[Y]) - (a[Y] * b[X]);

	__m128 mag = (normal[X] * normal[X]) + (normal[Y] * normal[Y]) + (normal[Z] * normal[Z]);
	mag = _mm_rsqrt_ps(mag);
	normal[X] *= mag;
	normal[Y] *= mag;
	normal[Z] *= mag;

	// The light loop is hard-coded to a single iteration.
	for (__int32 i_light = 0; i_light < 1; i_light++) {


		for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {


			__m128 light_position[3];
			__m128 light_colour[3];
			const float intensity = vertex_light_manager.light_sources[i_light].intensity;
			for (__int32 i_axis = X; i_axis < W; i_axis++) {

				light_position[i_axis] = set_all(vertex_light_manager.light_sources[i_light].position.f[i_axis]);
				light_colour[i_axis] = set_all(vertex_light_manager.light_sources[i_light].colour.f[i_axis] * intensity);
			}

			// NOTE(review): is_valid is computed here but never read afterwards.
			const __m128 extent = set_all(40.0f);
			__m128i is_valid = set_all(-1);
			is_valid &= (clip_space_position[i_vertex][X] - light_position[X]) < extent;
			is_valid &= (clip_space_position[i_vertex][Y] - light_position[Y]) < extent;
			is_valid &= (clip_space_position[i_vertex][Z] - light_position[Z]) < extent;

			// NOTE(review): the position and colour just read from the light
			// manager are overwritten with constants here, so the manager's
			// values only feed the unused is_valid mask above.
			light_position[X] = set_all(0.0f);
			light_position[Y] = set_all(0.0f);
			light_position[Z] = set_all(0.0f);

			light_colour[X] = set_all(100.0f);
			light_colour[Y] = set_all(100.0f);
			light_colour[Z] = set_all(100.0f);

			__m128 light_ray[3];
			light_ray[X] = clip_space_position[i_vertex][X] - light_position[X];
			light_ray[Y] = clip_space_position[i_vertex][Y] - light_position[Y];
			light_ray[Z] = clip_space_position[i_vertex][Z] - light_position[Z];

			// This `mag` shadows the outer one; it holds the reciprocal
			// square root of the squared ray length and is reused below.
			__m128 mag = (light_ray[X] * light_ray[X]) + (light_ray[Y] * light_ray[Y]) + (light_ray[Z] * light_ray[Z]);
			mag = _mm_rsqrt_ps(mag);
			light_ray[X] *= mag;
			light_ray[Y] *= mag;
			light_ray[Z] *= mag;

			// Masked by the (dot > zero) comparison result, then squared and
			// scaled by mag.
			__m128 dot = (normal[X] * light_ray[X]) + (normal[Y] * light_ray[Y]) + (normal[Z] * light_ray[Z]);
			dot &= dot > zero;
			dot = (dot * dot) * mag;

			// NOTE(review): `scalar` (attenuation clamped to [zero, one]) is
			// computed from the squared distance but never used below.
			__m128 distance = set_zero();
			for (__int32 i_axis = X; i_axis < W; i_axis++) {
				__m128 d = light_position[i_axis] - clip_space_position[i_vertex][i_axis];
				distance += (d * d);
			}
			__m128 scalar = reciprocal(distance) * attenuation_factor;
			scalar = max_vec(scalar, zero);
			scalar = min_vec(scalar, one);

			// Accumulate both terms into each colour channel from R up to
			// (but excluding) A.
			for (__int32 i_channel = R; i_channel < A; i_channel++) {
				vertex_colour[i_vertex][i_channel] += dot * specular_scale * light_colour[i_channel];
				vertex_colour[i_vertex][i_channel] += mag * diffuse_scale * light_colour[i_channel];
			}
		}
	}
	// Transpose the colours back and store them for the first n_triangles
	// triangles.
	for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {
		Transpose(vertex_colour[i_vertex]);
		for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) {
			store_u(vertex_colour[i_vertex][i_triangle], colour[i_triangle][i_vertex].f);
		}
	}


}