inline void coordinate_system(const V& v1,V& v2,V& v3) { if (std::abs(v1[0]) > std::abs(v1[1])) { float inv_len = reciprocal(std::sqrt(v1[0]*v1[0] + v1[2] * v1[2])); v2 = V(-v1[2] * inv_len, 0, v1[0] * inv_len); } else{ float inv_len = reciprocal(std::sqrt(v1[1]*v1[1] + v1[2] * v1[2])); v2 = V(0,v1[2] * inv_len, -v1[1] * inv_len); } v3 = cross(v1,v2); }
/*
 * In-place transform of y, reduced modulo q.
 *
 * y is treated as lts blocks of (p-1) rows, each row holding rts entries
 * (row stride rts, block stride rts*(p-1)). For every column the routine
 *   1. forms the weighted sum  sum_{i=1}^{p-1} i * y[row i-1]  modulo q,
 *   2. multiplies it by reciprocal(q, p) modulo q (presumably the inverse of
 *      p modulo q -- confirm against the helper), and
 *   3. walks rows p-2 .. 1 downwards, writing the running accumulator and
 *      subtracting the old entry, then writes the final accumulator to row 0.
 * The last row (index p-2) is only read, never written.
 */
void gInvDecRq (hInt_t* y, hDim_t lts, hDim_t rts, hDim_t p, hInt_t q)
{
  hDim_t block;
  hDim_t column;
  hDim_t row;

  const hDim_t blockStride = rts*(p-1);
  const hInt_t pInverse = reciprocal (q,p);

  for (block = 0; block < lts; ++block)
  {
    const hDim_t blockBase = block*blockStride;

    for (column = 0; column < rts; ++column)
    {
      const hDim_t base = blockBase + column;

      /* Weighted sum over the whole column. */
      hInt_t weighted = 0;
      for (row = 1; row < p; ++row)
      {
        weighted += (row * y[base + (row-1)*rts]);
      }
      /* after the loop above, |weighted| <= p*p*q */
      weighted = weighted % q;

      hInt_t running = (weighted * pInverse) % q;
      /* |running| <= p*q */

      /* Must run from the top row downwards: each step depends on the last. */
      for (row = p-2; row > 0; --row)
      {
        const hDim_t pos = base + row*rts;
        const hInt_t previous = running;
        running = running - y[pos];
        y[pos] = previous % q;
      }
      y[base] = running % q;
    }
  }
}
//------------------------------------------------------------------------------ // Name: //------------------------------------------------------------------------------ knumber_base *knumber_float::pow(knumber_base *rhs) { if(knumber_integer *const p = dynamic_cast<knumber_integer *>(rhs)) { mpf_pow_ui(mpf_, mpf_, mpz_get_ui(p->mpz_)); if(p->sign() < 0) { return reciprocal(); } else { return this; } } else if(knumber_float *const p = dynamic_cast<knumber_float *>(rhs)) { return execute_libc_func< ::pow>(mpf_get_d(mpf_), mpf_get_d(p->mpf_)); } else if(knumber_fraction *const p = dynamic_cast<knumber_fraction *>(rhs)) { knumber_float f(p); return execute_libc_func< ::pow>(mpf_get_d(mpf_), mpf_get_d(f.mpf_)); } else if(knumber_error *const p = dynamic_cast<knumber_error *>(rhs)) { if(p->sign() > 0) { knumber_error *e = new knumber_error(knumber_error::ERROR_POS_INFINITY); delete this; return e; } else if(p->sign() < 0) { knumber_integer *n = new knumber_integer(0); delete this; return n; } else { knumber_error *e = new knumber_error(knumber_error::ERROR_UNDEFINED); delete this; return e; } } Q_ASSERT(0); return 0; }
// Demo: prints reciprocal(t) and then t itself, so the reader can see whether
// the call changed the argument.
int main()
{
    double t = 10.0;

    std::cout << "Reciprocal of 10.0 is ";
    std::cout << reciprocal(t);
    std::cout << std::endl;

    std::cout << "Value of t is still: ";
    std::cout << t;
    std::cout << std::endl;

    return 0;
}
//! Returns the animated mesh based on a detail level. 0 is the lowest, 255 the highest detail. IMesh* CAnimatedMeshMD3::getMesh(SINT32 frame, SINT32 detailLevel, SINT32 startFrameLoop, SINT32 endFrameLoop) { if (0 == Mesh) return 0; //! check if we have the mesh in our private cache SCacheInfo candidate(frame, startFrameLoop, endFrameLoop); if (candidate == Current) return MeshIPol; startFrameLoop = SINT32_max(0, startFrameLoop >> IPolShift); endFrameLoop = if_c_a_else_b(endFrameLoop < 0, Mesh->MD3Header.numFrames - 1, endFrameLoop >> IPolShift); const UINT32 mask = 1 << IPolShift; SINT32 frameA; SINT32 frameB; FLOAT32 iPol; if (LoopMode) { // correct frame to "pixel center" frame -= mask >> 1; // interpolation iPol = FLOAT32(frame & (mask - 1)) * reciprocal(FLOAT32(mask)); // wrap anim frame >>= IPolShift; frameA = if_c_a_else_b(frame < startFrameLoop, endFrameLoop, frame); frameB = if_c_a_else_b(frameA + 1 > endFrameLoop, startFrameLoop, frameA + 1); } else {
/// Divides this vector by the scalar p1, in place.
///
/// The division is carried out as a multiplication by reciprocal(p1): p1 is
/// first converted to a SIMD_Vector<float,4> (presumably a broadcast of the
/// scalar -- confirm against the converting constructor), inverted with
/// reciprocal(), and multiplied lane-wise into m_vec with _mm_mul_ps.
///
/// @return reference to *this
const SIMD_Vector& operator /= ( value_type p1 )
{
    SIMD_Vector<float,4> inverse = p1;
    inverse = reciprocal(inverse);

    m_vec = _mm_mul_ps(m_vec, inverse.m_vec);
    return *this;
}
/// Sets up the converter and computes its filter coefficients.
///
/// @param quality               selects kaiser_beta (via window_param) and
///                              depth (via filter_order)
/// @param interpolation_factor  numerator of the rate ratio; reduced by the
///                              gcd with decimation_factor before use
/// @param decimation_factor     denominator of the rate ratio; reduced likewise
/// @param scale                 extra gain folded into the coefficients
/// @param cutoff                cutoff before the transition-width correction
///                              and the division by the larger factor
samplerate_converter(sample_rate_conversion_quality quality, itype interpolation_factor,
                     itype decimation_factor, ftype scale = ftype(1), ftype cutoff = 0.5f)
    : kaiser_beta(window_param(quality)),
      depth(static_cast<itype>(filter_order(quality))),
      input_position(0),
      output_position(0)
{
    // Reduce the ratio to lowest terms.
    const i64 common_divisor = gcd(interpolation_factor, decimation_factor);
    interpolation_factor /= common_divisor;
    decimation_factor /= common_divisor;

    taps  = depth * interpolation_factor;
    order = size_t(depth * interpolation_factor - 1);

    // The parameters shadow the members of the same name.
    this->interpolation_factor = interpolation_factor;
    this->decimation_factor    = decimation_factor;

    const itype centre = taps / 2;

    filter = univector<T>(size_t(taps), T());
    delay  = univector<T>(size_t(depth), T());

    cutoff = cutoff - transition_width() / c_pi<ftype, 4>;
    cutoff = cutoff / std::max(decimation_factor, interpolation_factor);

    // Slot `slot` receives the windowed-sinc value for tap index `tap`; `tap`
    // advances by interpolation_factor and wraps to (tap - taps + 1).
    for (itype slot = 0, tap = 0; slot < taps; slot++)
    {
        const auto sinc_value   = sinc((tap - centre) * cutoff * c_pi<ftype, 2>);
        const auto window_value = window(ftype(tap) / ftype(taps - 1));
        filter[size_t(slot)]    = sinc_value * window_value;

        tap += size_t(interpolation_factor);
        if (tap >= taps)
            tap = tap - taps + 1;
    }

    // Scale the coefficients by 1/sum, times interpolation_factor and scale.
    const T gain = reciprocal(sum(filter)) * interpolation_factor * scale;
    filter       = filter * gain;
}
void LinkCells::buildCellLists( const std::vector<Vector>& pos, const std::vector<unsigned>& indices, const Pbc& pbc ){ plumed_assert( cutoffwasset && pos.size()==indices.size() ); // Must be able to check that pbcs are not nonsensical in some way?? -- GAT // Setup the pbc object by copying it from action mypbc.setBox( pbc.getBox() ); // Setup the lists if( pos.size()!=allcells.size() ){ allcells.resize( pos.size() ); lcell_lists.resize( pos.size() ); } { // This is the reciprocal lattice // notice that reciprocal.getRow(0) is a vector that is orthogonal to b and c // This allows to use linked cells in non orthorhomic boxes Tensor reciprocal(transpose(mypbc.getInvBox())); ncells[0] = std::floor( 1.0/ reciprocal.getRow(0).modulo() / link_cutoff ); if( ncells[0]==0 ) ncells[0]=1; ncells[1] = std::floor( 1.0/ reciprocal.getRow(1).modulo() / link_cutoff ); if( ncells[1]==0 ) ncells[1]=1; ncells[2] = std::floor( 1.0/ reciprocal.getRow(2).modulo() / link_cutoff ); if( ncells[2]==0 ) ncells[2]=1; } // Setup the strides nstride[0]=1; nstride[1]=ncells[0]; nstride[2]=ncells[0]*ncells[1]; // Setup the storage for link cells unsigned ncellstot=ncells[0]*ncells[1]*ncells[2]; if( lcell_tots.size()!=ncellstot ){ lcell_tots.resize( ncellstot ); lcell_starts.resize( ncellstot ); } // Clear nlcells for(unsigned i=0;i<ncellstot;++i) lcell_tots[i]=0; // Clear allcells allcells.assign( allcells.size(), 0 ); // Find out what cell everyone is in unsigned rank=comm.Get_rank(), size=comm.Get_size(); for(unsigned i=rank;i<pos.size();i+=size){ allcells[i]=findCell( pos[i] ); lcell_tots[allcells[i]]++; } // And gather all this information on every node comm.Sum( allcells ); comm.Sum( lcell_tots ); // Now prepare the link cell lists unsigned tot=0; for(unsigned i=0;i<lcell_tots.size();++i){ lcell_starts[i]=tot; tot+=lcell_tots[i]; lcell_tots[i]=0; } plumed_assert( tot==pos.size() ); // And setup the link cells properly for(unsigned j=0;j<pos.size();++j){ unsigned myind = lcell_starts[ 
allcells[j] ] + lcell_tots[ allcells[j] ]; lcell_lists[ myind ] = indices[j]; lcell_tots[allcells[j]]++; } }
/*
 * Program to find the reciprocal of a number.
 *
 * Usage: <program> <integer>
 * The first command-line argument is parsed with atoi() and the value of
 * reciprocal() for it is printed with %g.
 * Returns 0 on success, 1 when no argument was supplied.
 */
int main(int argc, char **argv)
{
    int num;

    /* argv[1] only exists when at least one argument was passed. */
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <integer>\n", argc > 0 ? argv[0] : "reciprocal");
        return 1;
    }

    num = atoi(argv[1]);
    /* The original printed an undeclared `i`; the parsed value lives in num. */
    printf("Reciprocal of %d is %g\n", num, reciprocal (num));
    return 0;
}
void UGenPlugin::processEnvs() { backgroundLock.enter(); const float speed = getMappedParameter(UGenInterface::Parameters::Speed); const float duration = originalBuffer.duration() / speed; const float newSize = originalBuffer.size() / speed; Env ampEnvScaled = ampEnv.timeScale(duration); UGen player = PlayBuf::AR(originalBuffer, speed, 0, 0, 0, UGen::DoNothing); if (!isEnvDefault(filterEnv)) { Env filterEnvScaled = filterEnv.timeScale(duration); UGen envgen = EnvGen::AR(filterEnvScaled).linexp(0, 1, getFilterMin(), getFilterMax()); switch (menuItem) { case UGenInterface::MenuOptions::LowPass: player = BLowPass::AR(player, envgen, reciprocal(getMappedParameter(UGenInterface::Parameters::Resonance))); break; case UGenInterface::MenuOptions::HighPass: player = BHiPass::AR(player, envgen, reciprocal(getMappedParameter(UGenInterface::Parameters::Resonance))); break; case UGenInterface::MenuOptions::BandPass: player = BBandPass::AR(player, envgen, convertQtoOctaves(getMappedParameter(UGenInterface::Parameters::Resonance))); break; case UGenInterface::MenuOptions::BandReject: player = BBandStop::AR(player, envgen, convertQtoOctaves(getMappedParameter(UGenInterface::Parameters::Resonance))); break; } } player *= EnvGen::AR(ampEnvScaled); backgroundLock.exit(); processManager.add(newSize, player); }
/*
 * Prints the reciprocal of the integer given as the first command-line
 * argument.
 *
 * Returns 0 on success, 1 when no argument was supplied.
 */
int main(int argc, char **argv)
{
    int i;

    /* Without this guard atoi() would be handed a null argv[1]. */
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <integer>\n", argc > 0 ? argv[0] : "reciprocal");
        return 1;
    }

    i = atoi(argv[1]);
    printf("The reciprocal of %d is %g\n", i, reciprocal(i));
    return 0;
}
// Raises this rational to an integer power.
//
// A negative exponent is handled by raising the reciprocal to the absolute
// value of the exponent. Numerator and denominator are powered separately and
// the result is constructed with no_normalise_tag().
Rational Rational::pow(const long exponent) const
{
    const long magnitude = std::abs(exponent);

    const Rational base = (exponent < 0) ? reciprocal() : *this;

    const Integer poweredNumerator   = base.numerator.pow(magnitude);
    const Integer poweredDenominator = base.denominator.pow(magnitude);

    return Rational(poweredNumerator, poweredDenominator, no_normalise_tag());
}
/// Builds a perspective projection transform.
///
/// @param fov  field of view; converted with Radians() before use
/// @param n    near plane distance
/// @param f    far plane distance
/// @return the projective-divide matrix pre-scaled by 1/tan(fov/2) in the
///         first two axes
inline typename transform_type<T,3>::type perspective(T fov, T n, T f)
{
    typedef typename vector_type<T,3>::type    vector_t;
    typedef typename transform_type<T,3>::type transform_t;
    typedef typename matrix_type<T,4>::type    matrix_t;

    // Projective divide: the third row is built from f/(f-n) and -f*n/(f-n).
    const T depth_scale = reciprocal(f-n);

    matrix_t projection;
    projection << 1, 0, 0,             0,
                  0, 1, 0,             0,
                  0, 0, f*depth_scale, -f*n*depth_scale,
                  0, 0, 1,             0;

    // Scale to canonical viewing volume
    const T cot_half_fov = reciprocal(std::tan(Radians(fov)/2));
    const vector_t view_scale(cot_half_fov, cot_half_fov, 1);

    return transform_t(projection).prescale(view_scale);
}
/*
 * Repeatedly reads a polynomial (maximum degree followed by degree+1
 * coefficients), prints it and its reciprocal, and reports whether it is
 * self-reciprocal. The loop ends when a self-reciprocal polynomial has been
 * entered or when a negative degree is given.
 *
 * Returns 0 on normal termination, 1 on unreadable input or a degree that
 * does not fit the coefficient array.
 */
int main(void)
{
    /* coefficient[] has room for degrees 0..10 */
    int degree;
    int coefficient[11];
    int i;
    int self_reciprocal;
    void polynomial(int degree, int coefficient[11]);

    do
    {
        /* Ask for the maximum degree of the polynomial */
        printf("Please enter the maximum degree of the polynomial: ");
        if (scanf("%d", &degree) != 1)
        {
            fprintf(stderr, "Invalid degree\n");
            return 1;
        }

        /* A negative degree terminates the program */
        if (degree < 0)
        {
            return 0;
        }

        /* Anything above 10 would write past the end of coefficient[] */
        if (degree > 10)
        {
            fprintf(stderr, "The maximum supported degree is 10\n");
            return 1;
        }

        /* Read degree+1 coefficients */
        printf("Please enter the coefficients: ");
        for (i = 0; i <= degree; i++)
        {
            if (scanf("%d", &coefficient[i]) != 1)
            {
                fprintf(stderr, "Invalid coefficient\n");
                return 1;
            }
        }

        /* Print the polynomial and its reciprocal */
        printf("The polynomial is ");
        polynomial(degree, coefficient);
        printf("\n");
        printf("The reciprocal is ");
        reciprocal(degree, coefficient);
        printf("\n");

        /* Evaluated once per polynomial; assumes selfreciprocal() is a pure
           predicate of its arguments (its result was previously recomputed
           three times with identical arguments). */
        self_reciprocal = selfreciprocal(degree, coefficient);
        if (self_reciprocal == 1)
            printf("The polynomial is self-reciprocal \n");
        if (self_reciprocal == 0)
            printf("The polynomial is not self-reciprocal \n");
        printf("\n\n");
    } while (self_reciprocal == 0); /* stop once the polynomial is self-reciprocal */

    return 0;
}
// Overload for views using the ViewMPVectorContiguous specialization:
// converts x and r to their flat_array_type and forwards to the
// reciprocal() overload for those flat views.
void reciprocal(
  const Kokkos::View< RT,RL,RD,RM,Kokkos::Impl::ViewMPVectorContiguous >& r,
  const Kokkos::View< XT,XL,XD,XM,Kokkos::Impl::ViewMPVectorContiguous >& x)
{
  typedef Kokkos::View< XT,XL,XD,XM,Kokkos::Impl::ViewMPVectorContiguous > x_view_type;
  typedef Kokkos::View< RT,RL,RD,RM,Kokkos::Impl::ViewMPVectorContiguous > r_view_type;

  typename x_view_type::flat_array_type flat_input = x;
  typename r_view_type::flat_array_type flat_result = r;

  reciprocal( flat_result, flat_input );
}
// Overload enabled only when both r and x satisfy Kokkos::is_view_mp_vector:
// converts both views to their Kokkos::FlatArrayType and forwards to the
// reciprocal() overload for those flat views.
typename std::enable_if<
  Kokkos::is_view_mp_vector< Kokkos::View<RD,RP...> >::value &&
  Kokkos::is_view_mp_vector< Kokkos::View<XD,XP...> >::value >::type
reciprocal(
  const Kokkos::View<RD,RP...>& r,
  const Kokkos::View<XD,XP...>& x)
{
  typedef typename Kokkos::FlatArrayType< Kokkos::View<XD,XP...> >::type flat_x_type;
  typedef typename Kokkos::FlatArrayType< Kokkos::View<RD,RP...> >::type flat_r_type;

  flat_x_type flat_input = x;
  flat_r_type flat_result = r;

  reciprocal( flat_result, flat_input );
}
/*
 * Quaternion division: returns m * reciprocal(n).
 *
 * When norm2(n) is exactly zero the division is not attempted; a message is
 * printed and an all-zero quaternion is returned. The original returned an
 * uninitialised value on that path.
 */
quaternion divide(quaternion m, quaternion n)
{
    /* Zero-initialised so the error path returns a defined value.
       NOTE(review): relies on quaternion being an aggregate -- confirm. */
    quaternion out = {0};

    if (norm2(n) == 0)
    {
        printf("divide by zero attempted in quaternion divide routine\n");
        return out;
    }

    out = multiply(m, reciprocal(n));
    return out;
}
/**
 * Builds a piecewise s-basis function over the given range from rescaled
 * copies of reciprocal(Linear(1,R),3), on cuts that are powers of R = 2.
 *
 * The positive part covers [a, b], where a and b come from range (mirrored
 * when range is negative, and starting at 0 when range straddles 0). When a
 * is not larger than tol the first piece, from 0 up to a power of R derived
 * from tol, is the constant Linear(1/a). If range reaches below zero the
 * positive part is mirrored (segments reversed and negated) and, when range
 * also reaches above zero, the positive part is appended.
 */
Piecewise<SBasis> reciprocalOnDomain(Interval range, double tol){
    //TODO: deduce R from tol...
    double R = 2.;
    SBasis unit_piece = reciprocal(Linear(1,R),3);

    // Reduce the range to a non-negative interval [lo, hi].
    double lo = range.min();
    double hi = range.max();
    if (lo*hi < 0){
        hi = std::max(fabs(lo), fabs(hi));
        lo = 0;
    } else if (hi < 0){
        lo = -range.max();
        hi = -range.min();
    }

    Piecewise<SBasis> positive;

    // Snap the lower end to a power of R, based on tol when lo is small.
    const bool starts_at_zero = (lo <= tol);
    const int exponent = (int) floor(std::log(starts_at_zero ? tol : lo)/std::log(R));
    lo = pow(R, exponent);
    if (starts_at_zero){
        positive.push_cut(0);
        positive.push(Linear(1/lo), lo);
    } else {
        positive.cuts.push_back(lo);
    }

    // One rescaled copy of the unit piece per interval [lo, R*lo].
    while (lo < hi){
        positive.push(unit_piece/lo, R*lo);
        lo *= R;
    }

    if (range.min()<0 || range.max()<0){
        //TODO: define reverse(pw<sb>);
        Piecewise<SBasis> mirrored;
        mirrored.cuts.push_back(-positive.cuts.back());
        for (unsigned k = 0; k < positive.size(); k++){
            const int idx = positive.segs.size()-1-k;
            mirrored.push_seg(-reverse(positive.segs.at(idx)));
            mirrored.push_cut(-positive.cuts.at(idx));
        }
        if (range.max() > 0){
            mirrored.concat(positive);
        }
        return mirrored;
    }

    return positive;
}
/*
 * Repeatedly reads a polynomial (maximum degree N followed by N+1
 * coefficients) and passes it to polynomial() and reciprocal().
 * A negative degree ends the program; a degree of 0 is processed and then
 * ends the loop. Unreadable input or a degree that does not fit in
 * coefficient[SIZE] ends the program with EXIT_FAILURE.
 */
int main(void){
    int coefficient[SIZE];
    int K, N, Power;

    /* Keep asking for polynomials while the last degree was positive */
    N=1;
    while (N>0){
        /* Ask for the maximum degree of the polynomial */
        printf("Please enter the maximum degree of the polynomial:");
        /* Without this check a failed read leaves N unchanged and the loop
           would never terminate on end of input */
        if (scanf("%d", &N) != 1){
            fprintf(stderr, "Invalid degree\n");
            exit(EXIT_FAILURE);
        }
        if(N<0){
            exit(0);
        }
        /* N+1 coefficients must fit into coefficient[SIZE] */
        if (N >= SIZE){
            fprintf(stderr, "Degree too large\n");
            exit(EXIT_FAILURE);
        }

        /* Ask for the coefficients */
        printf("Please enter the coefficients:");
        for(K=0; K<=N; K++)
        {
            if (scanf("%d", &coefficient[K]) != 1){
                fprintf(stderr, "Invalid coefficient\n");
                exit(EXIT_FAILURE);
            }
        }
        Power = N;

        /* Print the polynomial and its reciprocal */
        polynomial(coefficient, Power, N);
        reciprocal(coefficient, Power , N);
        printf("\n");
    }
    exit(0);
}
/*
 * Repeatedly reads a polynomial (maximum degree n followed by n+1
 * coefficients), prints it and its reciprocal via polynomial() and
 * reciprocal(), and reports whether it is self-reciprocal, i.e. whether
 * co[i] == co[n-i] for every i. The program ends on a negative degree or
 * after a self-reciprocal polynomial.
 *
 * Returns 0 on normal termination, 1 on unreadable input or a degree that
 * does not fit the coefficient array.
 */
int main()
{
    while(1){
        int i;        /* loop index over the coefficients */
        int co[11];   /* coefficients of the polynomial, degrees 0..10 */
        int n;        /* maximum degree of the polynomial */
        int k=0;      /* number of positions where co[i] == co[n-i] */

        /* Prompt the user for the maximum degree */
        printf("Please enter the maximum degree of the polynomial:");
        if(scanf("%d", &n) != 1){
            fprintf(stderr, "Invalid degree\n");
            return 1;
        }

        /* A negative degree terminates the program */
        if(n<0){
            return 0;
        }

        /* Anything above 10 would write past the end of co[] */
        if(n>10){
            fprintf(stderr, "The maximum supported degree is 10\n");
            return 1;
        }

        printf("Please enter the coefficients:");
        for(i=0;i<=n;i++){
            if(scanf("%d", &co[i]) != 1){
                fprintf(stderr, "Invalid coefficient\n");
                return 1;
            }
        }

        /* Print the two polynomials */
        polynomial(n,co);
        reciprocal(n,co);

        /* Count the mirrored positions that agree; all n+1 agreeing means
           the polynomial equals its reciprocal */
        for(i=0;i<=n;i++){
            if(co[i]==co[n-i]){
                k = k+1;
            }
        }

        if(k==(n+1)){
            printf("The polynomial is self-reciprocal\n");
            return 0;
        }

        printf("The polynomial is not self-reciprocal\n");
        printf("\n");
    }
}
/*
 * Repeatedly reads a polynomial (maximum degree followed by degree+1
 * coefficients), prints it and its reciprocal, and reports the verdict of
 * determination(). The loop repeats while determination() returns 1 and the
 * program also ends on a negative degree.
 *
 * Returns 0 on normal termination, 1 on unreadable input or a degree that
 * does not fit the coefficient array.
 */
int main(void)
{
    /* Declaring variables */
    int b;
    int degree;
    int coefficient[11];
    int reciprocal1[11];
    int verdict;

    do /* Allows the program to loop */
    {
        /* Prompt for the maximum degree */
        printf("Please enter the maximum degree of the polynomial :");
        if (scanf("%d" , &degree) != 1)
        {
            fprintf(stderr, "Invalid degree\n");
            return 1;
        }

        /* A negative degree terminates the program */
        if(degree<0)
        {
            return 0;
        }

        /* Anything above 10 would write past the end of coefficient[] */
        if (degree > 10)
        {
            fprintf(stderr, "The maximum supported degree is 10\n");
            return 1;
        }

        /* Prompt for the coefficients and read degree+1 of them */
        printf("Please enter the coefficients:");
        for (b=0; b <= degree; b++)
        {
            if (scanf("%d", &coefficient[b]) != 1)
            {
                fprintf(stderr, "Invalid coefficient\n");
                return 1;
            }
        }

        /* Print the polynomial */
        printf("The polynomial is ");
        polynomial(degree,coefficient);

        /* Print the reciprocal */
        printf("Its reciprocal is ");
        reciprocal(degree,coefficient);

        /* Evaluated once per polynomial; assumes determination() gives the
           same answer for the same arguments (it was previously called three
           times with identical arguments). b is degree+1 at this point. */
        verdict = determination(b, degree, reciprocal1, coefficient);
        if (verdict==1)
        {
            printf("This not self reciprocal \n\n");
        }
        if (verdict==0)
        {
            printf("This is self reciprocal \n\n");
        }
    }
    while (verdict==1);

    return 0;
}
/// Loads a new data vector into the filter.
///
/// For every entry of ir_segments: the matching block of block_size samples
/// is sliced from data, padded, transformed with fft into that entry and
/// multiplied by reciprocal(fft.size). Afterwards the work buffers
/// (saved_input, scratch, premul, cscratch, overlap) are resized for
/// block_size.
void convolve_filter<T>::set_data(const univector<T>& data)
{
    univector<T> padded_block(fft.size);
    const T scale = reciprocal(T(fft.size));

    const size_t segment_count = ir_segments.size();
    for (size_t seg = 0; seg < segment_count; ++seg)
    {
        auto& spectrum = ir_segments[seg];

        segments[seg].resize(block_size);
        spectrum.resize(block_size, 0);

        padded_block = padded(data.slice(seg * block_size, block_size));
        fft.execute(spectrum, padded_block, temp, dft_pack_format::Perm);

        // Scale the stored spectrum in place.
        process(spectrum, spectrum * scale);
    }

    saved_input.resize(block_size, 0);
    scratch.resize(block_size * 2);
    premul.resize(block_size, 0);
    cscratch.resize(block_size);
    overlap.resize(block_size, 0);
}
//------------------------------------------------------------------------------ // Name: pow //------------------------------------------------------------------------------ knumber_base *knumber_integer::pow(knumber_base *rhs) { if(knumber_integer *const p = dynamic_cast<knumber_integer *>(rhs)) { if(is_zero() && p->is_even() && p->sign() < 0) { delete this; return new knumber_error(knumber_error::ERROR_POS_INFINITY); } mpz_pow_ui(mpz_, mpz_, mpz_get_ui(p->mpz_)); if(p->sign() < 0) { return reciprocal(); } else { return this; } } else if(knumber_float *const p = dynamic_cast<knumber_float *>(rhs)) { knumber_float *f = new knumber_float(this); delete this; return f->pow(p); } else if(knumber_fraction *const p = dynamic_cast<knumber_fraction *>(rhs)) { knumber_fraction *f = new knumber_fraction(this); delete this; return f->pow(p); } else if(knumber_error *const p = dynamic_cast<knumber_error *>(rhs)) { if(p->sign() > 0) { knumber_error *e = new knumber_error(knumber_error::ERROR_POS_INFINITY); delete this; return e; } else if(p->sign() < 0) { mpz_init_set_si(mpz_, 0); return this; } else { knumber_error *e = new knumber_error(knumber_error::ERROR_UNDEFINED); delete this; return e; } } Q_ASSERT(0); return 0; }
int main(void) { while(1){ //loop forever printf("Please enter the maximum degree of the polynomial: "); scanf("%d",°ree); //input maximum degree. if (degree < 0 ){ break; //the program will exit when input less then 0. } printf("Please enter the coefficients: "); for (i=0;i<=degree;i++){ scanf("%d",&coef[i]); //input all coefficients from coef[1] to coef[i]. } polynomial(); //output all polynomial. printf("\n"); reciprocal(); //output all reciprocal. printf("\n"); selfreciprocal(); //output if it is self-reciprocal or not. } }
//! to be called every frame void CFPSCounter::registerFrame(u32 now, u32 primitivesDrawn) { ++FramesCounted; PrimitiveTotal += primitivesDrawn; PrimitivesCounted += primitivesDrawn; Primitive = primitivesDrawn; const u32 milliseconds = now - StartTime; if (milliseconds >= 1500 ) { const f32 invMilli = reciprocal ( (f32) milliseconds ); FPS = ceil32 ( ( 1000 * FramesCounted ) * invMilli ); PrimitiveAverage = ceil32 ( ( 1000 * PrimitivesCounted ) * invMilli ); FramesCounted = 0; PrimitivesCounted = 0; StartTime = now; } }
int main(void){ while(1){//loop printf("\n"); printf("Please enter the maximum degree of the polynomial:"); scanf("%d",&n);//put in the maximum degree of the polynomial if(n<0){//if you want to terminate, put in -1 printf("error "); break; } printf("Please enter the coefficients :"); i=0; while(i<=n) { scanf("%d",&m[i]);//put in the coefficients i++; } polynomial();//print the polynomial printf("\n");//for beautiful :D reciprocal();//print the reciprocal printf("\n");//for beautiful :D selfreciprocal();//Judge it is selfreciprocal or not } }
void Fxaa::render(const Handle<Shader_resource_view>& source, const Viewport& source_viewport, const Rendering_context& context) { Rendering_device& device = rendering_tool_.device(); device.set_framebuffer(context.framebuffer()); device.set_viewports(1, &context.viewport()); device.set_depth_stencil_state(ds_state_); device.set_blend_state(blend_state_); device.set_input_layout(input_layout_); device.set_shader_resources(1, &source); effect_->use(device); change_per_source_.data().inverse_source_size = reciprocal(source_viewport.size); change_per_source_.update(device); effect_->technique(0)->use(); rendering_tool_.render_fullscreen_effect(); }
frame -= mask >> 1; // interpolation iPol = FLOAT32(frame & (mask - 1)) * reciprocal(FLOAT32(mask)); // wrap anim frame >>= IPolShift; frameA = if_c_a_else_b(frame < startFrameLoop, endFrameLoop, frame); frameB = if_c_a_else_b(frameA + 1 > endFrameLoop, startFrameLoop, frameA + 1); } else { // correct frame to "pixel center" frame -= mask >> 1; iPol = FLOAT32(frame & (mask - 1)) * reciprocal(FLOAT32(mask)); // clamp anim frame >>= IPolShift; frameA = SINT32_clamp(frame, startFrameLoop, endFrameLoop); frameB = SINT32_min(frameA + 1, endFrameLoop); } // build current vertex for (UINT32 i = 0; i != Mesh->Buffer.size(); ++i) { buildVertexArray(frameA, frameB, iPol, Mesh->Buffer[i], (SMeshBufferLightMap*)MeshIPol->getMeshBuffer(i)); } MeshIPol->recalculateBoundingBox();
/* ================== ================== */ void Vertex_Lighting_REM( const __int32 n_triangles, const vertex_light_manager_& vertex_light_manager, const float4_ positions[4][3], float4_ colour[4][3] ) { //const __int32 VERTEX_COLOUR = FIRST_ATTRIBUTE + 0; static const float r_screen_scale_x = 1.0f / screen_scale_x; static const float r_screen_scale_y = 1.0f / screen_scale_y; //const __m128 attenuation_factor = set_all(200.0f); //const __m128 attenuation_factor = set_all(800.0f); //const __m128 specular_scale = set_all(100.0f); //const __m128 diffuse_scale = set_all(20.0f); __m128 r_screen_scale[2]; r_screen_scale[X] = set_all(r_screen_scale_x); r_screen_scale[Y] = set_all(r_screen_scale_y); __m128 screen_shift[2]; screen_shift[X] = set_all(screen_shift_x); screen_shift[Y] = set_all(screen_shift_y); __m128 clip_space_position[3][4]; //__m128 vertex_colour[3][4]; float4_ new_position[4][3]; for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) { __m128 vertex_position[4]; for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) { vertex_position[i_triangle] = load_u(positions[i_triangle][i_vertex].f); //vertex_colour[i_vertex][i_triangle] = load_u(colour[i_triangle][i_vertex].f); } Transpose(vertex_position); //Transpose(vertex_colour[i_vertex]); __m128 depth = reciprocal(vertex_position[Z]); clip_space_position[i_vertex][X] = ((vertex_position[X] - screen_shift[X]) * r_screen_scale[X]) * depth; clip_space_position[i_vertex][Y] = ((vertex_position[Y] - screen_shift[Y]) * r_screen_scale[Y]) * depth; clip_space_position[i_vertex][Z] = depth; } __m128 a[3]; a[X] = clip_space_position[1][X] - clip_space_position[0][X]; a[Y] = clip_space_position[1][Y] - clip_space_position[0][Y]; a[Z] = clip_space_position[1][Z] - clip_space_position[0][Z]; __m128 b[3]; b[X] = clip_space_position[2][X] - clip_space_position[0][X]; b[Y] = clip_space_position[2][Y] - clip_space_position[0][Y]; b[Z] = clip_space_position[2][Z] - clip_space_position[0][Z]; __m128 normal[4]; 
normal[X] = (a[Y] * b[Z]) - (a[Z] * b[Y]); normal[Y] = (a[Z] * b[X]) - (a[X] * b[Z]); normal[Z] = (a[X] * b[Y]) - (a[Y] * b[X]); __m128 mag = (normal[X] * normal[X]) + (normal[Y] * normal[Y]) + (normal[Z] * normal[Z]); mag = _mm_rsqrt_ps(mag); normal[X] *= mag; normal[Y] *= mag; normal[Z] *= mag; float normal_4[3][4]; store_u(normal[X], normal_4[X]); store_u(normal[Y], normal_4[Y]); store_u(normal[Z], normal_4[Z]); float centre_4[3][4]; float extent_4[3][4]; const __m128 half = set_all(0.5f); for (__int32 i_axis = X; i_axis < W; i_axis++) { __m128 max; __m128 min; max = min = clip_space_position[0][i_axis]; max = max_vec(max_vec(max, clip_space_position[1][i_axis]), clip_space_position[2][i_axis]); min = min_vec(min_vec(min, clip_space_position[1][i_axis]), clip_space_position[2][i_axis]); store_u((max + min) * half, centre_4[i_axis]); store_u((max - min) * half, extent_4[i_axis]); } for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) { Transpose(clip_space_position[i_vertex]); for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) { store_u(clip_space_position[i_vertex][i_triangle], new_position[i_triangle][i_vertex].f); } } const __m128 zero = set_all(0.0f); const __m128 one = set_all(1.0f); enum { MAX_LIGHTS_PER_VERTEX = 128, }; for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) { __m128 centre[3]; __m128 extent[3]; for (__int32 i_axis = X; i_axis < W; i_axis++) { centre[i_axis] = set_all(centre_4[i_axis][i_triangle]); extent[i_axis] = set_all(extent_4[i_axis][i_triangle]); } float z_min = centre_4[Z][i_triangle] - extent_4[Z][i_triangle]; float z_max = centre_4[Z][i_triangle] + extent_4[Z][i_triangle]; __int32 bin_min = __int32(z_min / vertex_light_manager.bin_interval); __int32 bin_max = __int32(z_max / vertex_light_manager.bin_interval); bin_min = min(bin_min, vertex_light_manager_::NUM_BINS - 1); bin_max = min(bin_max, vertex_light_manager_::NUM_BINS - 1); bin_min = max(bin_min, 0); bin_max = max(bin_max, 0); //bin_max = 
bin_max >= 10 ? 0 : bin_max; //printf_s(" %i , %i \n", bin_min, bin_max); __int32 i_lights[MAX_LIGHTS_PER_VERTEX]; __int32 n_lights = 0; { for (__int32 i_bin = bin_min; i_bin <= bin_max; i_bin++) { const vertex_light_manager_::bin_& bin = vertex_light_manager.bin[i_bin]; for (__int32 i_light_4 = 0; i_light_4 < bin.n_lights; i_light_4 += 4) { const __int32 n = min(bin.n_lights - i_light_4, 4); __m128 light_position[4]; for (__int32 i_light = 0; i_light < n; i_light++) { __int32 index = vertex_light_manager.i_light[bin.i_start + i_light_4 + i_light]; light_position[i_light] = load_u(vertex_light_manager.light_sources[index].position.f); } Transpose(light_position); const __m128 light_extent = set_all(100.0f); __m128i is_valid = set_all(-1); is_valid &= abs(centre[X] - light_position[X]) < (extent[X] + light_extent); is_valid &= abs(centre[Y] - light_position[Y]) < (extent[Y] + light_extent); is_valid &= abs(centre[Z] - light_position[Z]) < (extent[Z] + light_extent); unsigned __int32 result_mask = store_mask(is_valid); for (__int32 i_light = 0; i_light < n; i_light++) { __int32 index = vertex_light_manager.i_light[bin.i_start + i_light_4 + i_light]; i_lights[n_lights] = index; n_lights += (result_mask >> i_light) & 0x1; } if (n_lights > MAX_LIGHTS_PER_VERTEX) { n_lights = MAX_LIGHTS_PER_VERTEX; break; } } } } for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) { __m128 vertex_position[3]; vertex_position[X] = set_all(new_position[i_triangle][i_vertex].x); vertex_position[Y] = set_all(new_position[i_triangle][i_vertex].y); vertex_position[Z] = set_all(new_position[i_triangle][i_vertex].z); __m128 vertex_colour[4]; vertex_colour[R] = set_all(0.0f); vertex_colour[G] = set_all(0.0f); vertex_colour[B] = set_all(0.0f); __m128 normal[3]; normal[X] = set_all(normal_4[X][i_triangle]); normal[Y] = set_all(normal_4[Y][i_triangle]); normal[Z] = set_all(normal_4[Z][i_triangle]); for (__int32 i_light_4 = 0; i_light_4 < n_lights; i_light_4 += 4) { const __int32 n = min(n_lights - 
i_light_4, 4); __m128 light_position[4]; __m128 light_colour[4]; unsigned __int32 mask = 0x0; float intensity_4[4]; for (__int32 i_light = 0; i_light < n; i_light++) { mask |= 0x1 << i_light; const __int32 index = i_lights[i_light_4 + i_light]; intensity_4[i_light] = vertex_light_manager.light_sources[index].intensity; light_position[i_light] = load_u(vertex_light_manager.light_sources[index].position.f); light_colour[i_light] = load_u(vertex_light_manager.light_sources[index].colour.f); } Transpose(light_position); Transpose(light_colour); __m128 light_intensity = load_u(intensity_4); __m128 light_ray[3]; light_ray[X] = vertex_position[X] - light_position[X]; light_ray[Y] = vertex_position[Y] - light_position[Y]; light_ray[Z] = vertex_position[Z] - light_position[Z]; __m128 mag = (light_ray[X] * light_ray[X]) + (light_ray[Y] * light_ray[Y]) + (light_ray[Z] * light_ray[Z]); __m128 r_mag = _mm_rsqrt_ps(mag); light_ray[X] *= r_mag; light_ray[Y] *= r_mag; light_ray[Z] *= r_mag; __m128 dot = (normal[X] * light_ray[X]) + (normal[Y] * light_ray[Y]) + (normal[Z] * light_ray[Z]); dot &= dot > zero; __m128 r_distance = reciprocal(one + mag); __m128 spec = (dot * dot) * r_distance; static const __m128 specular_coefficient = set_all(2000.0f); static const __m128 diffuse_coefficient = set_all(200.0f); //printf_s(" %f ", dot); __m128i loop_mask = load_mask[mask]; for (__int32 i_channel = R; i_channel < A; i_channel++) { __m128 final = spec * specular_coefficient * light_colour[i_channel] * light_intensity; final += r_distance * diffuse_coefficient * light_colour[i_channel] * light_intensity; vertex_colour[i_channel] += final & loop_mask; } } Transpose(vertex_colour); vertex_colour[0] += vertex_colour[1] + vertex_colour[2] + vertex_colour[3]; float4_ temp; store_u(vertex_colour[0], temp.f); colour[i_triangle][i_vertex].x += temp.x; colour[i_triangle][i_vertex].y += temp.y; colour[i_triangle][i_vertex].z += temp.z; } }
/*
==================
Vertex_Lighting

Adds a lighting term to the colour of every vertex of up to four triangles.

  n_triangles          - number of valid entries in positions/colour
  vertex_light_manager - light sources; only light_sources[0] is read
  positions            - [triangle][vertex] input positions
  colour               - [triangle][vertex] colours, read and written back

NOTE(review): this looks like work-in-progress code. Several values are
computed and then never used (is_valid, distance, scalar, and therefore
attenuation_factor), and the light position/colour read from the manager are
overwritten with constants before use. See the inline notes.
==================
*/
void Vertex_Lighting(

	const __int32 n_triangles,
	const vertex_light_manager_& vertex_light_manager,
	const float4_ positions[4][3],
	float4_ colour[4][3]

) {

	static const float r_screen_scale_x = 1.0f / screen_scale_x;
	static const float r_screen_scale_y = 1.0f / screen_scale_y;

	// only used for `scalar` below, which is itself unused
	const __m128 attenuation_factor = set_all(800.0f);
	const __m128 specular_scale = set_all(100.0f);
	const __m128 diffuse_scale = set_all(20.0f);

	const __m128 zero = set_all(0.0f);
	const __m128 one = set_all(1.0f);

	__m128 r_screen_scale[2];
	r_screen_scale[X] = set_all(r_screen_scale_x);
	r_screen_scale[Y] = set_all(r_screen_scale_y);
	__m128 screen_shift[2];
	screen_shift[X] = set_all(screen_shift_x);
	screen_shift[Y] = set_all(screen_shift_y);

	__m128 clip_space_position[3][4];
	__m128 vertex_colour[3][4];

	// Transform the vertices, one vertex slot of all triangles at a time.
	for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {

		__m128 vertex_position[4];
		for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) {
			vertex_position[i_triangle] = load_u(positions[i_triangle][i_vertex].f);
			vertex_colour[i_vertex][i_triangle] = load_u(colour[i_triangle][i_vertex].f);
		}
		// after the transposes the arrays are indexed by component/channel
		// (presumably one lane per triangle -- confirm against Transpose)
		Transpose(vertex_position);
		Transpose(vertex_colour[i_vertex]);

		// X and Y are shifted, scaled and multiplied by 1/Z; Z becomes 1/Z
		__m128 depth = reciprocal(vertex_position[Z]);
		clip_space_position[i_vertex][X] = ((vertex_position[X] - screen_shift[X]) * r_screen_scale[X]) * depth;
		clip_space_position[i_vertex][Y] = ((vertex_position[Y] - screen_shift[Y]) * r_screen_scale[Y]) * depth;
		clip_space_position[i_vertex][Z] = depth;
	}

	// Face normal: cross product of the two edges leaving vertex 0.
	__m128 a[3];
	a[X] = clip_space_position[1][X] - clip_space_position[0][X];
	a[Y] = clip_space_position[1][Y] - clip_space_position[0][Y];
	a[Z] = clip_space_position[1][Z] - clip_space_position[0][Z];

	__m128 b[3];
	b[X] = clip_space_position[2][X] - clip_space_position[0][X];
	b[Y] = clip_space_position[2][Y] - clip_space_position[0][Y];
	b[Z] = clip_space_position[2][Z] - clip_space_position[0][Z];

	__m128 normal[4];
	normal[X] = (a[Y] * b[Z]) - (a[Z] * b[Y]);
	normal[Y] = (a[Z] * b[X]) - (a[X] * b[Z]);
	normal[Z] = (a[X] * b[Y]) - (a[Y] * b[X]);

	// scale the normal by the reciprocal square root of its squared length
	__m128 mag = (normal[X] * normal[X]) + (normal[Y] * normal[Y]) + (normal[Z] * normal[Z]);
	mag = _mm_rsqrt_ps(mag);
	normal[X] *= mag;
	normal[Y] *= mag;
	normal[Z] *= mag;

	// The bound of 1 means only the first light source is processed.
	for (__int32 i_light = 0; i_light < 1; i_light++) {

		for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {

			__m128 light_position[3];
			__m128 light_colour[3];
			const float intensity = vertex_light_manager.light_sources[i_light].intensity;
			for (__int32 i_axis = X; i_axis < W; i_axis++) {

				light_position[i_axis] = set_all(vertex_light_manager.light_sources[i_light].position.f[i_axis]);
				light_colour[i_axis] = set_all(vertex_light_manager.light_sources[i_light].colour.f[i_axis] * intensity);
			}

			// NOTE(review): is_valid is computed here but never read afterwards.
			const __m128 extent = set_all(40.0f);
			__m128i is_valid = set_all(-1);
			is_valid &= (clip_space_position[i_vertex][X] - light_position[X]) < extent;
			is_valid &= (clip_space_position[i_vertex][Y] - light_position[Y]) < extent;
			is_valid &= (clip_space_position[i_vertex][Z] - light_position[Z]) < extent;

			// NOTE(review): the values loaded from the light source above are
			// replaced by constants here, so the light's own position and
			// colour have no effect on the result.
			light_position[X] = set_all(0.0f);
			light_position[Y] = set_all(0.0f);
			light_position[Z] = set_all(0.0f);

			light_colour[X] = set_all(100.0f);
			light_colour[Y] = set_all(100.0f);
			light_colour[Z] = set_all(100.0f);

			__m128 light_ray[3];
			light_ray[X] = clip_space_position[i_vertex][X] - light_position[X];
			light_ray[Y] = clip_space_position[i_vertex][Y] - light_position[Y];
			light_ray[Z] = clip_space_position[i_vertex][Z] - light_position[Z];

			// This `mag` shadows the outer one; after the next line it holds the
			// reciprocal square root of the squared ray length.
			__m128 mag = (light_ray[X] * light_ray[X]) + (light_ray[Y] * light_ray[Y]) + (light_ray[Z] * light_ray[Z]);
			mag = _mm_rsqrt_ps(mag);
			light_ray[X] *= mag;
			light_ray[Y] *= mag;
			light_ray[Z] *= mag;

			__m128 dot = (normal[X] * light_ray[X]) + (normal[Y] * light_ray[Y]) + (normal[Z] * light_ray[Z]);
			dot &= dot > zero;	// keeps only lanes where dot is positive
			dot = (dot * dot) * mag;

			// NOTE(review): distance and scalar are computed but never used.
			__m128 distance = set_zero();
			for (__int32 i_axis = X; i_axis < W; i_axis++) {
				__m128 d = light_position[i_axis] - clip_space_position[i_vertex][i_axis];
				distance += (d * d);
			}
			__m128 scalar = reciprocal(distance) * attenuation_factor;
			scalar = max_vec(scalar, zero);
			scalar = min_vec(scalar, one);

			// accumulate both terms into every colour channel from R up to (not including) A
			for (__int32 i_channel = R; i_channel < A; i_channel++) {

				vertex_colour[i_vertex][i_channel] += dot * specular_scale * light_colour[i_channel];
				vertex_colour[i_vertex][i_channel] += mag * diffuse_scale * light_colour[i_channel];
			}
		}
	}

	// Transpose back and write the colours out per triangle.
	for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {
		Transpose(vertex_colour[i_vertex]);
		for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) {
			store_u(vertex_colour[i_vertex][i_triangle], colour[i_triangle][i_vertex].f);
		}
	}
}