// Calc DC for complex numbers in 4 SignalBlocks, divided by 4 SORA_EXTERN_C HRESULT BB11BGetAccurateDCOffset( IN PSORA_RADIO_RX_STREAM pRxStream, OUT vcs & dcOffset, OUT ULONG * pDescCount, OUT FLAG * touched) { int dcReSum = 0, dcImSum = 0; HRESULT hr = S_OK; ULONG count; SignalBlock block; for (count = 0; count < 4; count++) { hr = SoraRadioReadRxStream(pRxStream, touched, block); FAILED_BREAK(hr); dcOffset = SoraCalcDC(block); dcReSum += dcOffset[0].re; dcImSum += dcOffset[0].im; } *pDescCount += count; dcOffset[0].re = (short)(dcReSum >> 2); dcOffset[0].im = (short)(dcImSum >> 2); set_all(dcOffset, dcOffset[0]); return hr; }
/* ================== ================== */ void Process_Fragment_4x4( __int32 w_seed[2], __int32 i_tile_in, __int32 i_buffer_in, const unsigned __int32 coverage_mask, raster_output_& raster_output, shader_input_& shader_input ) { const __int32 i_buffer = i_buffer_in + (i_tile_in * 4 * 4); __m128i bazza[3][4]; for (__int32 i_edge = 0; i_edge < 2; i_edge++) { __m128i w_row = set_all(w_seed[i_edge]); bazza[i_edge][0] = w_row + load_u(raster_output.reject_table[0][i_edge][0]); bazza[i_edge][1] = w_row + load_u(raster_output.reject_table[0][i_edge][1]); bazza[i_edge][2] = w_row + load_u(raster_output.reject_table[0][i_edge][2]); bazza[i_edge][3] = w_row + load_u(raster_output.reject_table[0][i_edge][3]); } pixel_shader(i_buffer, coverage_mask, bazza, shader_input); const __int32 i_buffer_depth_4x4 = i_buffer / (4 * 4); const __int32 i_buffer_depth_16x16 = i_buffer / (16 * 16); const __int32 i_buffer_depth_64x64 = i_buffer / (64 * 64); shader_input.depth_tiles_4x4[i_buffer_depth_4x4] = shader_input.z_max; shader_input.tile_mask_16x16 |= one_bit_64 << i_buffer_depth_16x16; shader_input.tile_mask_64x64 |= one_bit_64 << i_buffer_depth_64x64; }
/** * @brief Constructor that takes a size and an optional bool value to initialize the Bitvector, * false by default. */ Bitvector (const size_t size, const bool initial_value = false) : size_(size) { // reserve enough bits, and init them. data_.resize( (size / IntSize) + (size % IntSize == 0 ? 0 : 1) ); set_all(initial_value); }
/*
 * Clear the output, then ramp all three channels together from 0 up to 254
 * and back down to 0, printing each level and pausing 200 ms between steps.
 *
 * The pause uses usleep(): sleep() takes a whole number of seconds, so the
 * previous sleep(.2) was converted to sleep(0) and did not wait at all.
 * usleep() is declared in <unistd.h>, the same header that declares sleep().
 */
void fade(void) {
    const unsigned int step_delay_us = 200000; /* 0.2 s per step */
    int level;

    clear();

    /* Ramp up. */
    for (level = 0; level < 255; level++) {
        printf("%d\n", level);
        set_all(level, level, level);
        usleep(step_delay_us);
    }

    /* Ramp back down to 0. */
    for (level = 254; level > -1; level--) {
        printf("%d\n", level);
        set_all(level, level, level);
        usleep(step_delay_us);
    }
}
inline void IsingLattice::initialize(){ // initialize the grid (all up) set_all(spin_up); m_total_magnetization = Nx() * Ny() * Nz(); // initial total magnetization = number of sites // As the energy of this initial configuration is the lowest possible for the system anyway, // it is conveniant to just set it to zero: m_total_energy = 0; }
void StimulusGroup::set_active_pattern(unsigned int i) { stringstream oss; oss << "StimulusGroup:: Setting active pattern " << i ; logger->msg(oss.str(),DEBUG); set_all( 0.0 ); if ( i < stimuli.size() ) { set_pattern_activity(i); } redraw(); }
// Shared initialisation for StimulusGroup.
//
//   filename     - pattern file, handed to load_patterns() at the end.
//   stimulusmode - stored as stimulus_order.
//   outputfile   - if non-empty, opened for writing as tiserfile; the process
//                  exits with status 1 when it cannot be opened.
//   baserate     - forwarded to set_baserate().
//
// Registers the group with the system, allocates the per-rank ttl/activity
// arrays, seeds the Poisson generator from the communicator rank and resets
// the stimulus state before loading the patterns.
void StimulusGroup::init(string filename, StimulusGroupModeType stimulusmode, string outputfile, AurynFloat baserate)
{
	sys->register_spiking_group(this);

	ttl = new AurynTime [get_rank_size()];
	activity = new AurynFloat [get_rank_size()];
	set_baserate(baserate);

	// Rank-dependent seed (note: evaluates to 0 on rank 0).
	poisson_gen.seed(162346*communicator->rank());

	mean_off_period = 1.0 ;
	mean_on_period = 0.2 ;
	stimulus_order = stimulusmode ;
	stimulus_active = false ;
	set_all( 0.0 );

	scale = 2.0;
	randomintervals = true;
	binary_patterns = false;

	if ( !outputfile.empty() )
	{
		tiserfile.open(outputfile.c_str(),ios::out);
		if (!tiserfile) {
			// Report the file that actually failed to open (the output file,
			// not the pattern file).
			stringstream oss;
			oss << "StimulusGroup:: Can't open output file " << outputfile;
			logger->msg(oss.str(),ERROR);
			exit(1);
		}
		tiserfile.setf(ios::fixed);
		// tiserfile.precision(5);
	}

	stringstream oss;
	oss << "StimulusGroup:: In mode " << stimulus_order;
	logger->msg(oss.str(),NOTIFICATION);

	cur_stim_index = 0;
	next_action_time = 0;
	active = true;
	off_pattern = -1;

	load_patterns(filename);
}
/* ================== ================== */ void Process_Fragment_64x64( __int32 w_seed[2], __int32 i_buffer_in, const unsigned __int32 coverage_mask, raster_output_& raster_output, shader_input_& shader_input ) { __int32 w_table[2][4 * 4]; for (__int32 i_edge = 0; i_edge < 2; i_edge++) { __m128i temp[4]; __m128i w_row = set_all(w_seed[i_edge]); temp[0] = w_row + load_u(raster_output.reject_table[2][i_edge][0]); temp[1] = w_row + load_u(raster_output.reject_table[2][i_edge][1]); temp[2] = w_row + load_u(raster_output.reject_table[2][i_edge][2]); temp[3] = w_row + load_u(raster_output.reject_table[2][i_edge][3]); store_u(temp[0], w_table[i_edge] + (0 << 2)); store_u(temp[1], w_table[i_edge] + (1 << 2)); store_u(temp[2], w_table[i_edge] + (2 << 2)); store_u(temp[3], w_table[i_edge] + (3 << 2)); } for (__int32 i_tile = 0; i_tile < 16; i_tile++) { __int32 w_tile[2]; w_tile[0] = w_table[0][i_tile]; w_tile[1] = w_table[1][i_tile]; Process_Fragment_16x16( w_tile, i_tile, i_buffer_in, coverage_mask, raster_output, shader_input ); } }
void RealMatrix::identity(void) { set_all(0.0); for(int i=0; i<n && i<m; ++i) operator()(i,i) = 1.0; }
/* ================== ================== */ void pixel_shader( const unsigned __int32 i_buffer, const unsigned __int32 coverage_mask, const __m128i bazza[3][4], shader_input_& shader_input ) { static const __m128 zero = set_zero(); static const __m128 half = set_all(0.5f); static const __m128 one = set_all(1.0f); static const __m128 two = one + one; static const __m128 three = two + one; static const __m128i zero_int = set_zero_si128(); static const __m128 colour_clamp = broadcast(load_s(255.0f)); unsigned __int32 depth_mask = 0x0; __m128 w_screen[2][4]; w_screen[0][0] = convert_float(bazza[0][0]) * shader_input.r_area; w_screen[0][1] = convert_float(bazza[0][1]) * shader_input.r_area; w_screen[0][2] = convert_float(bazza[0][2]) * shader_input.r_area; w_screen[0][3] = convert_float(bazza[0][3]) * shader_input.r_area; w_screen[1][0] = convert_float(bazza[1][0]) * shader_input.r_area; w_screen[1][1] = convert_float(bazza[1][1]) * shader_input.r_area; w_screen[1][2] = convert_float(bazza[1][2]) * shader_input.r_area; w_screen[1][3] = convert_float(bazza[1][3]) * shader_input.r_area; __m128 z_screen[4]; z_screen[0] = (shader_input.z_delta[X] * w_screen[0][0]) + (shader_input.z_delta[Y] * w_screen[1][0]) + shader_input.z_delta[Z]; z_screen[1] = (shader_input.z_delta[X] * w_screen[0][1]) + (shader_input.z_delta[Y] * w_screen[1][1]) + shader_input.z_delta[Z]; z_screen[2] = (shader_input.z_delta[X] * w_screen[0][2]) + (shader_input.z_delta[Y] * w_screen[1][2]) + shader_input.z_delta[Z]; z_screen[3] = (shader_input.z_delta[X] * w_screen[0][3]) + (shader_input.z_delta[Y] * w_screen[1][3]) + shader_input.z_delta[Z]; { //if (shader_input.is_test) { // __m128 x = convert_float(set_all(shader_input.x)); // __m128 y = convert_float(set_all(shader_input.y)); // y += set_all(0.5f); // x += set_all(0.5f); // x += set(0.0f, 1.0f, 2.0f, 3.0f); // __m128 y_block[4]; // y_block[0] = y; // y_block[1] = y + one; // y_block[2] = y + two; // y_block[3] = y + three; // __m128 
z_interpolant[3]; // z_interpolant[X] = set_all(shader_input.depth_interpolants[X]); // z_interpolant[Y] = set_all(shader_input.depth_interpolants[Y]); // z_interpolant[Z] = set_all(shader_input.depth_interpolants[Z]); // z_screen[0] = (z_interpolant[X] * x) + (z_interpolant[Y] * y_block[0]) + z_interpolant[Z]; // z_screen[1] = (z_interpolant[X] * x) + (z_interpolant[Y] * y_block[1]) + z_interpolant[Z]; // z_screen[2] = (z_interpolant[X] * x) + (z_interpolant[Y] * y_block[2]) + z_interpolant[Z]; // z_screen[3] = (z_interpolant[X] * x) + (z_interpolant[Y] * y_block[3]) + z_interpolant[Z]; //} } __m128i pixel_mask[4]; pixel_mask[0] = load_mask[(coverage_mask >> 0) & 0xf]; pixel_mask[1] = load_mask[(coverage_mask >> 4) & 0xf]; pixel_mask[2] = load_mask[(coverage_mask >> 8) & 0xf]; pixel_mask[3] = load_mask[(coverage_mask >> 12) & 0xf]; __m128 z_buffer[4]; z_buffer[0] = load(shader_input.depth_buffer + i_buffer + 0); z_buffer[1] = load(shader_input.depth_buffer + i_buffer + 4); z_buffer[2] = load(shader_input.depth_buffer + i_buffer + 8); z_buffer[3] = load(shader_input.depth_buffer + i_buffer + 12); __m128i z_mask[4]; z_mask[0] = (z_screen[0] > z_buffer[0]) & pixel_mask[0]; z_mask[1] = (z_screen[1] > z_buffer[1]) & pixel_mask[1]; z_mask[2] = (z_screen[2] > z_buffer[2]) & pixel_mask[2]; z_mask[3] = (z_screen[3] > z_buffer[3]) & pixel_mask[3]; depth_mask |= store_mask(z_mask[0]) << 0; depth_mask |= store_mask(z_mask[1]) << 4; depth_mask |= store_mask(z_mask[2]) << 8; depth_mask |= store_mask(z_mask[3]) << 12; __m128 z_write[4]; z_write[0] = blend(z_screen[0], z_buffer[0], z_mask[0]); z_write[1] = blend(z_screen[1], z_buffer[1], z_mask[1]); z_write[2] = blend(z_screen[2], z_buffer[2], z_mask[2]); z_write[3] = blend(z_screen[3], z_buffer[3], z_mask[3]); { __m128 z_max; z_max = z_write[0]; z_max = min_vec(z_write[1], z_max); z_max = min_vec(z_write[2], z_max); z_max = min_vec(z_write[3], z_max); __m128 z_out = z_max; z_max = rotate_left(z_max); z_out = min_vec(z_max, 
z_out); z_max = rotate_left(z_max); z_out = min_vec(z_max, z_out); z_max = rotate_left(z_max); z_out = min_vec(z_max, z_out); shader_input.z_max = store_s(z_out); } store(z_write[0], shader_input.depth_buffer + i_buffer + 0); store(z_write[1], shader_input.depth_buffer + i_buffer + 4); store(z_write[2], shader_input.depth_buffer + i_buffer + 8); store(z_write[3], shader_input.depth_buffer + i_buffer + 12); if (depth_mask == 0x0) { return; } __m128 screen_barry[2][4]; screen_barry[0][0] = (w_screen[0][0] * shader_input.barycentric[0][X]) + (w_screen[1][0] * shader_input.barycentric[0][Y]) + shader_input.barycentric[0][Z]; screen_barry[0][1] = (w_screen[0][1] * shader_input.barycentric[0][X]) + (w_screen[1][1] * shader_input.barycentric[0][Y]) + shader_input.barycentric[0][Z]; screen_barry[0][2] = (w_screen[0][2] * shader_input.barycentric[0][X]) + (w_screen[1][2] * shader_input.barycentric[0][Y]) + shader_input.barycentric[0][Z]; screen_barry[0][3] = (w_screen[0][3] * shader_input.barycentric[0][X]) + (w_screen[1][3] * shader_input.barycentric[0][Y]) + shader_input.barycentric[0][Z]; screen_barry[1][0] = (w_screen[0][0] * shader_input.barycentric[1][X]) + (w_screen[1][0] * shader_input.barycentric[1][Y]) + shader_input.barycentric[1][Z]; screen_barry[1][1] = (w_screen[0][1] * shader_input.barycentric[1][X]) + (w_screen[1][1] * shader_input.barycentric[1][Y]) + shader_input.barycentric[1][Z]; screen_barry[1][2] = (w_screen[0][2] * shader_input.barycentric[1][X]) + (w_screen[1][2] * shader_input.barycentric[1][Y]) + shader_input.barycentric[1][Z]; screen_barry[1][3] = (w_screen[0][3] * shader_input.barycentric[1][X]) + (w_screen[1][3] * shader_input.barycentric[1][Y]) + shader_input.barycentric[1][Z]; __m128 r_depth[4]; r_depth[0] = reciprocal(z_screen[0]); r_depth[1] = reciprocal(z_screen[1]); r_depth[2] = reciprocal(z_screen[2]); r_depth[3] = reciprocal(z_screen[3]); __m128 w_clip[2][4]; w_clip[0][0] = screen_barry[0][0] * r_depth[0]; w_clip[0][1] = 
screen_barry[0][1] * r_depth[1]; w_clip[0][2] = screen_barry[0][2] * r_depth[2]; w_clip[0][3] = screen_barry[0][3] * r_depth[3]; w_clip[1][0] = screen_barry[1][0] * r_depth[0]; w_clip[1][1] = screen_barry[1][1] * r_depth[1]; w_clip[1][2] = screen_barry[1][2] * r_depth[2]; w_clip[1][3] = screen_barry[1][3] * r_depth[3]; __m128i colour_out[4]; { const vertex4_* gradients = shader_input.gradients[ATTRIBUTE_COLOUR]; __m128 red_float[4]; red_float[0] = (gradients[R].x * w_clip[0][0]) + (gradients[R].y * w_clip[1][0]) + gradients[R].z; red_float[1] = (gradients[R].x * w_clip[0][1]) + (gradients[R].y * w_clip[1][1]) + gradients[R].z; red_float[2] = (gradients[R].x * w_clip[0][2]) + (gradients[R].y * w_clip[1][2]) + gradients[R].z; red_float[3] = (gradients[R].x * w_clip[0][3]) + (gradients[R].y * w_clip[1][3]) + gradients[R].z; __m128 green_float[4]; green_float[0] = (gradients[G].x * w_clip[0][0]) + (gradients[G].y * w_clip[1][0]) + gradients[G].z; green_float[1] = (gradients[G].x * w_clip[0][1]) + (gradients[G].y * w_clip[1][1]) + gradients[G].z; green_float[2] = (gradients[G].x * w_clip[0][2]) + (gradients[G].y * w_clip[1][2]) + gradients[G].z; green_float[3] = (gradients[G].x * w_clip[0][3]) + (gradients[G].y * w_clip[1][3]) + gradients[G].z; __m128 blue_float[4]; blue_float[0] = (gradients[B].x * w_clip[0][0]) + (gradients[B].y * w_clip[1][0]) + gradients[B].z; blue_float[1] = (gradients[B].x * w_clip[0][1]) + (gradients[B].y * w_clip[1][1]) + gradients[B].z; blue_float[2] = (gradients[B].x * w_clip[0][2]) + (gradients[B].y * w_clip[1][2]) + gradients[B].z; blue_float[3] = (gradients[B].x * w_clip[0][3]) + (gradients[B].y * w_clip[1][3]) + gradients[B].z; red_float[0] = min_vec(max_vec(red_float[0], zero), colour_clamp); red_float[1] = min_vec(max_vec(red_float[1], zero), colour_clamp); red_float[2] = min_vec(max_vec(red_float[2], zero), colour_clamp); red_float[3] = min_vec(max_vec(red_float[3], zero), colour_clamp); green_float[0] = min_vec(max_vec(green_float[0], 
zero), colour_clamp); green_float[1] = min_vec(max_vec(green_float[1], zero), colour_clamp); green_float[2] = min_vec(max_vec(green_float[2], zero), colour_clamp); green_float[3] = min_vec(max_vec(green_float[3], zero), colour_clamp); blue_float[0] = min_vec(max_vec(blue_float[0], zero), colour_clamp); blue_float[1] = min_vec(max_vec(blue_float[1], zero), colour_clamp); blue_float[2] = min_vec(max_vec(blue_float[2], zero), colour_clamp); blue_float[3] = min_vec(max_vec(blue_float[3], zero), colour_clamp); __m128i red_int[4]; red_int[0] = convert_int_trunc(red_float[0]); red_int[1] = convert_int_trunc(red_float[1]); red_int[2] = convert_int_trunc(red_float[2]); red_int[3] = convert_int_trunc(red_float[3]); __m128i green_int[4]; green_int[0] = convert_int_trunc(green_float[0]); green_int[1] = convert_int_trunc(green_float[1]); green_int[2] = convert_int_trunc(green_float[2]); green_int[3] = convert_int_trunc(green_float[3]); __m128i blue_int[4]; blue_int[0] = convert_int_trunc(blue_float[0]); blue_int[1] = convert_int_trunc(blue_float[1]); blue_int[2] = convert_int_trunc(blue_float[2]); blue_int[3] = convert_int_trunc(blue_float[3]); colour_out[0] = red_int[0] | (green_int[0] << 8) | (blue_int[0] << 16); colour_out[1] = red_int[1] | (green_int[1] << 8) | (blue_int[1] << 16); colour_out[2] = red_int[2] | (green_int[2] << 8) | (blue_int[2] << 16); colour_out[3] = red_int[3] | (green_int[3] << 8) | (blue_int[3] << 16); } float4_ u_table[4]; float4_ v_table[4]; { const vertex4_* gradients = shader_input.gradients[ATTRIBUTE_TEXCOORD]; __m128 u_axis[4]; u_axis[0] = (gradients[U].x * w_clip[0][0]) + (gradients[U].y * w_clip[1][0]) + gradients[U].z; u_axis[1] = (gradients[U].x * w_clip[0][1]) + (gradients[U].y * w_clip[1][1]) + gradients[U].z; u_axis[2] = (gradients[U].x * w_clip[0][2]) + (gradients[U].y * w_clip[1][2]) + gradients[U].z; u_axis[3] = (gradients[U].x * w_clip[0][3]) + (gradients[U].y * w_clip[1][3]) + gradients[U].z; __m128 v_axis[4]; v_axis[0] = 
(gradients[V].x * w_clip[0][0]) + (gradients[V].y * w_clip[1][0]) + gradients[V].z; v_axis[1] = (gradients[V].x * w_clip[0][1]) + (gradients[V].y * w_clip[1][1]) + gradients[V].z; v_axis[2] = (gradients[V].x * w_clip[0][2]) + (gradients[V].y * w_clip[1][2]) + gradients[V].z; v_axis[3] = (gradients[V].x * w_clip[0][3]) + (gradients[V].y * w_clip[1][3]) + gradients[V].z; store_u(u_axis[0], u_table[0].f); store_u(u_axis[1], u_table[1].f); store_u(u_axis[2], u_table[2].f); store_u(u_axis[3], u_table[3].f); store_u(v_axis[0], v_table[0].f); store_u(v_axis[1], v_table[1].f); store_u(v_axis[2], v_table[2].f); store_u(v_axis[3], v_table[3].f); } const texture_handler_& texture_handler = *shader_input.texture_handler; float2_ du; du.x = (u_table[0].f[3] - u_table[0].f[0]) * (float)texture_handler.width; du.y = (u_table[3].f[0] - u_table[0].f[0]) * (float)texture_handler.width; float2_ dv; dv.x = (v_table[0].f[3] - v_table[0].f[0]) * (float)texture_handler.height; dv.y = (v_table[3].f[0] - v_table[0].f[0]) * (float)texture_handler.height; float area = abs((du.x * dv.y) - (du.y * dv.x)) * shader_input.mip_level_bias; unsigned long area_int = 1 + (unsigned long)(area + 0.5f); __int32 i_mip_floor; _BitScanReverse((unsigned long*)&i_mip_floor, area_int); i_mip_floor = max(i_mip_floor, 0); i_mip_floor = min(i_mip_floor, texture_handler.n_mip_levels - 1); const __int32 width = texture_handler.width >> i_mip_floor; const __int32 height = texture_handler.height >> i_mip_floor; const __int32 shift = texture_handler.width_shift - i_mip_floor; const __m128i texture_width_int = set_all(width); const __m128 texture_width = convert_float(set_all(width)); const __m128 texture_height = convert_float(set_all(height)); const __m128i width_clamp = set_all(width - 1); const __m128i height_clamp = set_all(height - 1); const __m128i width_shift = load_s(shift); __m128i tex_out[4]; { __m128 u_axis[4]; u_axis[0] = (load_u(u_table[0].f) * texture_width); // - half; u_axis[1] = (load_u(u_table[1].f) 
* texture_width); // - half; u_axis[2] = (load_u(u_table[2].f) * texture_width); // - half; u_axis[3] = (load_u(u_table[3].f) * texture_width); // - half; __m128 v_axis[4]; v_axis[0] = (load_u(v_table[0].f) * texture_height); // - half; v_axis[1] = (load_u(v_table[1].f) * texture_height); // - half; v_axis[2] = (load_u(v_table[2].f) * texture_height); // - half; v_axis[3] = (load_u(v_table[3].f) * texture_height); // - half; __m128i u_int[4]; u_int[0] = convert_int_trunc(u_axis[0]); u_int[1] = convert_int_trunc(u_axis[1]); u_int[2] = convert_int_trunc(u_axis[2]); u_int[3] = convert_int_trunc(u_axis[3]); __m128i v_int[4]; v_int[0] = convert_int_trunc(v_axis[0]); v_int[1] = convert_int_trunc(v_axis[1]); v_int[2] = convert_int_trunc(v_axis[2]); v_int[3] = convert_int_trunc(v_axis[3]); u_int[0] = max_vec(min_vec(u_int[0], width_clamp), zero_int); u_int[1] = max_vec(min_vec(u_int[1], width_clamp), zero_int); u_int[2] = max_vec(min_vec(u_int[2], width_clamp), zero_int); u_int[3] = max_vec(min_vec(u_int[3], width_clamp), zero_int); v_int[0] = max_vec(min_vec(v_int[0], height_clamp), zero_int); v_int[1] = max_vec(min_vec(v_int[1], height_clamp), zero_int); v_int[2] = max_vec(min_vec(v_int[2], height_clamp), zero_int); v_int[3] = max_vec(min_vec(v_int[3], height_clamp), zero_int); __m128i i_texels[4]; i_texels[0] = u_int[0] + (v_int[0] * texture_width_int); i_texels[1] = u_int[1] + (v_int[1] * texture_width_int); i_texels[2] = u_int[2] + (v_int[2] * texture_width_int); i_texels[3] = u_int[3] + (v_int[3] * texture_width_int); __int32 i_texels_in[4][4]; store_u(i_texels[0], i_texels_in[0]); store_u(i_texels[1], i_texels_in[1]); store_u(i_texels[2], i_texels_in[2]); store_u(i_texels[3], i_texels_in[3]); unsigned __int32 texels_out[4][4]; texels_out[0][0] = texture_handler.texture[i_mip_floor][i_texels_in[0][0]]; texels_out[0][1] = texture_handler.texture[i_mip_floor][i_texels_in[0][1]]; texels_out[0][2] = texture_handler.texture[i_mip_floor][i_texels_in[0][2]]; 
texels_out[0][3] = texture_handler.texture[i_mip_floor][i_texels_in[0][3]]; texels_out[1][0] = texture_handler.texture[i_mip_floor][i_texels_in[1][0]]; texels_out[1][1] = texture_handler.texture[i_mip_floor][i_texels_in[1][1]]; texels_out[1][2] = texture_handler.texture[i_mip_floor][i_texels_in[1][2]]; texels_out[1][3] = texture_handler.texture[i_mip_floor][i_texels_in[1][3]]; texels_out[2][0] = texture_handler.texture[i_mip_floor][i_texels_in[2][0]]; texels_out[2][1] = texture_handler.texture[i_mip_floor][i_texels_in[2][1]]; texels_out[2][2] = texture_handler.texture[i_mip_floor][i_texels_in[2][2]]; texels_out[2][3] = texture_handler.texture[i_mip_floor][i_texels_in[2][3]]; texels_out[3][0] = texture_handler.texture[i_mip_floor][i_texels_in[3][0]]; texels_out[3][1] = texture_handler.texture[i_mip_floor][i_texels_in[3][1]]; texels_out[3][2] = texture_handler.texture[i_mip_floor][i_texels_in[3][2]]; texels_out[3][3] = texture_handler.texture[i_mip_floor][i_texels_in[3][3]]; tex_out[0] = load_u(texels_out[0]); tex_out[1] = load_u(texels_out[1]); tex_out[2] = load_u(texels_out[2]); tex_out[3] = load_u(texels_out[3]); } __m128i colour_buffer[4]; colour_buffer[0] = load(shader_input.colour_buffer + i_buffer + 0); colour_buffer[1] = load(shader_input.colour_buffer + i_buffer + 4); colour_buffer[2] = load(shader_input.colour_buffer + i_buffer + 8); colour_buffer[3] = load(shader_input.colour_buffer + i_buffer + 12); colour_buffer[0] = _mm_andnot_si128(z_mask[0], colour_buffer[0]); colour_buffer[1] = _mm_andnot_si128(z_mask[1], colour_buffer[1]); colour_buffer[2] = _mm_andnot_si128(z_mask[2], colour_buffer[2]); colour_buffer[3] = _mm_andnot_si128(z_mask[3], colour_buffer[3]); colour_buffer[0] = add_uint8_saturate(colour_buffer[0], colour_out[0] & z_mask[0]); colour_buffer[1] = add_uint8_saturate(colour_buffer[1], colour_out[1] & z_mask[1]); colour_buffer[2] = add_uint8_saturate(colour_buffer[2], colour_out[2] & z_mask[2]); colour_buffer[3] = 
add_uint8_saturate(colour_buffer[3], colour_out[3] & z_mask[3]); colour_buffer[0] = add_uint8_saturate(colour_buffer[0], tex_out[0] & z_mask[0]); colour_buffer[1] = add_uint8_saturate(colour_buffer[1], tex_out[1] & z_mask[1]); colour_buffer[2] = add_uint8_saturate(colour_buffer[2], tex_out[2] & z_mask[2]); colour_buffer[3] = add_uint8_saturate(colour_buffer[3], tex_out[3] & z_mask[3]); store(colour_buffer[0], shader_input.colour_buffer + i_buffer + 0); store(colour_buffer[1], shader_input.colour_buffer + i_buffer + 4); store(colour_buffer[2], shader_input.colour_buffer + i_buffer + 8); store(colour_buffer[3], shader_input.colour_buffer + i_buffer + 12); }
/*
 * Run the set_all() helper on the bitmap and report a failure unless it
 * returns 0.
 */
void test_bitmap_set_all(opal_bitmap_t *bm)
{
    int result = set_all(bm);

    /* The message previously named a garbled function ("set_ala_bitsl"). */
    TEST_AND_REPORT(result, 0, " error in opal_bitmap_set_all_bits");
}
int main(int argc, char* argv[]) { #ifdef __linux__ struct sigaction sig_struct; sig_struct.sa_handler = sig_handler; sig_struct.sa_flags = 0; sigemptyset(&sig_struct.sa_mask); if (sigaction(SIGINT, &sig_struct, NULL) == -1) { cout << "Problem with sigaction" << endl; exit(1); } #endif // __linux__ /// === File read needed if moving something from PC to here // myFP = fopen(netname, "a+"); // if(myFP == NULL) // { // cout<<"ERROR opening"<<endl; // exit(1); // } // fread(read_buf, 1, 100, myFP); // int buffersize = strlen(read_buf); // fclose(myFP); // cout<<"read the file: "<<read_buf<<endl; // ============================================================= int lcdp=lcd_open(); int adcp=ADS1015_Init("/dev/i2c-1"); PCA9685 myPCA={0x40, 0, 69, 0, 0, 0x11, 0x4, 50, 0x79,}; // control structure myPCA.file=PCA_Init("/dev/i2c-1"); PCA9685_start(myPCA.file); //adcresult=read_convert_register(adcp); //sprintf(dis_buf, "ADC: %6.3f V", adcresult); //lcd_write(dis_buf); lcd_write("Hello from Steve's\nLCD stuff"); lcd_clear(); get_NIST(); mcp23s17_enable_interrupts(GPIO_INTERRUPT_PIN); //mcp23s17_enable_interrupts(SW_GPIO_INTERRUPT_PIN); cout.setf(ios::fixed); //=== SET CURRENT TIME ========================== struct tm *newtime; //--- for time now time_t long_time; //--- Get time as long integer. 
double DeltaT=0.0; //--- time since is in minutes Observer PLACENTIA={"Yorba Linda",Rad(33.909),Rad(-117.782),30.0,0}; //Observer PHILLY={"Philly",Rad(40.0),Rad(-75.0),0.0,0}; double sdctime; SATELSET Eset; SATPOS satpos; //ELLIPSE myEllipse; //double SP,JDG,E2JD,JDN; double JDG,E2JD,JDN; VectorIJK test,test1; //ptest; //VectLook testlook; SATSUB SB; clock_t goal; clock_t wait=(clock_t)2 * CLOCKS_PER_SEC; // change the 2 for update rate, 2= about 2 seconds Read_TLE(argv[1], Eset); // read the 2 line data do { time( &long_time ); newtime=gmtime( &long_time ); // time, expressed as a UTC time, GMT timezone JDN=JD_Now(newtime); //--- JD based on system clock as GMT JDG=ThetaG_JD(JDN); //--- in radians E2JD=Epoch2JD(Eset.iEpochYear,Eset.dEpochDay); //--- JD based on TLE epoch double local_time=0.0; double test_time=0.0; local_time=newtime->tm_yday+1+(newtime->tm_hour+(newtime->tm_min+newtime->tm_sec/60.0)/60.0)/24.0; test_time=local_time-Eset.dEpochDay; //cout<<"test_time delta days "<<test_time<<endl; test_time*=1440.0; //cout<<"test_time delta minutes "<<test_time<<endl; /************************************** local_time minus Eset.dEpochDay matches JDN-E2JD. And is easier to check and calculate and no need for all the JD and JD0 code. 
*************************************/ sdctime=JDN-E2JD; //--- delta days sdctime*=1440.0; // delta minutes //sdctime=fmod(sdctime,60); //cout<<"Current sdctime "<<sdctime<<endl; DeltaT=sdctime; //satpos=SatPos(DeltaT, &Eset); //--- get satellite position satpos=clean_SatPos(DeltaT, &Eset); cout<<"=====Satellite ECI position============================\n"<<satpos; test=Obs_Position(PLACENTIA,JDG); //--- get observer position //test1=Obs_to_ECI(PHILLY,JDG); //-- test data from TS Kelso test1=Obs_to_ECI(PLACENTIA,JDG); testlook=LookAngles(satpos, PLACENTIA,JDG); //--- get look angles SB= SatSubPoint(satpos,JDG); cout<<"=====Observer ECI====================\n"<<test1; cout<<"=====Observer Look angles============\n"<<testlook; // for antenna tracker cout<<"=====Sat Sub Point===================\n"<<SB; /// used before //int s_count=read_convert_register_count(adcp); //set_count(myPCA.file, 0, 5, s_count); // file channel, start count, end count /// LCD setup and stuff adcresult=read_convert_register_volts(adcp); sprintf(dis_buf, "ADC: %6.3f V", adcresult); lcd_write(dis_buf); /// aztovolts is the target reference position double aztovolts = (Deg(testlook.AZ)) * (3.2/360.0); /// wtf is the difference of the pot input, adcresults, and reference double wtf = aztovolts - adcresult; /// double - float printf("\nVOLTS ADC: %6.3f V\n",adcresult); printf("AZ Degrees: %6.3f \n",Deg(testlook.AZ)); printf("AZ to volts: %6.3f V\n",aztovolts); printf("DELTA: %6.3f \n",wtf); /** volts 0 1.6 3.2 count 200 320 450 max left no motion max right 1 ms 1.5 ms 2ms 50 hz timing **/ /// for applying delta Vin //float PCAcount = (wtf*80)+320; ///this is for 0 - 3.2 Vin float PCAcount = (wtf*80)+300; ///this is for 0 - 3.2 Vin, 340 from measurements if(wtf< -1.25) PCAcount = 240; else if(wtf> 1.25) PCAcount = 425; //set_count(myPCA.file, 0, 5, PCAcount); // file channel, start count, end count set_count(myPCA.file, 0, 1, PCAcount); // file channel, start count, end count //set_count(myPCA.file, 
1, 1, PCAcount); // file channel, start count, end count printf("MOTOR count: %6.3f \n",PCAcount); //#define TRACK 0 //#define LOCATION 1 //#define SATDATA 2 //#define NIST 3 if(display_count < 5) { display_control(TRACK, PLACENTIA, SB, Eset, testlook); display_count++; LED_off(GPIO_INTERRUPT_PIN); } else { display_control(LOCATION, PLACENTIA, SB, Eset, testlook); display_count++; LED_on(GPIO_INTERRUPT_PIN); } if(display_count > 10) { display_count = 0; } /** ==================== Look angles:visible AZ:123456 EL:123456 Sat LAT/LONG LT:123456 LG:123456 ==================== Location Yorba Linda LT:123456 LG:123456 Range: 123456 ==================== Tracking:ISS (ZARYA) Incl:12345 MM: 123456 MA: 123456 **/ /// LCD done goal = wait + clock(); while( goal > clock() ); #ifdef __linux__ if(ctrl_c_pressed) { cout << "Ctrl^C Pressed" << endl; cout << "unexporting pins" << endl; //gpio26->unexport_gpio(); //gpio16->unexport_gpio(); mcp23s17_disable_interrupts(GPIO_INTERRUPT_PIN); //mcp23s17_disable_interrupts(SW_GPIO_INTERRUPT_PIN); cout << "deallocating GPIO Objects" << endl; //delete gpio26; //gpio26 = 0; //delete gpio16; //gpio16 =0; break; } #endif // __linux__ } #ifdef __linux__ while(1); #elif _WIN32 while(!(_kbhit())); #else #endif //while(1); //while(!(_kbhit())); //pthread_exit(NULL); set_all(myPCA.file, 0, 0); /// kill the servos lcd_close(); /// kill the LCD return 0; }
int check_record(struct database *db, struct Table* theTable, char* key, char predicate_name[MAX_COLUMNS_PER_TABLE][MAX_COLNAME_LEN+1], char predicate_value[MAX_COLUMNS_PER_TABLE][MAX_STRTYPE_SIZE+1], char*return_string) { int i = 0; while(predicate_name[i][0] != '\0') { printf("Column %d: %s\n Value %d: %s\n", i, predicate_name[i], i, predicate_value[i]); i++; } //check if out of order int pn_cnt = 0; int cn_cnt = 0; while(predicate_name[pn_cnt][0] != '\0') { while(theTable->col_names[cn_cnt][0] != '\0') { if(strcmp(predicate_name[pn_cnt], theTable->col_names[cn_cnt]) == 0) { cn_cnt++; break; } cn_cnt++; } if(theTable->col_names[cn_cnt][0] == '\0') { pn_cnt++; if(predicate_name[pn_cnt][0] != '\0') { strcpy(return_string, "E INVALID_PARAM"); return -3; } if (predicate_name[pn_cnt][0] == '\0') { if(strcmp(predicate_name[pn_cnt-1], theTable->col_names[cn_cnt-1]) != 0) { strcpy(return_string, "E INVALID_PARAM"); return -3; } } } pn_cnt++; } int predicate_names_cnt = 0; return_string[0] = 'S'; return_string[1] = '\0'; int found = 0; while(predicate_name[predicate_names_cnt][0] != '\0') { printf("WHAT IS THE PREDICATE NAME? :%s\n",predicate_name[predicate_names_cnt] ); int col_names_cnt = 0; while(theTable->col_names[col_names_cnt][0] != '\0') { printf("WHAT IS THE COL NAME? 
:%s\n",theTable->col_names[col_names_cnt] ); if(strcmp(theTable->col_names[col_names_cnt], predicate_name[predicate_names_cnt]) == 0) {printf("AM HERE\n"); found = 1; if(theTable->col_string_size[col_names_cnt] == 0) { int value_cnt = 0; if((predicate_value[predicate_names_cnt][value_cnt] == '+') ||(predicate_value[predicate_names_cnt][value_cnt] == '-')) { value_cnt++; } while(predicate_value[predicate_names_cnt][value_cnt] != '\0') {printf("AM HERE pnamecnt is %s\n", predicate_value[predicate_names_cnt]); if(isdigit(predicate_value[predicate_names_cnt][value_cnt]) == 0) {printf("AM HERE IS WRONG:%c:\n", predicate_value[predicate_names_cnt][value_cnt]); strcpy(return_string, "E INVALID_PARAM"); return -3; } value_cnt++; } } } col_names_cnt++; } if(found == 0) { strcpy(return_string, "E INVALID_PARAM"); return -3; } found = 0; predicate_names_cnt++; } int stat = set_all(db, theTable, key, predicate_name, predicate_value); return stat; }
// Reset the array by passing 0 to set_all().
void WorkerDataArray<T>::clear()
{
  this->set_all(0);
}
// Delegates to set_all() with the value 1.0.
void RealMatrix::one(void)
{
    set_all(1.0);
}
/*
==================
Vertex_Lighting

Adds a lighting contribution to the vertex colours of up to four triangles.

n_triangles          - number of entries of positions[] / colour[] that are
                       loaded and written back (loop bound below)
vertex_light_manager - source of the light data read at light_sources[0]
positions            - per triangle, per vertex position (read through .f)
colour               - per triangle, per vertex colour (read through .f,
                       accumulated into and stored back)

NOTE(review): several values computed below (is_valid, distance, scalar and
the light data fetched from vertex_light_manager) do not influence the stored
colours; see the inline notes.
==================
*/
void Vertex_Lighting(

    const __int32 n_triangles,
    const vertex_light_manager_& vertex_light_manager,
    const float4_ positions[4][3],
    float4_ colour[4][3]

) {

    static const float r_screen_scale_x = 1.0f / screen_scale_x;
    static const float r_screen_scale_y = 1.0f / screen_scale_y;

    const __m128 attenuation_factor = set_all(800.0f);
    const __m128 specular_scale = set_all(100.0f);
    const __m128 diffuse_scale = set_all(20.0f);

    const __m128 zero = set_all(0.0f);
    const __m128 one = set_all(1.0f);

    __m128 r_screen_scale[2];
    r_screen_scale[X] = set_all(r_screen_scale_x);
    r_screen_scale[Y] = set_all(r_screen_scale_y);
    __m128 screen_shift[2];
    screen_shift[X] = set_all(screen_shift_x);
    screen_shift[Y] = set_all(screen_shift_y);

    __m128 clip_space_position[3][4];
    __m128 vertex_colour[3][4];

    // For each of the three vertices: load position and colour of every
    // triangle, transpose, then undo the screen shift/scale and multiply by
    // the reciprocal of Z.
    for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {

        __m128 vertex_position[4];
        // Only the first n_triangles entries are written before Transpose is
        // called on the whole array.
        for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) {
            vertex_position[i_triangle] = load_u(positions[i_triangle][i_vertex].f);
            vertex_colour[i_vertex][i_triangle] = load_u(colour[i_triangle][i_vertex].f);
        }
        // presumably switches from one register per triangle to one register
        // per component (the arrays are indexed by X/Y/Z and R/G/B afterwards)
        Transpose(vertex_position);
        Transpose(vertex_colour[i_vertex]);

        __m128 depth = reciprocal(vertex_position[Z]);
        clip_space_position[i_vertex][X] = ((vertex_position[X] - screen_shift[X]) * r_screen_scale[X]) * depth;
        clip_space_position[i_vertex][Y] = ((vertex_position[Y] - screen_shift[Y]) * r_screen_scale[Y]) * depth;
        clip_space_position[i_vertex][Z] = depth;
    }

    // Edge vectors from vertex 0 to vertices 1 and 2.
    __m128 a[3];
    a[X] = clip_space_position[1][X] - clip_space_position[0][X];
    a[Y] = clip_space_position[1][Y] - clip_space_position[0][Y];
    a[Z] = clip_space_position[1][Z] - clip_space_position[0][Z];

    __m128 b[3];
    b[X] = clip_space_position[2][X] - clip_space_position[0][X];
    b[Y] = clip_space_position[2][Y] - clip_space_position[0][Y];
    b[Z] = clip_space_position[2][Z] - clip_space_position[0][Z];

    // Cross product of the two edges, scaled by the reciprocal square root of
    // its squared length.
    __m128 normal[4];
    normal[X] = (a[Y] * b[Z]) - (a[Z] * b[Y]);
    normal[Y] = (a[Z] * b[X]) - (a[X] * b[Z]);
    normal[Z] = (a[X] * b[Y]) - (a[Y] * b[X]);

    __m128 mag = (normal[X] * normal[X]) + (normal[Y] * normal[Y]) + (normal[Z] * normal[Z]);
    mag = _mm_rsqrt_ps(mag);
    normal[X] *= mag;
    normal[Y] *= mag;
    normal[Z] *= mag;

    // The loop bound is the literal 1, so only light index 0 is visited.
    for (__int32 i_light = 0; i_light < 1; i_light++) {

        for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {

            __m128 light_position[3];
            __m128 light_colour[3];
            const float intensity = vertex_light_manager.light_sources[i_light].intensity;
            for (__int32 i_axis = X; i_axis < W; i_axis++) {
                light_position[i_axis] = set_all(vertex_light_manager.light_sources[i_light].position.f[i_axis]);
                light_colour[i_axis] = set_all(vertex_light_manager.light_sources[i_light].colour.f[i_axis] * intensity);
            }

            // NOTE(review): is_valid is computed but never read afterwards.
            const __m128 extent = set_all(40.0f);
            __m128i is_valid = set_all(-1);
            is_valid &= (clip_space_position[i_vertex][X] - light_position[X]) < extent;
            is_valid &= (clip_space_position[i_vertex][Y] - light_position[Y]) < extent;
            is_valid &= (clip_space_position[i_vertex][Z] - light_position[Z]) < extent;

            // NOTE(review): the values fetched from the light manager above
            // are overwritten here with constants (position 0, colour 100).
            light_position[X] = set_all(0.0f);
            light_position[Y] = set_all(0.0f);
            light_position[Z] = set_all(0.0f);

            light_colour[X] = set_all(100.0f);
            light_colour[Y] = set_all(100.0f);
            light_colour[Z] = set_all(100.0f);

            __m128 light_ray[3];
            light_ray[X] = clip_space_position[i_vertex][X] - light_position[X];
            light_ray[Y] = clip_space_position[i_vertex][Y] - light_position[Y];
            light_ray[Z] = clip_space_position[i_vertex][Z] - light_position[Z];

            // This mag shadows the outer one; it holds the reciprocal square
            // root of the squared ray length and is reused in the colour sums.
            __m128 mag = (light_ray[X] * light_ray[X]) + (light_ray[Y] * light_ray[Y]) + (light_ray[Z] * light_ray[Z]);
            mag = _mm_rsqrt_ps(mag);
            light_ray[X] *= mag;
            light_ray[Y] *= mag;
            light_ray[Z] *= mag;

            __m128 dot = (normal[X] * light_ray[X]) + (normal[Y] * light_ray[Y]) + (normal[Z] * light_ray[Z]);
            // presumably zeroes the lanes where dot is not positive - depends
            // on the project's operator overloads for __m128
            dot &= dot > zero;
            dot = (dot * dot) * mag;

            __m128 distance = set_zero();
            for (__int32 i_axis = X; i_axis < W; i_axis++) {
                __m128 d = light_position[i_axis] - clip_space_position[i_vertex][i_axis];
                distance += (d * d);
            }
            // NOTE(review): scalar is clamped to [zero, one] but not used in
            // the accumulation below.
            __m128 scalar = reciprocal(distance) * attenuation_factor;
            scalar = max_vec(scalar, zero);
            scalar = min_vec(scalar, one);

            for (__int32 i_channel = R; i_channel < A; i_channel++) {
                vertex_colour[i_vertex][i_channel] += dot * specular_scale * light_colour[i_channel];
                vertex_colour[i_vertex][i_channel] += mag * diffuse_scale * light_colour[i_channel];
            }
        }
    }

    // Transpose the colours back and store them for the first n_triangles
    // triangles.
    for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {
        Transpose(vertex_colour[i_vertex]);
        for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) {
            store_u(vertex_colour[i_vertex][i_triangle], colour[i_triangle][i_vertex].f);
        }
    }
}
void main_loop(void) { char key; int paper_w, paper_h; long diff; if (!landscape) { paper_w = XLENG / shrink; paper_h = YLENG / shrink; } else { paper_w = YLENG / shrink; paper_h = XLENG / shrink; } set_all(); XUndefineCursor(display, main_window); for (;;) { XNextEvent(display, &ev); switch (ev.type) { case Expose: if (ev.xexpose.count == 0) get_window_size(); realize_part(ev.xexpose.x, ev.xexpose.y, ev.xexpose.width, ev.xexpose.height, ev.xexpose.x, ev.xexpose.y); break; case MappingNotify: /* XRefreshKeyboardMapping(&ev); */ XRefreshKeyboardMapping((XMappingEvent *) & ev); break; /* case ConfigureNotify: get_window_size(); shr_w = paper_w / window_w; shr_h = paper_h / window_h; shrink = (shr_w >= shr_h) ? shr_w :shr_h; rewind(stdin); plot(); main_loop(); origin_x += window_x; origin_y += window_y; realize(); realize_part(origin_x, origin_y, window_w, window_h, origin_x, origin_y); origin_x = paper_w - xsh.width; origin_y = paper_h - xsh.height; origin_x += xsh.x; origin_y += xsh.y; break; */ case MotionNotify: break; case ButtonPress: break; case KeyPress: get_window_size(); XLookupString(&ev.xkey, &key, 1, NULL, NULL); switch (key) { case 'j': diff = paper_h - window_h; if (origin_y >= diff) { beep(); break; } origin_y += window_h / 4; if (origin_y > diff) origin_y = diff; if (origin_y < 0) origin_y = 0; realize(); continue; break; case 'k': if (origin_y <= 0) { beep(); break; } origin_y -= window_h / 4; if (origin_y < 0) origin_y = 0; realize(); continue; break; case 'l': diff = paper_w - window_w; if (origin_x >= diff) { beep(); break; } origin_x += window_w / 4; if (origin_x > diff) origin_x = diff; if (origin_x < 0) origin_x = 0; realize(); continue; break; case 'h': if (origin_x <= 0) { beep(); break; } origin_x -= window_w / 4; if (origin_x < 0) origin_x = 0; realize(); continue; break; case 'q': case '\003': /* control-C */ case '\004': /* control-D */ close_window(); break; default: beep(); break; } break; default: break; } } }
void StimulusGroup::evolve() { if ( !active ) return; // detect and push spikes boost::exponential_distribution<> dist(BASERATE); boost::variate_generator<boost::mt19937&, boost::exponential_distribution<> > die(poisson_gen, dist); for ( NeuronID i = 0 ; i < get_rank_size() ; ++i ) { if ( ttl[i] < sys->get_clock() && activity[i]>0.0 ) { push_spike ( i ); ttl[i] = sys->get_clock() + (AurynTime)((AurynFloat)die()/((activity[i]+base_rate)*dt)); } } // update stimulus properties if ( sys->get_clock() >= next_action_time ) { write_sequence_file(dt*(sys->get_clock())); if ( stimulus_active ) { if ( off_pattern >= 0 ) { set_active_pattern( off_pattern ); // turn on "off-stimulus" cur_stim_index = off_pattern; } else set_all( 0.0 ); // turn off currently active stimulus stimulus_active = false ; if ( randomintervals ) { boost::exponential_distribution<> dist(1./mean_off_period); boost::variate_generator<boost::mt19937&, boost::exponential_distribution<> > die(order_gen, dist); next_action_time = sys->get_clock() + (AurynTime)(max(0.0,die())/dt); } else { next_action_time = sys->get_clock() + (AurynTime)(mean_off_period/dt); } } else { if ( active ) { // choose stimulus switch ( stimulus_order ) { case MANUAL: break; case SEQUENTIAL: cur_stim_index = (cur_stim_index+1)%stimuli.size(); break; case SEQUENTIAL_REV: --cur_stim_index; if ( cur_stim_index <= 0 ) cur_stim_index = stimuli.size() - 1 ; break; case RANDOM: default: double draw = order_die(); double cummulative = 0; // TODO make this less greedy and do not compute this every draw cur_stim_index = 0; // cout.precision(5); // cout << " draw " << draw << endl; for ( unsigned int i = 0 ; i < probabilities.size() ; ++i ) { cummulative += probabilities[i]; // cout << cummulative << endl; if ( draw <= cummulative ) { cur_stim_index = i; break; } } break; } set_active_pattern( cur_stim_index ); stimulus_active = true; if ( randomintervals ) { boost::normal_distribution<> dist(mean_on_period,mean_on_period/3); 
boost::variate_generator<boost::mt19937&, boost::normal_distribution<> > die(order_gen, dist); next_action_time = sys->get_clock() + (AurynTime)(max(0.0,die())/dt); } else { next_action_time = sys->get_clock() + (AurynTime)(mean_on_period/dt); } } } write_sequence_file(dt*(sys->get_clock()+1)); } }
// Fills the array with uninitialized() through set_all() and, when a
// _thread_work_items array is attached, resets that one as well.
void WorkerDataArray<T>::reset()
{
  set_all(uninitialized());

  if (_thread_work_items == NULL) {
    return;
  }
  _thread_work_items->reset();
}
/*
 * Endless demo loop: after opening the serial port it repeatedly runs the
 * fill, gradient, walk, solid colour, column/row and fade routines, printing
 * a line before each step.  Returns 1 if init_serial() reports failure.
 */
int main(void)
{
    /* Solid colours shown one after another, each held for hold_seconds. */
    static const struct {
        const char *announce;
        int red, green, blue;
    } solid_colours[] = {
        { "Set all red\n",        254,   0,   0 },
        { "Set all green\n",        0, 254,   0 },
        { "Set all blue\n",         0,   0, 254 },
        { "Set all red/blue\n",   254,   0, 254 },
        { "Set all red/green\n",  254, 254,   0 },
        { "Set all green/blue\n",   0, 254, 254 },
        { "Set all white\n",      254, 254, 254 },
    };
    const unsigned int n_solid = sizeof(solid_colours) / sizeof(solid_colours[0]);
    const int hold_seconds = 3;
    unsigned int c;

    if (init_serial()) {
        printf("Could not open serial port!\n");
        return 1;
    }

    for (;;) {
        printf("set all\n");
        set_all(15, 15, 15);
        sleep(1);

        printf("row green\n");
        row_gradient_green();
        sleep(1);

        printf("set all\n");
        set_all(10, 10, 10);
        sleep(2);

        printf("Walk\n");
        walk();

        for (c = 0; c < n_solid; c++) {
            printf("%s", solid_colours[c].announce);
            set_all(solid_colours[c].red, solid_colours[c].green, solid_colours[c].blue);
            sleep(hold_seconds);
        }

        printf("Loop cols\n");
        loop_cols();

        printf("Loop rows\n");
        loop_rows();

        printf("Fade\n");
        clear();
        fade();

        printf("Clear\n");
        clear();
        sleep(hold_seconds);
    }

    return 0;
}
/* ================== ================== */ void Process_Fragments( raster_output_& raster_output, shader_input_& shader_input ) { const __m128 zero = set_all(0.0f); shader_input.tile_mask_16x16 = 0x0; shader_input.tile_mask_64x64 = 0x0; //=============================================================================================== { const __int32 n_fragments = raster_output.n_fragments[raster_output_::TRIVIAL_ACCEPT_64x64]; for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) { raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::TRIVIAL_ACCEPT_64x64][i_fragment]; const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16; const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff; Process_Fragment_64x64( raster_fragment.w, i_buffer, coverage_mask, raster_output, shader_input ); } } //=============================================================================================== { const __int32 n_fragments = raster_output.n_fragments[raster_output_::TRIVIAL_ACCEPT_16x16]; for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) { raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::TRIVIAL_ACCEPT_16x16][i_fragment]; const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16; const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff; Process_Fragment_16x16( raster_fragment.w, 0, i_buffer, coverage_mask, raster_output, shader_input ); } } //=============================================================================================== { const __int32 n_fragments = raster_output.n_fragments[raster_output_::TRIVIAL_ACCEPT_4x4]; for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) { raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::TRIVIAL_ACCEPT_4x4][i_fragment]; const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16; const unsigned __int32 coverage_mask = 
raster_fragment.buffer_mask_packed & 0xffff; Process_Fragment_4x4(raster_fragment.w, 0, i_buffer, coverage_mask, raster_output, shader_input); } } //=============================================================================================== { //const __int32 start = raster_output_::MAX_FRAGMENTS - 1; //const __int32 end = raster_output.n_fragments[raster_output_::PARTIAL_ACCEPT_4x4]; //for (__int32 i_fragment = start; i_fragment > end; i_fragment--) { // raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::PARTIAL_ACCEPT_4x4][i_fragment]; // const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16; // const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff; // Process_Fragment_4x4(raster_fragment.w, 0, i_buffer, coverage_mask, raster_output, shader_input); //} } //=============================================================================================== { const __int32 n_fragments = raster_output.n_fragments_COMPLETE; __int32 n_depth_fragments = 0; for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) { raster_fragment_complete_& raster_fragment = raster_output.raster_fragment_complete[i_fragment]; const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16; const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff; pixel_shader(i_buffer, coverage_mask, raster_fragment.bazza, shader_input); const __int32 i_buffer_depth_4x4 = i_buffer / (4 * 4); const __int32 i_buffer_depth_16x16 = i_buffer / (16 * 16); const __int32 i_buffer_depth_64x64 = i_buffer / (64 * 64); shader_input.depth_tiles_4x4[i_buffer_depth_4x4] = shader_input.z_max; shader_input.tile_mask_16x16 |= one_bit_64 << i_buffer_depth_16x16; shader_input.tile_mask_64x64 |= one_bit_64 << i_buffer_depth_64x64; } } //=============================================================================================== { //printf_s(" %llu ", shader_input.tile_mask_16x16); __int64 n_tiles = 
_mm_popcnt_u64(shader_input.tile_mask_16x16); for (__int32 i_bit = 0; i_bit < n_tiles; i_bit++) { unsigned long i_tile_16x16; _BitScanForward64(&i_tile_16x16, shader_input.tile_mask_16x16); shader_input.tile_mask_16x16 ^= one_bit_64 << i_tile_16x16; const __int32 i_tile_4x4 = i_tile_16x16 * (4 * 4); __m128 depth_4x4[4]; depth_4x4[0] = load_u(shader_input.depth_tiles_4x4 + i_tile_4x4 + (0 * 4)); depth_4x4[1] = load_u(shader_input.depth_tiles_4x4 + i_tile_4x4 + (1 * 4)); depth_4x4[2] = load_u(shader_input.depth_tiles_4x4 + i_tile_4x4 + (2 * 4)); depth_4x4[3] = load_u(shader_input.depth_tiles_4x4 + i_tile_4x4 + (3 * 4)); __m128 z_max; z_max = depth_4x4[0]; z_max = min_vec(depth_4x4[1], z_max); z_max = min_vec(depth_4x4[2], z_max); z_max = min_vec(depth_4x4[3], z_max); __m128 z_out = z_max; z_max = rotate_left(z_max); z_out = min_vec(z_max, z_out); z_max = rotate_left(z_max); z_out = min_vec(z_max, z_out); z_max = rotate_left(z_max); z_out = min_vec(z_max, z_out); shader_input.depth_tiles_16x16[i_tile_16x16] = store_s(z_out); } } { __int64 n_tiles = _mm_popcnt_u64(shader_input.tile_mask_64x64); //printf_s(" %llu ", n_tiles); for (__int32 i_bit = 0; i_bit < n_tiles; i_bit++) { unsigned long i_tile_64x64; _BitScanForward64(&i_tile_64x64, shader_input.tile_mask_64x64); shader_input.tile_mask_64x64 ^= one_bit_64 << i_tile_64x64; const __int32 i_tile_16x16 = i_tile_64x64 * (4 * 4); __m128 depth_16x16[4]; depth_16x16[0] = load_u(shader_input.depth_tiles_16x16 + i_tile_16x16 + (0 * 4)); depth_16x16[1] = load_u(shader_input.depth_tiles_16x16 + i_tile_16x16 + (1 * 4)); depth_16x16[2] = load_u(shader_input.depth_tiles_16x16 + i_tile_16x16 + (2 * 4)); depth_16x16[3] = load_u(shader_input.depth_tiles_16x16 + i_tile_16x16 + (3 * 4)); __m128 z_max; z_max = depth_16x16[0]; z_max = min_vec(depth_16x16[1], z_max); z_max = min_vec(depth_16x16[2], z_max); z_max = min_vec(depth_16x16[3], z_max); __m128 z_out = z_max; z_max = rotate_left(z_max); z_out = min_vec(z_max, z_out); z_max = 
rotate_left(z_max); z_out = min_vec(z_max, z_out); z_max = rotate_left(z_max); z_out = min_vec(z_max, z_out); shader_input.depth_tiles_64x64[i_tile_64x64] = store_s(z_out); } } }
/**************************************************** EDIT_DATA ****************************************************/ void edit_data( object *root, int *choice, char *obj_name ) { char *l , ch[ 2 * MAX_ELEM_LENGTH ], ch1[ MAX_ELEM_LENGTH ]; int i, counter, lag; object *first; cmd( "if {$tcl_platform(os) == \"Darwin\"} {set cwidth 9; set cbd 2 } {set cwidth 8; set cbd 2}" ); Tcl_LinkVar( inter, "lag", ( char * ) &lag, TCL_LINK_INT ); cmd( "if { ! [ info exists autoWidth ] } { set autoWidth 1 }" ); cmd( "if { ! [ winfo exists .ini ] } { newtop .ini; showtop .ini topleftW 1 1 1 $hsizeI $vsizeI } { if { ! $autoWidth } { resizetop $hsizeI $vsizeI } }" ); cmd( "set position 1.0" ); in_edit_data = true; *choice = 0; while ( *choice == 0 ) { // reset title and destroy command because may be coming from set_obj_number cmd( "settop .ini \"%s%s - LSD Initial Values Editor\" { set choice 1 }", unsaved_change() ? "*" : " ", simul_name ); first = root->search( obj_name ); cmd( "frame .ini.b" ); cmd( "set w .ini.b.tx" ); cmd( "scrollbar .ini.b.ys -command \".ini.b.tx yview\"" ); cmd( "scrollbar .ini.b.xs -command \".ini.b.tx xview\" -orient horizontal" ); cmd( "text $w -yscrollcommand \".ini.b.ys set\" -xscrollcommand \".ini.b.xs set\" -wrap none" ); cmd( ".ini.b.tx conf -cursor arrow" ); strncpy( ch1, obj_name, MAX_ELEM_LENGTH - 1 ); ch1[ MAX_ELEM_LENGTH - 1 ] = '\0'; cmd( "label $w.tit_empty -width 32 -relief raised -text \"Object: %-17s \" -borderwidth 4", ch1 ); cmd( "bind $w.tit_empty <Button-1> {set choice 4}" ); if ( ! 
in_set_obj ) // show only if not already recursing cmd( "bind $w.tit_empty <Enter> {set msg \"Click to edit number of instances\"}" ); cmd( "bind $w.tit_empty <Leave> {set msg \"\"}" ); cmd( "$w window create end -window $w.tit_empty" ); strcpy( ch, "" ); i = 0; counter = 1; colOvflw = false; search_title( root, ch, &i, obj_name, &counter ); cmd( "$w insert end \\n" ); // explore the tree searching for each instance of such object and create: // - titles // - entry cells linked to the values set_focus = 0; link_data( root, obj_name ); cmd( "pack .ini.b.ys -side right -fill y" ); cmd( "pack .ini.b.xs -side bottom -fill x" ); cmd( "pack .ini.b.tx -expand yes -fill both" ); cmd( "pack .ini.b -expand yes -fill both" ); cmd( "label .ini.msg -textvariable msg" ); cmd( "pack .ini.msg -pady 5" ); cmd( "frame .ini.st" ); cmd( "label .ini.st.err -text \"\"" ); cmd( "label .ini.st.pad -text \" \"" ); cmd( "checkbutton .ini.st.aw -text \"Automatic width\" -variable autoWidth -command { set choice 5 }" ); cmd( "pack .ini.st.err .ini.st.pad .ini.st.aw -side left" ); cmd( "pack .ini.st -anchor e -padx 10 -pady 5" ); cmd( "donehelp .ini boh { set choice 1 } { LsdHelp menudata_init.html }" ); cmd( "$w configure -state disabled" ); if ( set_focus == 1 ) cmd( "focus $initial_focus; $initial_focus selection range 0 end" ); cmd( "bind .ini <KeyPress-Escape> {set choice 1}" ); cmd( "bind .ini <F1> { LsdHelp menudata_init.html }" ); // show overflow warning just once per configuration but always indicate if ( colOvflw ) { cmd( ".ini.st.err conf -text \"OBJECTS NOT SHOWN! (> %d)\" -fg red", MAX_COLS ); if ( ! iniShowOnce ) { cmd( "update; tk_messageBox -parent .ini -type ok -title Warning -icon warning -message \"Too many objects to edit\" -detail \"LSD Initial Values editor can show only the first %d objects' values. 
Please use the 'Set All' button to define values for objects beyond those.\" ", MAX_COLS ); iniShowOnce = true; } } noredraw: cmd( "if $autoWidth { resizetop .ini [ expr ( 40 + %d * ( $cwidth + 1 ) ) * [ font measure TkTextFont -displayof .ini 0 ] ] }", counter ); // editor main command loop while ( ! *choice ) { try { Tcl_DoOneEvent( 0 ); } catch ( bad_alloc& ) // raise memory problems { throw; } catch ( ... ) // ignore the rest { goto noredraw; } } // handle both resizing event and block object # setting while editing initial values if ( *choice == 5 || ( *choice == 4 && in_set_obj ) ) // avoid recursion { *choice = 0; goto noredraw; } // clean up strcpy( ch, "" ); i = 0; clean_cell( root, ch, obj_name ); cmd( "destroy .ini.b .ini.boh .ini.msg .ini.st" ); if ( *choice == 2 ) { l = ( char * ) Tcl_GetVar( inter, "var-S-A", 0 ); strcpy( ch, l ); *choice = 2; // set data editor window parent set_all( choice, first, ch, lag ); cmd( "bind .ini <KeyPress-Return> {}" ); *choice = 0; } if ( *choice ==4 ) { *choice = 0; set_obj_number( root, choice ); *choice = 0; } } in_edit_data = false; Tcl_UnlinkVar( inter, "lag"); }
/* ================== ================== */ void Vertex_Lighting_REM( const __int32 n_triangles, const vertex_light_manager_& vertex_light_manager, const float4_ positions[4][3], float4_ colour[4][3] ) { //const __int32 VERTEX_COLOUR = FIRST_ATTRIBUTE + 0; static const float r_screen_scale_x = 1.0f / screen_scale_x; static const float r_screen_scale_y = 1.0f / screen_scale_y; //const __m128 attenuation_factor = set_all(200.0f); //const __m128 attenuation_factor = set_all(800.0f); //const __m128 specular_scale = set_all(100.0f); //const __m128 diffuse_scale = set_all(20.0f); __m128 r_screen_scale[2]; r_screen_scale[X] = set_all(r_screen_scale_x); r_screen_scale[Y] = set_all(r_screen_scale_y); __m128 screen_shift[2]; screen_shift[X] = set_all(screen_shift_x); screen_shift[Y] = set_all(screen_shift_y); __m128 clip_space_position[3][4]; //__m128 vertex_colour[3][4]; float4_ new_position[4][3]; for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) { __m128 vertex_position[4]; for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) { vertex_position[i_triangle] = load_u(positions[i_triangle][i_vertex].f); //vertex_colour[i_vertex][i_triangle] = load_u(colour[i_triangle][i_vertex].f); } Transpose(vertex_position); //Transpose(vertex_colour[i_vertex]); __m128 depth = reciprocal(vertex_position[Z]); clip_space_position[i_vertex][X] = ((vertex_position[X] - screen_shift[X]) * r_screen_scale[X]) * depth; clip_space_position[i_vertex][Y] = ((vertex_position[Y] - screen_shift[Y]) * r_screen_scale[Y]) * depth; clip_space_position[i_vertex][Z] = depth; } __m128 a[3]; a[X] = clip_space_position[1][X] - clip_space_position[0][X]; a[Y] = clip_space_position[1][Y] - clip_space_position[0][Y]; a[Z] = clip_space_position[1][Z] - clip_space_position[0][Z]; __m128 b[3]; b[X] = clip_space_position[2][X] - clip_space_position[0][X]; b[Y] = clip_space_position[2][Y] - clip_space_position[0][Y]; b[Z] = clip_space_position[2][Z] - clip_space_position[0][Z]; __m128 normal[4]; 
normal[X] = (a[Y] * b[Z]) - (a[Z] * b[Y]); normal[Y] = (a[Z] * b[X]) - (a[X] * b[Z]); normal[Z] = (a[X] * b[Y]) - (a[Y] * b[X]); __m128 mag = (normal[X] * normal[X]) + (normal[Y] * normal[Y]) + (normal[Z] * normal[Z]); mag = _mm_rsqrt_ps(mag); normal[X] *= mag; normal[Y] *= mag; normal[Z] *= mag; float normal_4[3][4]; store_u(normal[X], normal_4[X]); store_u(normal[Y], normal_4[Y]); store_u(normal[Z], normal_4[Z]); float centre_4[3][4]; float extent_4[3][4]; const __m128 half = set_all(0.5f); for (__int32 i_axis = X; i_axis < W; i_axis++) { __m128 max; __m128 min; max = min = clip_space_position[0][i_axis]; max = max_vec(max_vec(max, clip_space_position[1][i_axis]), clip_space_position[2][i_axis]); min = min_vec(min_vec(min, clip_space_position[1][i_axis]), clip_space_position[2][i_axis]); store_u((max + min) * half, centre_4[i_axis]); store_u((max - min) * half, extent_4[i_axis]); } for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) { Transpose(clip_space_position[i_vertex]); for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) { store_u(clip_space_position[i_vertex][i_triangle], new_position[i_triangle][i_vertex].f); } } const __m128 zero = set_all(0.0f); const __m128 one = set_all(1.0f); enum { MAX_LIGHTS_PER_VERTEX = 128, }; for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) { __m128 centre[3]; __m128 extent[3]; for (__int32 i_axis = X; i_axis < W; i_axis++) { centre[i_axis] = set_all(centre_4[i_axis][i_triangle]); extent[i_axis] = set_all(extent_4[i_axis][i_triangle]); } float z_min = centre_4[Z][i_triangle] - extent_4[Z][i_triangle]; float z_max = centre_4[Z][i_triangle] + extent_4[Z][i_triangle]; __int32 bin_min = __int32(z_min / vertex_light_manager.bin_interval); __int32 bin_max = __int32(z_max / vertex_light_manager.bin_interval); bin_min = min(bin_min, vertex_light_manager_::NUM_BINS - 1); bin_max = min(bin_max, vertex_light_manager_::NUM_BINS - 1); bin_min = max(bin_min, 0); bin_max = max(bin_max, 0); //bin_max = 
bin_max >= 10 ? 0 : bin_max; //printf_s(" %i , %i \n", bin_min, bin_max); __int32 i_lights[MAX_LIGHTS_PER_VERTEX]; __int32 n_lights = 0; { for (__int32 i_bin = bin_min; i_bin <= bin_max; i_bin++) { const vertex_light_manager_::bin_& bin = vertex_light_manager.bin[i_bin]; for (__int32 i_light_4 = 0; i_light_4 < bin.n_lights; i_light_4 += 4) { const __int32 n = min(bin.n_lights - i_light_4, 4); __m128 light_position[4]; for (__int32 i_light = 0; i_light < n; i_light++) { __int32 index = vertex_light_manager.i_light[bin.i_start + i_light_4 + i_light]; light_position[i_light] = load_u(vertex_light_manager.light_sources[index].position.f); } Transpose(light_position); const __m128 light_extent = set_all(100.0f); __m128i is_valid = set_all(-1); is_valid &= abs(centre[X] - light_position[X]) < (extent[X] + light_extent); is_valid &= abs(centre[Y] - light_position[Y]) < (extent[Y] + light_extent); is_valid &= abs(centre[Z] - light_position[Z]) < (extent[Z] + light_extent); unsigned __int32 result_mask = store_mask(is_valid); for (__int32 i_light = 0; i_light < n; i_light++) { __int32 index = vertex_light_manager.i_light[bin.i_start + i_light_4 + i_light]; i_lights[n_lights] = index; n_lights += (result_mask >> i_light) & 0x1; } if (n_lights > MAX_LIGHTS_PER_VERTEX) { n_lights = MAX_LIGHTS_PER_VERTEX; break; } } } } for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) { __m128 vertex_position[3]; vertex_position[X] = set_all(new_position[i_triangle][i_vertex].x); vertex_position[Y] = set_all(new_position[i_triangle][i_vertex].y); vertex_position[Z] = set_all(new_position[i_triangle][i_vertex].z); __m128 vertex_colour[4]; vertex_colour[R] = set_all(0.0f); vertex_colour[G] = set_all(0.0f); vertex_colour[B] = set_all(0.0f); __m128 normal[3]; normal[X] = set_all(normal_4[X][i_triangle]); normal[Y] = set_all(normal_4[Y][i_triangle]); normal[Z] = set_all(normal_4[Z][i_triangle]); for (__int32 i_light_4 = 0; i_light_4 < n_lights; i_light_4 += 4) { const __int32 n = min(n_lights - 
i_light_4, 4); __m128 light_position[4]; __m128 light_colour[4]; unsigned __int32 mask = 0x0; float intensity_4[4]; for (__int32 i_light = 0; i_light < n; i_light++) { mask |= 0x1 << i_light; const __int32 index = i_lights[i_light_4 + i_light]; intensity_4[i_light] = vertex_light_manager.light_sources[index].intensity; light_position[i_light] = load_u(vertex_light_manager.light_sources[index].position.f); light_colour[i_light] = load_u(vertex_light_manager.light_sources[index].colour.f); } Transpose(light_position); Transpose(light_colour); __m128 light_intensity = load_u(intensity_4); __m128 light_ray[3]; light_ray[X] = vertex_position[X] - light_position[X]; light_ray[Y] = vertex_position[Y] - light_position[Y]; light_ray[Z] = vertex_position[Z] - light_position[Z]; __m128 mag = (light_ray[X] * light_ray[X]) + (light_ray[Y] * light_ray[Y]) + (light_ray[Z] * light_ray[Z]); __m128 r_mag = _mm_rsqrt_ps(mag); light_ray[X] *= r_mag; light_ray[Y] *= r_mag; light_ray[Z] *= r_mag; __m128 dot = (normal[X] * light_ray[X]) + (normal[Y] * light_ray[Y]) + (normal[Z] * light_ray[Z]); dot &= dot > zero; __m128 r_distance = reciprocal(one + mag); __m128 spec = (dot * dot) * r_distance; static const __m128 specular_coefficient = set_all(2000.0f); static const __m128 diffuse_coefficient = set_all(200.0f); //printf_s(" %f ", dot); __m128i loop_mask = load_mask[mask]; for (__int32 i_channel = R; i_channel < A; i_channel++) { __m128 final = spec * specular_coefficient * light_colour[i_channel] * light_intensity; final += r_distance * diffuse_coefficient * light_colour[i_channel] * light_intensity; vertex_colour[i_channel] += final & loop_mask; } } Transpose(vertex_colour); vertex_colour[0] += vertex_colour[1] + vertex_colour[2] + vertex_colour[3]; float4_ temp; store_u(vertex_colour[0], temp.f); colour[i_triangle][i_vertex].x += temp.x; colour[i_triangle][i_vertex].y += temp.y; colour[i_triangle][i_vertex].z += temp.z; } }
// Delegates to set_all() with the value 0.0.
void RealMatrix::zero(void)
{
    set_all(0.0);
}