// This is called for every vecop pair in the vecop_buffer. The point // global has been set to the query point, and objpoint_or_material // has been set to the closest point in the current primitive. real update_distance_query() { // compute (point - objpoint_or_material) return add_scaled(point, objpoint_or_material, minus_one), // compare squared length of result to previous closest dist dot(vec3_out,vec3_out) < closest_dist_squared ? // if smaller, update closest_point and closest_dist_squared closest_point=objpoint_or_material, closest_dist_squared=dot_out_or_tmp : // otherwise do nothing 0; }
int QUERN_solve_with_CGNR(int m, int n, const int* A_row_start, const int* A_column_index, const double* A_value, const double* rhs, const int* R_row_start, const int* R_column_index, const double* R_value, int max_iterations, double absolute_convergence_tolerance, double* x, int* return_solved, int* return_iterations, double* return_residual_norm) { if(m<=0 || n<=0 || !A_row_start || !A_column_index || !A_value || !rhs || !R_row_start || !R_column_index || !R_value || !x || !return_solved || !return_iterations || !return_residual_norm) return QUERN_INPUT_ERROR; // default values *return_solved=0; *return_iterations=0; *return_residual_norm=two_norm(n, rhs); if(*return_residual_norm<=absolute_convergence_tolerance){ *return_solved=1; return QUERN_OK; } // allocate some room to work in double* working_vectors=(double*)std::malloc((3*n+m)*sizeof(double)); if(!working_vectors) return QUERN_OUT_OF_MEMORY; double* r=working_vectors; double* s=r+n; double* z=s+n; double* u=z+n; // set up CGNR int check; std::memset(x, 0, n*sizeof(double)); std::memcpy(r, rhs, n*sizeof(double)); std::memcpy(u, rhs, n*sizeof(double)); check=QUERN_solve_with_r_transpose_in_place(n, R_row_start, R_column_index, R_value, u); if(check){ std::free(working_vectors); return check; } check=QUERN_solve_with_r(n, R_row_start, R_column_index, R_value, u, z); if(check){ std::free(working_vectors); return check; } std::memcpy(s, z, n*sizeof(double)); double rho=two_norm_squared(n, u); // the main loop for(;;){ if(rho==0){ std::free(working_vectors); return QUERN_INPUT_ERROR; } check=QUERN_multiply(m, n, A_row_start, A_column_index, A_value, s, u); if(check){ std::free(working_vectors); return check; } check=QUERN_multiply_transpose(m, n, A_row_start, A_column_index, A_value, u, z); if(check){ std::free(working_vectors); return check; } double denom=two_norm_squared(m, u); if(denom==0){ std::free(working_vectors); return QUERN_INPUT_ERROR; } double alpha=rho/denom; add_scaled(n, x, alpha, s); 
add_scaled(n, r, -alpha, z); ++*return_iterations; *return_residual_norm=two_norm(n, r); if(*return_residual_norm<=absolute_convergence_tolerance){ *return_solved=1; break; } if(*return_iterations>max_iterations) break; std::memcpy(u, r, n*sizeof(double)); check=QUERN_solve_with_r_transpose_in_place(n, R_row_start, R_column_index, R_value, u); if(check){ std::free(working_vectors); return check; } check=QUERN_solve_with_r(n, R_row_start, R_column_index, R_value, u, z); if(check){ std::free(working_vectors); return check; } double rho_new=two_norm_squared(n, u); double beta=rho_new/rho; scale_and_add(n, beta, s, z); rho=rho_new; } std::free(working_vectors); return QUERN_OK; }
/** * @brief Run the test * * @param be_verbose Whether to print traces or not * @param incr Increment between TS values * @return true if test succeeds, false otherwise */ bool run_test(bool be_verbose, const unsigned int incr) { struct ts_sc_comp ts_sc_comp; /* the RTP TS encoding context */ struct ts_sc_decomp ts_sc_decomp; /* the RTP TS decoding context */ uint32_t value; /* the value to encode */ uint32_t value_encoded; /* the encoded value to decode */ uint32_t value_decoded; /* the decoded value */ unsigned int real_incr; int is_success = false; /* test fails by default */ int ret; uint64_t i; /* create the RTP TS encoding context */ ret = c_create_sc(&ts_sc_comp, ROHC_WLSB_WINDOW_WIDTH, NULL, NULL); if(ret != 1) { fprintf(stderr, "failed to initialize the RTP TS encoding context\n"); goto error; } /* create the RTP TS decoding context */ d_init_sc(&ts_sc_decomp, NULL, NULL); /* compute the initial value to encode */ if(incr == 0) { real_incr = (20 + 10) / 2; } else { real_incr = incr; } value = (0xffffffff - 50 * real_incr); /* encode then decode values from ranges [0xffffffff - 50 * incr, 0xffffffff] * and [0, 49 * incr] */ for(i = 1; i < 100; i++) { size_t required_bits; uint32_t required_bits_mask; uint32_t ts_stride; /* value to encode/decode */ if(incr == 0) { if((i % 2) == 0) { real_incr = 20; } else { real_incr = 10; } } else { real_incr = incr; } value += real_incr; if(value > 0xffffffff) { value = 0; } trace(be_verbose, "\t#%" PRIu64 ": encode value 0x%08x (+%u) ...\n", i, value, real_incr); /* update encoding context */ c_add_ts(&ts_sc_comp, value, i); /* transmit the required bits wrt to encoding state */ switch(ts_sc_comp.state) { case INIT_TS: /* transmit all bits without encoding */ trace(be_verbose, "\t\ttransmit all bits without encoding\n"); value_encoded = value; required_bits = 32; /* change for INIT_STRIDE state */ ts_sc_comp.state = INIT_STRIDE; /* simulate transmission */ /* decode received unscaled TS */ 
if(!ts_decode_unscaled_bits(&ts_sc_decomp, value_encoded, required_bits, &value_decoded)) { trace(be_verbose, "failed to decode received absolute unscaled TS\n"); goto destroy_ts_sc_comp; } break; case INIT_STRIDE: /* transmit all bits along with TS_STRIDE */ trace(be_verbose, "\t\ttransmit all bits without encoding " "and TS_STRIDE\n"); value_encoded = value; ts_stride = get_ts_stride(&ts_sc_comp); required_bits = 32; /* change for INIT_STRIDE state? */ ts_sc_comp.nr_init_stride_packets++; if(ts_sc_comp.nr_init_stride_packets >= ROHC_INIT_TS_STRIDE_MIN) { ts_sc_comp.state = SEND_SCALED; } /* simulate transmission */ /* decode received unscaled TS */ if(!ts_decode_unscaled_bits(&ts_sc_decomp, value_encoded, required_bits, &value_decoded)) { trace(be_verbose, "failed to decode received unscaled TS\n"); goto destroy_ts_sc_comp; } d_record_ts_stride(&ts_sc_decomp, ts_stride); break; case SEND_SCALED: /* transmit TS_SCALED */ trace(be_verbose, "\t\ttransmit some bits of TS_SCALED\n"); /* get TS_SCALED */ value_encoded = get_ts_scaled(&ts_sc_comp); /* determine how many bits of TS_SCALED we need to send */ required_bits = nb_bits_scaled(&ts_sc_comp); assert(required_bits <= 32); /* truncate the encoded TS_SCALED to the number of bits we send */ if(required_bits == 32) { required_bits_mask = 0xffffffff; } else { required_bits_mask = (1 << required_bits) - 1; } value_encoded = value_encoded & required_bits_mask; /* save the new TS_SCALED value */ add_scaled(&ts_sc_comp, i); /* simulate transmission */ /* decode TS */ if(required_bits > 0) { /* decode the received TS_SCALED value */ if(!ts_decode_scaled_bits(&ts_sc_decomp, value_encoded, required_bits, &value_decoded)) { trace(be_verbose, "failed to decode received TS_SCALED\n"); goto destroy_ts_sc_comp; } } else { /* deduct TS from SN */ value_decoded = ts_deduce_from_sn(&ts_sc_decomp, i); } break; default: trace(be_verbose, "unknown RTP TS encoding state, " "should not happen\n"); assert(0); goto destroy_ts_sc_comp; } 
trace(be_verbose, "\t\tencoded on %zu bits: 0x%04x\n", required_bits, value_encoded); /* check test result */ if(value != value_decoded) { fprintf(stderr, "original and decoded values do not match while " "testing value 0x%08x\n", value); goto destroy_ts_sc_comp; } /* update decoding context */ ts_update_context(&ts_sc_decomp, value_decoded, i); } /* test succeeds */ trace(be_verbose, "\ttest is successful\n"); is_success = true; destroy_ts_sc_comp: c_destroy_sc(&ts_sc_comp); error: return is_success; }
// Program entry point.
//
// Step 1 walks the text to display and, for each character, scans the
// font table stored in progdata, pushing two vectors per stroke into
// vecop_buffer. Step 2 loops over image rows, columns, anti-aliasing
// samples and ray bounces, and emits the three components of every
// pixel through output().
//
// argc_or_image_col does double duty: it is the argument count on entry
// and is reused as the image column counter in step 2. Always returns 0.
int main(int argc_or_image_col, char** argv) {

  //////////////////////////////////////////////////
  // Step 1: fill up the vecop_buffer with the correct primitives for
  // the text to display by parsing the font table.
  //
  // The outer loop here is over characters to print, which come
  // either from argv[1] (if argc > 1), or from the progdata table, in
  // which case they have to be XOR-ed with the initial value of
  // xormask_or_quality to get the actual text (5 according to the
  // original notes; the initializer is outside this view).
  for (// Was a string provided on the command line?
       textptr = argc_or_image_col>1 ?
         // If so, initialize textptr from argv[1] and clear the XOR
         // mask: 1[x, argv] is argv[1], and the comma expression zeroes
         // the mask on the way.
         1[xormask_or_quality=0, argv] :
         // else, initialize textptr from progdata.
         progdata;
       // Go until terminating 0 or text too wide
       *textptr && text_width<24;
       // Increment textptr each iteration.
       ++textptr)

    // Inner loop is over the font table encoded in progdata. For each
    // text character, we need to try to find the corresponding glyph
    // in the font and push all of its strokes into the vecop_buffer,
    // two vectors at a time.
    for (// Initialize found to false and the current glyph to zero.
         lo_or_found=range_or_curglyph=0,
         // Start 10 characters into the text (after "ioccc 2011");
         // fontptr is pre-incremented before it is dereferenced, so
         // offset by 9 here.
         fontptr=progdata+9;
         // Read the next byte from the font table (stop if we
         // hit a terminating 0).
         (fbyte_or_aacnt=*++fontptr) &&
           // Keep going as long as one of these holds:
           //
           //   - lo_or_found is zero (haven't found char)
           //   - fbyte_or_aacnt >= 40 (lower ones are space !"#$%&')
           //
           // This means that once lo_or_found is nonzero and we hit a
           // START token or a terminating zero, we are done.
           //
           // Since logical AND is short-circuiting, the update of
           // text_width (and dot_out_or_tmp) only happens if found is
           // true and the current byte is less than 40.
           //
           // Once that happens, we exit the loop provided the updated
           // width is nonzero: all three conditions inside these
           // parens are then true, making the entire thing false when
           // NOT-ed.
           !(lo_or_found && fbyte_or_aacnt<forty &&
             (dot_out_or_tmp=text_width+=angle_or_width));
         // See if we have found our glyph yet
         lo_or_found ?
           //////////////////////////////////////////////////
           // We have found it -- we are in the current glyph.
           // The current byte should hold an OPCODE and an ARGUMENT.
           // Stash the OPCODE (byte / 32) into lo_or_found and
           // increment fontptr. The ARGUMENT stays available in
           // fbyte_or_aacnt, which still holds the byte that fontptr
           // was pointing to.
           lo_or_found=*fontptr++/32,
           // Now get the XCOORD (bits 3-4) and YCOORD (bits 0-2) from
           // the second byte of the stroke instruction. The XCOORD
           // gets added to the x-accumulator (dot_out_or_tmp) and then
           // the vector (x-accumulator, YCOORD, 0) is pushed into the
           // vecop_buffer. This is either the start of a line
           // segment or the center of an arc.
           buffer_offset++[vecop_buffer] = make_vec3(dot_out_or_tmp+=*fontptr/8&3,*fontptr&7,0),
           // Time to push the second vector into the vecop_buffer.
           // In the case of an arc (OPCODE == 3), this is starting
           // angle and range, or in the case of a line segment
           // (OPCODE == 2 or OPCODE == 1) this is dx, dy. The
           // x-coordinate is mirrored if OPCODE < 2.
           //
           // In any event, all of this information is hanging out in
           // the ARGUMENT, which is the lower 5 bits of
           // fbyte_or_aacnt (first byte of stroke instruction pair).
           // The third component is 1 for an arc (OPCODE > 2) and 0
           // otherwise.
           //
           // Here we also update buffer_length to be used later in
           // dist_to_scene.
           vecop_buffer[buffer_length=buffer_offset++] = make_vec3((fbyte_or_aacnt/8&3)*(lo_or_found<2?minus_one:1), (fbyte_or_aacnt&7)+1e-4, lo_or_found>2),
           // Just a NOP because the ternary operator we're inside of
           // here is of type int.
           1 :
           //////////////////////////////////////////////////
           // Glyph not found yet.
           // Try to update our found variable...
           (lo_or_found =
             //////////////////////////////////////////////////////////////////////
             // Update cur glyph according to current font table byte.
             (range_or_curglyph =
               // Subtract 40 from current font table byte. Less than zero?
               (fbyte_or_aacnt-=forty) < 0 ?
                 //////////////////////////////////////////////////
                 // Yes, less than zero, so this is a START token.
                 // Glyph width given by byte - 34 = byte - 40 + 6
                 angle_or_width=fbyte_or_aacnt+6,
                 // Increment cur glyph and mark state as started.
                 hi_or_started=range_or_curglyph+1 :
                 //////////////////////////////////////////////////
                 // Was font table byte nonzero after subtracting 40?
                 fbyte_or_aacnt ?
                   // Yes, nonzero.
                   (
                     // Now see if we have started seeing font table
                     // bytes yet, or if we are still going through the
                     // PPM header.
                     hi_or_started?
                       // Yes, we have started, so NOP.
                       0 :
                       // Not started, so emit the current fbyte_or_aacnt
                       // (to generate PPM header)
                       output(fbyte_or_aacnt),
                     // Comma says ignore results of previous ternary
                     // operator and leave range_or_curglyph unchanged
                     range_or_curglyph ) :
                   //////////////////////////////////////////////////
                   // Font table byte was 40 (SETCUR token), so set
                   // current glyph to next byte in font table.
                   *++fontptr)
             //////////////////////////////////////////////////////////////////////
             // Compare the newly-updated cur glyph to...
             ==
             // The current text character, OR'ed with 32 to put in
             // range of 32-63 or 96-127 (forces lowercase), and XOR'ed
             // with mask to deobfuscate "ioccc 2011" from progdata.
             ((*textptr|32)^xormask_or_quality) &&
             ////////////////////////////////////////
             // Found is only set when the byte after fontptr is not 40,
             // i.e. it stays clear whenever a SETCUR token follows.
             1[fontptr]-forty) );
    // Empty for loop body: all the work happens in the loop header.

  //////////////////////////////////////////////////
  // Step 2: Generate the dang image.
  //
  // All of the techniques here are based upon the PDF presentation at
  // http://iquilezles.org/www/material/nvscene2008/nvscene2008.htm
  //
  // Iterate over image rows. Note xormask_or_quality gets value 0 in
  // preview mode (three or more arguments), and 3 in high-quality mode.
  for (xormask_or_quality=3*(argc_or_image_col<3); ++image_row<110; )

    // Iterate over image columns.
    for (argc_or_image_col=-301;
         // Initialize the pixel color to zero, 600 cols total
         pix_color=zeros, ++argc_or_image_col<300;
         // Output pixel after each iteration.
         output(pix_color.c),output(pix_color.a),output(pix_color.t))

      // Iterate over AA samples: either 1 (preview) or 4 (high-quality).
      for (fbyte_or_aacnt=minus_one; ++fbyte_or_aacnt<=xormask_or_quality;)

        // Shade this sample. This for loop iterates over the initial
        // ray as well as reflection rays (in high quality mode).
        for (// Start marching at the shared ray origin
             march_point=make_vec3(-4,4.6,29),
             // Starting direction is a function of image row/column
             // and AA sample number.
             ray_dir=normalize( add_scaled( add_scaled( add_scaled(zeros, normalize(make_vec3(5,0,2)), argc_or_image_col + argc_or_image_col + fbyte_or_aacnt/2 ), normalize(make_vec3(2,-73,0)), image_row+image_row+fbyte_or_aacnt%2), make_vec3(30.75,-6,-75), 20) ),
             // The initial ray contribution is 255 for preview mode
             // or 63 for each AA sample in high-quality mode
             // (adding 4 of them gets you 252 which is close
             // enough).
             //
             // Also, here bounces is initialized to 3 in
             // high-quality mode or 0 in preview mode.
             ray_contribution=hit= 255-(bounces=xormask_or_quality)*64;
             // The bounces variable acts as a counter for remaining
             // bounces; at the start, bounces is non-negative and hit
             // is non-zero so the loop always runs at least once. The
             // expression equals hit*(bounces+1), so the loop stops
             // when hit is 0, or when hit is 1 and bounces is -1.
             hit*bounces+hit;
             // After each iteration (reflection), ray contribution
             // scales by 0.5.
             ray_contribution*=half) {

          // Perform the actual ray march using sphere tracing:
          for (// Initialize ray distance, current distance, and hit to 0
               raydist_or_brightness=curdist_or_specular=hit=0;
               // Keep going until hit or ray distance exceeds 94 units.
               // Note ray distance always incremented by current.
               !hit && 94 > (raydist_or_brightness+=
                 // Obtain distance to scene at current point
                 curdist_or_specular=dist_to_scene(
                   // Update current point by moving by current
                   // distance along ray direction
                   march_point = add_scaled( march_point, ray_dir, curdist_or_specular)));
               // After each ray update, set hit=1 if current
               // distance to scene primitive is less than 0.01
               hit=curdist_or_specular<.01);

          // Done with ray marching!
          //
          // Now point is equal to march_point, closest_point holds
          // the closest point in the scene to the current ray
          // point, and objpoint_or_material holds the material
          // color of the closest object.

          // Now fake ambient occlusion loop (see iq's PDF for explanation):
          for (// Compute scene normal at intersection
               normal = normalize(add_scaled(point,closest_point,minus_one)),
               // This is actually included here to initialize the
               // sky color below (gross).
               dot_out_or_tmp = ray_dir.t*ray_dir.t,
               // Also used below but initialized here.
               sample_color = objpoint_or_material,
               // Start at full brightness
               raydist_or_brightness=1;
               // 5 iterations if we hit something, 0 if not (saves
               // wrapping for loop in if statement).
               ++curdist_or_specular<6*hit;
               // AO with exponential decay
               raydist_or_brightness -= clamp(curdist_or_specular / 3 - dist_to_scene( add_scaled(march_point, normal, curdist_or_specular/3))) / pow(2,curdist_or_specular));

          // AO has been computed, time to get the final color of
          // this ray sample. Note sample_color has been initialized
          // to material of closest primitive above.
          sample_color = hit ? // Did this ray hit?
            //////////////////////////////////////////////////
            // Yes, the ray hit.
            // Get the Blinn-Phong specular coefficient as dot
            // product between normal and halfway vector, raised to
            // high power.
            curdist_or_specular = pow(clamp(dot(normal,
              // normalize halfway vector
              normalize(
                // create halfway vector
                add_scaled(
                  // objpoint_or_material is now light direction
                  objpoint_or_material=normalize(make_vec3(minus_one,1,2)), ray_dir, minus_one)))),
              // raised to the 40th power
              forty),
            // Mix in white color for specular
            pix_color = add_scaled(pix_color, ones, ray_contribution*curdist_or_specular),
            // Take the brightness computed during AO and modulate
            // it with diffuse and ambient light.
            raydist_or_brightness *=
              // Diffuse - objpoint_or_material is light direction
              clamp(dot(normal, objpoint_or_material))*half*twothirds +
              // Ambient
              twothirds,
            // Modulate ray_contribution after hit
            ray_contribution *= bounces-- ? // Are there any bounces left?
              // Yes, there are bounces left, so this hit should
              // account for 2/3 of the remaining energy (the next
              // will account for the final 1/3). We need to remove
              // the additive component already taken by specular,
              // however.
              twothirds - twothirds * curdist_or_specular :
              // No, there are no bounces left, so just use up
              // all the energy not taken by specular.
              1-curdist_or_specular,
            // Now after all of that, we're actually going to leave
            // sample_color unchanged (i.e. whatever closest
            // primitive material color was).
            sample_color :
            //////////////////////////////////////////////////
            // Nope, ray missed. Remember when we initialized
            // dot_out_or_tmp to contain z^2 above? We now use that
            // to shade the sky, which gets white along the +/- z
            // axis, and blue elsewhere.
            make_vec3(dot_out_or_tmp, dot_out_or_tmp, 1);

          // Add the weighted sample_color into the pixel color.
          pix_color = add_scaled(pix_color, sample_color, ray_contribution*raydist_or_brightness);

          // Pop out from the object a bit before starting to march
          // the reflection ray so we don't immediately detect the
          // same intersection that we're on.
          march_point = add_scaled(march_point,normal,.1);

          // Update the ray direction to be the reflection direction
          // using the usual calculation: d - 2*dot(d,n)*n.
          ray_dir = add_scaled(ray_dir,normal,-2*dot(ray_dir,normal));

        }

  return 0;

}
// For the point input, this will compute and return the distance to
// the closest scene primitive (text strokes in vecop_buffer, plus the
// floor), minus 0.45 to give the primitives some thickness.
//
// Side effects on globals: `point` is set to the argument,
// closest_point / closest_dist_squared are updated through
// update_distance_query(), and objpoint_or_material is left holding the
// material of the closest object (blue for text, red or white for floor).
// The parameter itself is reused inside the loop as the second vector of
// each buffer pair.
real dist_to_scene(vec3 point_or_vecop) {

  // This loop initializes global closest_dist_squared to its starting
  // bound (forty) and point to the function's argument. We will iterate
  // over pairs of vectors in the vecop_buffer using buffer_offset up to
  // the given buffer_length.
  for (closest_dist_squared=forty, point=point_or_vecop, buffer_offset=minus_one;
       // The first item in the vecop_buffer pair becomes objpoint
       // (arc center or line endpoint). The second item denotes line
       // displacement or arc angle. Note we also offset the
       // x-coordinate of the objpoint to approximately center the
       // text. The last thing we do here is bail out of the loop if
       // we are past end-of-buffer; since the two reads come first in
       // the comma expression, the pair just past buffer_length is
       // fetched as well before the test fails.
       objpoint_or_material = vecop_buffer[++buffer_offset],
       point_or_vecop = vecop_buffer[objpoint_or_material.c+=8.8-text_width*.45, ++buffer_offset],
       buffer_offset<=buffer_length;
       // After each loop iteration, we update the distance query;
       // as the loop increment, this runs after the body below.
       update_distance_query())

    // Currently objpoint is arc center or line endpoint. We must
    // update it to the closest point on the primitive (arc or line)
    // to the current point. First we check if we are an arc (op.t!=0)
    // or a line (op.t==0):
    objpoint_or_material = point_or_vecop.t ?
      //////////////////////////////////////////////////
      // We are an arc
      // Angles are expressed in units of 90 degrees (pi/2)
      dot_out_or_tmp = M_PI*half,
      // Get the actual angle and divide by 90 degrees. We will now
      // need to clamp the angle to deal with the arc endpoints
      // nicely.
      angle_or_width=atan2(point.a-objpoint_or_material.a, point.c-objpoint_or_material.c)/dot_out_or_tmp,
      // op.c encodes lower bound of angle (offset by 2)
      lo_or_found=point_or_vecop.c-2,
      // op.a encodes the angular range of the arc (offset by 1)
      range_or_curglyph=point_or_vecop.a+1,
      // Get the upper end of the range: lower bound plus range
      hi_or_started=lo_or_found+range_or_curglyph,
      // Now clamp! Note we scale overall result by 90 degrees
      angle_or_width = dot_out_or_tmp*(
        // did we wrap past the upper end of the range?
        angle_or_width>hi_or_started+half*range_or_curglyph ?
          // if so go low
          lo_or_found :
        // otherwise greater than upper end?
        angle_or_width > hi_or_started ?
          // then go hi
          hi_or_started :
        // did we wrap past the lower end of the range?
        angle_or_width<lo_or_found-half*range_or_curglyph ?
          // go hi
          hi_or_started :
        // otherwise less than lower end?
        angle_or_width<lo_or_found ?
          // go lo
          lo_or_found :
          // no clamp needed
          angle_or_width),
      // Now we can finally offset the objpoint (arc center) by the
      // unit vector at that angle (note radius = 1), which is
      // convenient.
      add_scaled(objpoint_or_material, make_vec3(cos(angle_or_width), sin(angle_or_width), 0), 1) :
      //////////////////////////////////////////////////
      // Nope, not arc, a line segment.
      // In this case op is just the displacement of endpoint from
      // startpoint so we can compute the nearest point along the line
      // using the standard formula: project (point - start) onto the
      // displacement and clamp the parameter.
      add_scaled(objpoint_or_material, point_or_vecop, clamp(dot( add_scaled(point, objpoint_or_material, minus_one), point_or_vecop) / dot(point_or_vecop, point_or_vecop) ) );

  //////////////////////////////////////////////////
  // Done with for loop.
  // Now we need to check distance to the floor, which exists
  // everywhere at a coordinate of y = -0.9. Here we update the
  // objpoint by copying the point and setting y.
  objpoint_or_material=point;
  objpoint_or_material.a = -.9;

  // Here we are creating a checkerboard texture by XOR'ing the x and
  // z cell indices: each coordinate is divided by 8 and offset by 8
  // before being stored in the integer temporaries.
  lo_or_found = point.c/8+8;
  lo_or_found ^= range_or_curglyph=point.t/8+8;

  // Finally, we are going to save the material. First we update the
  // distance query based upon the floor objpoint:
  objpoint_or_material = update_distance_query() ?
    // if the query updated, we are the floor, so red or white
    // depending on the low bit of the XOR'ed cell indices
    lo_or_found&1 ? make_vec3(twothirds,0,0) : ones :
    // otherwise the query didn't update so we are blue.
    make_vec3(twothirds,twothirds,1);

  // Finally we return the distance to closest point, offset by 0.45
  // (to give the primitives some thickness)
  return sqrt(closest_dist_squared)-.45;

}
// Normalize by abusing scaled add function: // // n / ||n|| = zero + n * pow(||n||, -0.5) // vec3 normalize(vec3 a) { return add_scaled(zeros, a, pow(dot(a,a),-half)); }