// Print a pseudorandom DNA sequence that is number_Of_Characters_To_Create // characters long and made up of the nucleotides specified in // nucleotides_Information and occurring at the frequencies specified in // nucleotides_Information. The output is also wrapped to MAXIMUM_LINE_WIDTH // columns. static void generate_And_Wrap_Pseudorandom_DNA_Sequence( const nucleotide_info nucleotides_Information[], const intnative_t number_Of_Nucleotides, const intnative_t number_Of_Characters_To_Create){ // Cumulate the probabilities. Note that the probability is being multiplied // by IM because later on we'll also be calling the random number generator // with a value that is multiplied by IM. Since the random number generator // does a division by IM this allows the compiler to cancel out the // multiplication and division by IM with each other without requiring any // changes to the random number generator code whose code was explicitly // defined in the rules. float cumulative_Probabilities[number_Of_Nucleotides], cumulative_Probability=0.0; for(intnative_t i=0; i<number_Of_Nucleotides; i++){ cumulative_Probability+=nucleotides_Information[i].probability; cumulative_Probabilities[i]=cumulative_Probability*IM; } // blocks is a circular queue that stores the blocks while they are being // processed. next_Block_To_Output contains the index of the first block in // the queue which is also the next block that should be output once it is // ready. char blocks[BLOCKS_TO_USE][CHARACTERS_PER_BLOCK+LINES_PER_BLOCK]; intnative_t next_Block_To_Output=0; // block_Statuses contains a status value for each block in the circular // queue. // -A value of -1 means that block is not in use. // -A value of 0 means that block is in use and being processed. // -A positive value means that block is ready to be output and the value // is its length. intnative_t block_Statuses[BLOCKS_TO_USE]; for(intnative_t i=0; i<BLOCKS_TO_USE; block_Statuses[i++]=-1); intnative_t current_Number_Of_Characters_To_Create= number_Of_Characters_To_Create; // Limit the number_Of_Threads_To_Use to three threads since the bottleneck // for this program is the speed at which the pseudorandom generator can be // ran at which is only fast enough to keep about two other threads busy. // Using more threads will start slowing down the program due to the // overhead from additional thread management and resource usage. Using more // threads will also use more CPU time too since normally waiting OpenMP // threads will use spinlocks. #ifdef _OPENMP intnative_t number_Of_Threads_To_Use=omp_get_num_procs(); if(number_Of_Threads_To_Use>3) number_Of_Threads_To_Use=3; omp_set_num_threads(number_Of_Threads_To_Use); #endif #pragma omp parallel for schedule(guided) for(intnative_t current_Block_Number=0; current_Block_Number< (number_Of_Characters_To_Create+CHARACTERS_PER_BLOCK-1)/ CHARACTERS_PER_BLOCK; current_Block_Number++){ intnative_t block_To_Use, block_Length; float pseudorandom_Numbers[CHARACTERS_PER_BLOCK]; // Only one thread can be outputting blocks or generating pseudorandom // numbers at a time in order to ensure they are done in the correct // order. #pragma omp critical { // Find the first unused block (if any) and set that as the // block_To_Use for outputting the nucleotide sequence to. block_To_Use=next_Block_To_Output; for(intnative_t i=0; i<BLOCKS_TO_USE; i++, block_To_Use=(block_To_Use+1)%BLOCKS_TO_USE){ if(block_Statuses[block_To_Use]==-1) break; } // If no unused block was found then block_To_Use will be restored // to next_Block_To_Output and we will have to wait for it to finish // processing, output that block, and then use that block. while(block_Statuses[block_To_Use]==0){ #pragma omp flush(block_Statuses) } // Output any blocks that are ready to be output. output_Blocks(blocks, block_Statuses, &next_Block_To_Output, BLOCKS_TO_USE); // Update the status for block_To_Use to reflect that it is now // being processed. block_Statuses[block_To_Use]++; // Figure out what the block_Length should be and decrement // current_Number_Of_Characters_To_Create by that amount. block_Length=CHARACTERS_PER_BLOCK; if(current_Number_Of_Characters_To_Create<CHARACTERS_PER_BLOCK) block_Length=current_Number_Of_Characters_To_Create; current_Number_Of_Characters_To_Create-=block_Length; // Get the pseudorandom_Numbers to use for this block. for(intnative_t pseudorandom_Number_Index=0; pseudorandom_Number_Index<block_Length; pseudorandom_Numbers[pseudorandom_Number_Index++]= get_LCG_Pseudorandom_Number(IM)); } // Start processing the pseudorandom_Numbers and generate the // corresponding block of nucleotides that will be output later by // filling block_To_Use with characters from nucleotides_Information[] // that are selected by looking up the pseudorandom number. char * line=blocks[block_To_Use]; for(intnative_t column=0, pseudorandom_Number_Index=0; pseudorandom_Number_Index<block_Length; pseudorandom_Number_Index++){ const float r=pseudorandom_Numbers[pseudorandom_Number_Index]; // Count the number of nucleotides with a probability less than what // was selected by the random number generator and then use that // count as an index for the nucleotide to select. It's arguable // whether this qualifies as a linear search but I guess you can say // that you're doing a linear search for all the nucleotides with a // probability less than what was selected by the random number // generator and then just counting how many matches were found. // With a small number of nucleotides this can be faster than doing // a more normal linear search (although in some cases it may // generate different results) and a couple of the other programs // already do this as well so we will too. intnative_t count=0; for(intnative_t i=0; i<number_Of_Nucleotides; i++) if(cumulative_Probabilities[i]<=r) count++; line[column]=nucleotides_Information[count].letter; // If we reach the end of the line, reset the column counter and // advance to the next line. if(++column==MAXIMUM_LINE_WIDTH){ column=0; line+=MAXIMUM_LINE_WIDTH+1; } } // Update the block_Statuses so that this block_To_Use gets output // later. block_Statuses[block_To_Use]=block_Length; } // Output the remaining blocks. output_Blocks(blocks, block_Statuses, &next_Block_To_Output, BLOCKS_TO_USE); }
// Print a pseudorandom DNA sequence that is number_Of_Characters_To_Create // characters long and made up of the nucleotides specified in // nucleotides_Information and occurring at the frequencies specified in // nucleotides_Information. The output is also wrapped to MAXIMUM_LINE_WIDTH // columns. static void generate_And_Wrap_Pseudorandom_DNA_Sequence( const nucleotide_info nucleotides_Information[], const intnative_t number_Of_Nucleotides, const intnative_t number_Of_Characters_To_Create){ // Cumulate the probabilities. float cumulative_Probabilities[number_Of_Nucleotides], cumulative_Probability=0.0; for(intnative_t i=0; i<number_Of_Nucleotides; i++){ cumulative_Probability+=nucleotides_Information[i].probability; cumulative_Probabilities[i]=cumulative_Probability*LOOKUP_TABLE_SCALE; } // Adjust the last probability so that nothing will go past it. cumulative_Probabilities[number_Of_Nucleotides-1]=LOOKUP_TABLE_SIZE; // Create and fill the nucleotide_Indexes_Lookup_Table which will allow us // to later lookup a probability and quickly find the approximate index for // the nucleotide with that selected probability. uint8_t nucleotide_Indexes_Lookup_Table[LOOKUP_TABLE_SIZE], current_Index=0; for(intnative_t probability=0; probability<LOOKUP_TABLE_SIZE; probability++){ while(probability>=cumulative_Probabilities[current_Index]) current_Index++; nucleotide_Indexes_Lookup_Table[probability]=current_Index; } char line[MAXIMUM_LINE_WIDTH+1]; line[MAXIMUM_LINE_WIDTH]='\n'; for(intnative_t current_Number_Of_Characters_To_Create= number_Of_Characters_To_Create; current_Number_Of_Characters_To_Create>0;){ // Figure out the length of the line we need to write. If it's less than // MAXIMUM_LINE_WIDTH then we also need to add a line feed in the right // spot too. intnative_t line_Length=MAXIMUM_LINE_WIDTH; if(current_Number_Of_Characters_To_Create<MAXIMUM_LINE_WIDTH){ line_Length=current_Number_Of_Characters_To_Create; line[line_Length]='\n'; } // Fill up the line with characters from nucleotides_Information[] that // are selected by looking up a pseudorandom number. for(intnative_t column=0; column<line_Length; column++){ const float r=get_LCG_Pseudorandom_Number(); // Lookup the probability in the lookup table and then use the // resulting index as the index where we should start the linear // search for the correct nucleotide at. intnative_t index=nucleotide_Indexes_Lookup_Table[(intnative_t)r]; while(cumulative_Probabilities[index]<=r) index++; line[column]=nucleotides_Information[index].letter; } // Output the line to stdout and update the // current_Number_Of_Characters_To_Create. fwrite(line, line_Length+1, 1, stdout); current_Number_Of_Characters_To_Create-=line_Length; } }
// Print a pseudorandom DNA sequence that is number_Of_Characters_To_Create // characters long and made up of the nucleotides specified in // nucleotides_Information and occurring at the frequencies specified in // nucleotides_Information. The output is also wrapped to MAXIMUM_LINE_WIDTH // columns. static void generate_And_Wrap_Pseudorandom_DNA_Sequence( const nucleotide_info nucleotides_Information[], const intnative_t number_Of_Nucleotides, const intnative_t number_Of_Characters_To_Create){ // Cumulate the probabilities. Note that the probability is being multiplied // by IM because later on we'll also be calling the random number generator // with a value that is multiplied by IM. Since the random number generator // does a division by IM this allows the compiler to cancel out the // multiplication and division by IM with each other without requiring any // changes to the random number generator code whose code was explicitly // defined in the rules. float cumulative_Probabilities[number_Of_Nucleotides], cumulative_Probability=0.0; for(intnative_t i=0; i<number_Of_Nucleotides; i++){ cumulative_Probability+=nucleotides_Information[i].probability; cumulative_Probabilities[i]=cumulative_Probability*IM; } char line[MAXIMUM_LINE_WIDTH+1]; line[MAXIMUM_LINE_WIDTH]='\n'; for(intnative_t current_Number_Of_Characters_To_Create= number_Of_Characters_To_Create; current_Number_Of_Characters_To_Create>0;){ // Figure out the length of the line we need to write. If it's less than // MAXIMUM_LINE_WIDTH then we also need to add a line feed in the right // spot too. intnative_t line_Length=MAXIMUM_LINE_WIDTH; if(current_Number_Of_Characters_To_Create<MAXIMUM_LINE_WIDTH){ line_Length=current_Number_Of_Characters_To_Create; line[line_Length]='\n'; } // Fill up the line with characters from nucleotides_Information[] that // are selected by looking up a pseudorandom number. for(intnative_t column=0; column<line_Length; column++){ const float r=get_LCG_Pseudorandom_Number(IM); // Count the number of nucleotides with a probability less than what // was selected by the random number generator and then use that // count as an index for the nucleotide to select. It's arguable // whether this qualifies as a linear search but I guess you can say // that you're doing a linear search for all the nucleotides with a // probability less than what was selected by the random number // generator and then just counting how many matches were found. // With a small number of nucleotides this can be faster than doing // a more normal linear search (although in some cases it may // generate different results) and a couple of the other programs // already do this as well so we will too. intnative_t count=0; for(intnative_t i=0; i<number_Of_Nucleotides; i++) if(cumulative_Probabilities[i]<=r) count++; line[column]=nucleotides_Information[count].letter; } // Output the line to stdout and update the // current_Number_Of_Characters_To_Create. //fwrite(line, line_Length+1, 1, stdout); current_Number_Of_Characters_To_Create-=line_Length; } }