Beispiel #1
0
// Print a pseudorandom DNA sequence that is number_Of_Characters_To_Create
// characters long and made up of the nucleotides specified in
// nucleotides_Information and occurring at the frequencies specified in
// nucleotides_Information. The output is also wrapped to MAXIMUM_LINE_WIDTH
// columns.
//
// Parameters:
//   nucleotides_Information        - table of nucleotides; the .probability
//                                    and .letter members of each entry are
//                                    read.
//   number_Of_Nucleotides          - number of entries in that table.
//   number_Of_Characters_To_Create - total number of sequence characters to
//                                    generate.
//
// The sequence is produced in blocks of at most CHARACTERS_PER_BLOCK
// characters. For every block the pseudorandom numbers are drawn inside an
// OpenMP critical section, so they are consumed in a single well-defined
// order, while the translation of those numbers into nucleotide letters is
// done outside the critical section and can overlap between threads. The
// actual writing is delegated to output_Blocks(), which is defined elsewhere.
static void generate_And_Wrap_Pseudorandom_DNA_Sequence(
  const nucleotide_info nucleotides_Information[],
  const intnative_t number_Of_Nucleotides,
  const intnative_t number_Of_Characters_To_Create){

	// Cumulate the probabilities. Note that the probability is being multiplied
	// by IM because later on we'll also be calling the random number generator
	// with a value that is multiplied by IM. Since the random number generator
	// does a division by IM this allows the compiler to cancel out the
	// multiplication and division by IM with each other without requiring any
	// changes to the random number generator code whose code was explicitly
	// defined in the rules.
	// cumulative_Probabilities is a variable length array sized by the caller
	// supplied number_Of_Nucleotides.
	float cumulative_Probabilities[number_Of_Nucleotides],
	  cumulative_Probability=0.0;
	for(intnative_t i=0; i<number_Of_Nucleotides; i++){
		cumulative_Probability+=nucleotides_Information[i].probability;
		cumulative_Probabilities[i]=cumulative_Probability*IM;
	}

	// blocks is a circular queue that stores the blocks while they are being
	// processed. next_Block_To_Output contains the index of the first block in
	// the queue which is also the next block that should be output once it is
	// ready.
	// Each block has room for CHARACTERS_PER_BLOCK characters plus
	// LINES_PER_BLOCK extra bytes (presumably one line feed per line; this
	// function never writes those bytes itself).
	char blocks[BLOCKS_TO_USE][CHARACTERS_PER_BLOCK+LINES_PER_BLOCK];
	intnative_t next_Block_To_Output=0;

	// block_Statuses contains a status value for each block in the circular
	// queue.
	//  -A value of -1 means that block is not in use.
	//  -A value of 0 means that block is in use and being processed.
	//  -A positive value means that block is ready to be output and the value
	//   is its length.
	// All blocks start out as not in use.
	intnative_t block_Statuses[BLOCKS_TO_USE];
	for(intnative_t i=0; i<BLOCKS_TO_USE; block_Statuses[i++]=-1);

	// Running count of the characters that still have to be assigned to a
	// block. It is only modified inside the critical section below.
	intnative_t current_Number_Of_Characters_To_Create=
	  number_Of_Characters_To_Create;

	// Limit the number_Of_Threads_To_Use to three threads since the bottleneck
	// for this program is the speed at which the pseudorandom generator can be
	// run, which is only fast enough to keep about two other threads busy.
	// Using more threads will start slowing down the program due to the
	// overhead from additional thread management and resource usage. Using more
	// threads will also use more CPU time too since normally waiting OpenMP
	// threads will use spinlocks.
	#ifdef _OPENMP
		intnative_t number_Of_Threads_To_Use=omp_get_num_procs();
		if(number_Of_Threads_To_Use>3) number_Of_Threads_To_Use=3;
		omp_set_num_threads(number_Of_Threads_To_Use);
	#endif

	// One iteration per block; the iteration count is
	// number_Of_Characters_To_Create divided by CHARACTERS_PER_BLOCK, rounded
	// up. current_Block_Number itself is not used inside the body: the block
	// slot and block length are both decided inside the critical section.
	#pragma omp parallel for schedule(guided)
	for(intnative_t current_Block_Number=0; current_Block_Number<
	  (number_Of_Characters_To_Create+CHARACTERS_PER_BLOCK-1)/
	  CHARACTERS_PER_BLOCK; current_Block_Number++){

		// Per-iteration state: the queue slot this iteration fills, how many
		// characters it holds, and the pseudorandom numbers drawn for it.
		intnative_t block_To_Use, block_Length;
		float pseudorandom_Numbers[CHARACTERS_PER_BLOCK];

		// Only one thread can be outputting blocks or generating pseudorandom
		// numbers at a time in order to ensure they are done in the correct
		// order.
		#pragma omp critical
		{
			// Find the first unused block (if any) and set that as the
			// block_To_Use for outputting the nucleotide sequence to. The
			// search starts at next_Block_To_Output and visits each slot of
			// the circular queue at most once.
			block_To_Use=next_Block_To_Output;
			for(intnative_t i=0; i<BLOCKS_TO_USE; i++,
			  block_To_Use=(block_To_Use+1)%BLOCKS_TO_USE){
				if(block_Statuses[block_To_Use]==-1)
					break;
			}

			// If no unused block was found then block_To_Use will be restored
			// to next_Block_To_Output and we will have to wait for it to finish
			// processing, output that block, and then use that block.
			// This is a busy-wait performed while holding the critical
			// section; the flush re-reads block_Statuses on every pass.
			while(block_Statuses[block_To_Use]==0){
				#pragma omp flush(block_Statuses)
			}

			// Output any blocks that are ready to be output.
			// NOTE(review): output_Blocks() is not visible here; the code
			// below presumably relies on it advancing next_Block_To_Output
			// and resetting the status of every block it writes to -1 --
			// confirm against its definition.
			output_Blocks(blocks, block_Statuses, &next_Block_To_Output,
			  BLOCKS_TO_USE);

			// Update the status for block_To_Use to reflect that it is now
			// being processed. The increment turns an unused status of -1
			// into the in-use status of 0.
			block_Statuses[block_To_Use]++;

			// Figure out what the block_Length should be and decrement
			// current_Number_Of_Characters_To_Create by that amount. Only the
			// final block can be shorter than CHARACTERS_PER_BLOCK.
			block_Length=CHARACTERS_PER_BLOCK;
			if(current_Number_Of_Characters_To_Create<CHARACTERS_PER_BLOCK)
				block_Length=current_Number_Of_Characters_To_Create;
			current_Number_Of_Characters_To_Create-=block_Length;

			// Get the pseudorandom_Numbers to use for this block. The loop
			// body is empty; the call and the store both happen in the
			// increment expression.
			for(intnative_t pseudorandom_Number_Index=0;
			  pseudorandom_Number_Index<block_Length;
			  pseudorandom_Numbers[pseudorandom_Number_Index++]=
			  get_LCG_Pseudorandom_Number(IM));
		}


		// Start processing the pseudorandom_Numbers and generate the
		// corresponding block of nucleotides that will be output later by
		// filling block_To_Use with characters from nucleotides_Information[]
		// that are selected by looking up the pseudorandom number.
		// This part runs outside the critical section.
		char * line=blocks[block_To_Use];
		for(intnative_t column=0, pseudorandom_Number_Index=0;
		  pseudorandom_Number_Index<block_Length; pseudorandom_Number_Index++){
			const float r=pseudorandom_Numbers[pseudorandom_Number_Index];

			// Count the number of nucleotides with a probability less than what
			// was selected by the random number generator and then use that
			// count as an index for the nucleotide to select. It's arguable
			// whether this qualifies as a linear search but I guess you can say
			// that you're doing a linear search for all the nucleotides with a
			// probability less than what was selected by the random number
			// generator and then just counting how many matches were found.
			// With a small number of nucleotides this can be faster than doing
			// a more normal linear search (although in some cases it may
			// generate different results) and a couple of the other programs
			// already do this as well so we will too.
			intnative_t count=0;
			for(intnative_t i=0; i<number_Of_Nucleotides; i++)
				if(cumulative_Probabilities[i]<=r)
					count++;

			// NOTE(review): count equals number_Of_Nucleotides when r is
			// greater than or equal to every cumulative probability, which
			// would read one entry past the table. Presumably the range of
			// the generator rules this out -- confirm.
			line[column]=nucleotides_Information[count].letter;

			// If we reach the end of the line, reset the column counter and
			// advance to the next line. The extra +1 skips one byte after
			// each full line; that byte is not written here.
			if(++column==MAXIMUM_LINE_WIDTH){
				column=0;
				line+=MAXIMUM_LINE_WIDTH+1;
			}
		}


		// Update the block_Statuses so that this block_To_Use gets output
		// later.
		// NOTE(review): this store happens outside the critical section and
		// without an explicit flush or atomic, while the busy-wait above reads
		// the same element from another thread -- confirm this is adequate for
		// the targeted OpenMP implementation.
		block_Statuses[block_To_Use]=block_Length;
	}

	// Output the remaining blocks.
	output_Blocks(blocks, block_Statuses, &next_Block_To_Output, BLOCKS_TO_USE);
}
// Write a pseudorandom DNA sequence of number_Of_Characters_To_Create
// characters to stdout, at most MAXIMUM_LINE_WIDTH characters per line and
// with every line (including a final short one) terminated by a line feed.
//
// Parameters:
//   nucleotides_Information        - table of nucleotides; the .probability
//                                    and .letter members of each entry are
//                                    read.
//   number_Of_Nucleotides          - number of entries in that table.
//   number_Of_Characters_To_Create - total number of sequence characters to
//                                    generate.
//
// A lookup table with LOOKUP_TABLE_SIZE entries maps the integer part of a
// pseudorandom number to the first nucleotide that could match it, which
// shortens the linear search that follows.
static void generate_And_Wrap_Pseudorandom_DNA_Sequence(
  const nucleotide_info nucleotides_Information[],
  const intnative_t number_Of_Nucleotides,
  const intnative_t number_Of_Characters_To_Create){

   // Running totals of the probabilities, scaled by LOOKUP_TABLE_SCALE.
   float cumulative_Probabilities[number_Of_Nucleotides];
   float running_Total=0.0;
   for(intnative_t n=0; n<number_Of_Nucleotides; n++){
      running_Total+=nucleotides_Information[n].probability;
      cumulative_Probabilities[n]=running_Total*LOOKUP_TABLE_SCALE;
   }

   // Force the final threshold up to LOOKUP_TABLE_SIZE so that both searches
   // below are guaranteed to stop at the last nucleotide at the latest.
   cumulative_Probabilities[number_Of_Nucleotides-1]=LOOKUP_TABLE_SIZE;

   // For every table slot record the index of the first nucleotide whose
   // threshold lies above that slot. first_Candidate only ever moves
   // forward while the slots are walked in ascending order.
   uint8_t nucleotide_Indexes_Lookup_Table[LOOKUP_TABLE_SIZE];
   uint8_t first_Candidate=0;
   intnative_t slot=0;
   while(slot<LOOKUP_TABLE_SIZE){
      if(slot>=cumulative_Probabilities[first_Candidate]){
         first_Candidate++;
         continue;
      }

      nucleotide_Indexes_Lookup_Table[slot]=first_Candidate;
      slot++;
   }

   // One output line plus its line feed. Full lines reuse the line feed that
   // is stored here once; a short last line gets its own further down.
   char line[MAXIMUM_LINE_WIDTH+1];
   line[MAXIMUM_LINE_WIDTH]='\n';

   intnative_t characters_Remaining=number_Of_Characters_To_Create;
   while(characters_Remaining>0){
      // Decide how long this line is and, for a short line, put the line
      // feed directly behind its last character.
      intnative_t line_Length;
      if(characters_Remaining<MAXIMUM_LINE_WIDTH){
         line_Length=characters_Remaining;
         line[line_Length]='\n';
      }else{
         line_Length=MAXIMUM_LINE_WIDTH;
      }

      // Draw one pseudorandom number per character and translate it into a
      // nucleotide letter.
      for(char * cursor=line; cursor<line+line_Length; cursor++){
         const float r=get_LCG_Pseudorandom_Number();

         // The table gives the starting point; continue linearly until the
         // first threshold that is greater than r.
         intnative_t index=nucleotide_Indexes_Lookup_Table[(intnative_t)r];
         while(cumulative_Probabilities[index]<=r){
            index++;
         }

         *cursor=nucleotides_Information[index].letter;
      }

      // Write the line together with its line feed and account for the
      // characters that were just produced.
      fwrite(line, line_Length+1, 1, stdout);
      characters_Remaining-=line_Length;
   }
}
Beispiel #3
0
// Print a pseudorandom DNA sequence that is number_Of_Characters_To_Create
// characters long and made up of the nucleotides specified in
// nucleotides_Information and occurring at the frequencies specified in
// nucleotides_Information. The output is written to stdout and wrapped to
// MAXIMUM_LINE_WIDTH columns; every line, including a final short one, is
// terminated by a line feed.
//
// Parameters:
//   nucleotides_Information        - table of nucleotides; the .probability
//                                    and .letter members of each entry are
//                                    read.
//   number_Of_Nucleotides          - number of entries in that table.
//   number_Of_Characters_To_Create - total number of sequence characters to
//                                    generate.
static void generate_And_Wrap_Pseudorandom_DNA_Sequence(
  const nucleotide_info nucleotides_Information[],
  const intnative_t number_Of_Nucleotides,
  const intnative_t number_Of_Characters_To_Create){

    // Cumulate the probabilities. Note that the probability is being multiplied
    // by IM because later on we'll also be calling the random number generator
    // with a value that is multiplied by IM. Since the random number generator
    // does a division by IM this allows the compiler to cancel out the
    // multiplication and division by IM with each other without requiring any
    // changes to the random number generator code whose code was explicitly
    // defined in the rules.
    float cumulative_Probabilities[number_Of_Nucleotides],
      cumulative_Probability=0.0;
    for(intnative_t i=0; i<number_Of_Nucleotides; i++){
        cumulative_Probability+=nucleotides_Information[i].probability;
        cumulative_Probabilities[i]=cumulative_Probability*IM;
    }

    // Buffer for one line plus its line feed. Full lines reuse the line feed
    // stored here; a short final line gets its own one below.
    char line[MAXIMUM_LINE_WIDTH+1];
    line[MAXIMUM_LINE_WIDTH]='\n';

    for(intnative_t current_Number_Of_Characters_To_Create=
        number_Of_Characters_To_Create;
        current_Number_Of_Characters_To_Create>0;){
        // Figure out the length of the line we need to write. If it's less than
        // MAXIMUM_LINE_WIDTH then we also need to add a line feed in the right
        // spot too.
        intnative_t line_Length=MAXIMUM_LINE_WIDTH;
        if(current_Number_Of_Characters_To_Create<MAXIMUM_LINE_WIDTH){
            line_Length=current_Number_Of_Characters_To_Create;
            line[line_Length]='\n';
        }

        // Fill up the line with characters from nucleotides_Information[] that
        // are selected by looking up a pseudorandom number.
        for(intnative_t column=0; column<line_Length; column++){
            const float r=get_LCG_Pseudorandom_Number(IM);

            // Count the nucleotides whose cumulative probability is less than
            // or equal to r and use that count as the index of the nucleotide
            // to select. With a small number of nucleotides this branch-light
            // counting can be faster than a linear search that stops early.
            //
            // The last cumulative probability is deliberately left out of the
            // comparison: any r at or beyond the second to last threshold
            // belongs to the final nucleotide, so count can never exceed
            // number_Of_Nucleotides-1 and the lookup below cannot read past
            // the end of the table, even if rounding leaves the last
            // cumulative value slightly too small.
            intnative_t count=0;
            for(intnative_t i=0; i<number_Of_Nucleotides-1; i++){
                if(cumulative_Probabilities[i]<=r){
                    count++;
                }
            }

            line[column]=nucleotides_Information[count].letter;
        }

        // Output the line (with its line feed) to stdout and update the
        // current_Number_Of_Characters_To_Create.
        fwrite(line, line_Length+1, 1, stdout);
        current_Number_Of_Characters_To_Create-=line_Length;
    }
}