Exemple #1
0
/*
 * Quick pre-screen of one read against the bloom filter.
 *
 * The read is walked in consecutive, non-overlapping windows of bl->k_mer
 * characters.  Each window is copied into a NUL-terminated scratch key and
 * looked up with bloom_check().  When a window hits, fastq_full_check() is
 * run on the whole read and a positive result is returned immediately.
 *
 * begin      first character of the read's sequence
 * length     number of characters in the sequence
 * model      'n' screens the windows as they are; 'r' passes every key
 *            through rev_trans() before the lookup (presumably the reverse
 *            strand -- confirm against rev_trans).  A call with 'n' that
 *            finds nothing re-runs itself once with 'r'.
 * bl         bloom filter; bl->k_mer gives the window size
 * tole_rate  forwarded unchanged to fastq_full_check()
 * File_head  forwarded unchanged to fastq_full_check()
 *
 * Returns the positive value produced by fastq_full_check(), or 0 when no
 * pass produced one.
 *
 * NOTE(review): the loop only runs while MORE than k_mer characters remain,
 * so a trailing stretch of k_mer or fewer characters is never looked up.
 * That matches the previous behaviour and is kept on purpose here.
 */
int
fastq_read_check (char *begin, int length, char model, bloom * bl,
                  float tole_rate, F_set * File_head)
{
  char *p = begin;
  int distance = length;
  int result = 0;
  /* one extra byte for the terminating NUL */
  char *key = malloc (bl->k_mer * sizeof *key + 1);

  /* Out of memory: stop deterministically instead of writing through NULL. */
  if (key == NULL)
    abort ();

  while (distance > bl->k_mer)
    {
      memcpy (key, p, sizeof (char) * bl->k_mer);
      key[bl->k_mer] = '\0';
      p += bl->k_mer;
      distance -= bl->k_mer;

      if (model == 'r')
        rev_trans (key);

      if (bloom_check (bl, key))
        {
          result =
            fastq_full_check (bl, begin, length, model, tole_rate, File_head);
          if (result > 0)
            {
              free (key);
              return result;
            }
          else if (model == 'n')
            break;		/* forward pass failed the full check: go try 'r' */
        }
    }				//outside while

  /* Release the scratch key before a possible recursive call so that the
     two passes never hold two buffers at once. */
  free (key);

  if (model == 'r')
    return 0;
  else
    return fastq_read_check (begin, length, 'r', bl, tole_rate, File_head);
}
Exemple #2
0
/* Return the position just past the next '\n' found at or after s, or
 * fallback when no '\n' remains in the string. */
static char *line_after_or (char *s, char *fallback)
{
	char *newline = strchr (s, '\n');
	return (newline != NULL) ? newline + 1 : fallback;
}

/*
 * Cut the reads out of one job's text and process them one by one.
 *
 * Processing starts at info->location and stops when the cursor reaches the
 * pointer returned by check_fmt() (next_job); nothing is done when
 * check_fmt() returns NULL.
 *
 * For every read: File_head->reads_num is incremented, the ID line is
 * skipped with get_right_sp(), and the read is handed to fastq_read_check()
 * (fmt_type == '@') or fasta_read_check() (any other fmt_type).  A positive
 * result increments File_head->reads_contam.
 *
 * mode == 'c'  when sampling_rate < 1, jump() may move the cursor forward
 *              before a read is looked at (fast/proportional scan).
 * mode == 'r'  the raw text of each read is appended to the _contam buffer
 *              (positive result) or the _clean buffer (otherwise).
 *
 * Counters are updated with "omp atomic" and the shared output buffers
 * inside "omp critical" sections.
 *
 * Line endings: records are split by searching for '\n'.  With DOS ("\r\n")
 * input the '\r' therefore stays inside the length handed to
 * fastq_read_check().
 * XXX: lone '\r' (0x0D, classic Mac OS) line endings are not handled.
 */
void read_process (bloom * bl, Queue * info, Queue * tail, F_set * File_head, float sampling_rate, float tole_rate, char mode, char fmt_type)
{
	char *start_point = info->location;
	char *next_job = NULL, *temp = NULL, *previous_point = NULL, *temp_next = NULL;
	char *seq_end = NULL;
	int result = 0;
	int line = 0;

	next_job = check_fmt (info, tail, start_point, fmt_type);
	if (next_job == NULL)
		return;
	while (start_point != next_job)
	{
		if (mode == 'c')
		{
			// function for fast/proportional scan
			if (sampling_rate < 1)
				temp = jump (start_point, fmt_type, sampling_rate);
			else
				temp = start_point;
			// skip to the next read if needed
			if (start_point != temp)
			{
				start_point = temp;
				continue;
			}
		}
		// atomic process for summing reads number
		#pragma omp atomic
		File_head->reads_num++;
		previous_point = start_point;
		// skip the ID line
		start_point = get_right_sp (start_point, fmt_type);
		if (fmt_type == '@')
		{
			// A record without a closing '\n' is taken to run up to
			// next_job, the same fallback the fasta branch uses.
			seq_end = strchr (start_point, '\n');
			if (seq_end == NULL)
				seq_end = next_job;
			//identify read as fastq format read and pass it to fastq_read_check to process
			result = fastq_read_check (start_point, seq_end - start_point, 'n', bl, tole_rate, File_head);
			// step over the sequence, separator and quality lines,
			// never walking past the end of this job
			start_point = seq_end;
			for (line = 0; line < 3 && start_point != next_job; line++)
				start_point = line_after_or (start_point, next_job);
		}
		else
		{
			temp_next = strchr (start_point + 1, '>');
			if (temp_next == NULL)
				temp_next = next_job;
			//identify read as fasta format read and pass it to fasta_read_check to process
			result = fasta_read_check (start_point, temp_next - start_point, 'n', bl, tole_rate, File_head);
			start_point = temp_next;
		}
		if (result > 0)
		{
			#pragma omp atomic
			File_head->reads_contam++;
			if (mode == 'r')
			{
				// append the whole record (ID line included) to the contaminated output
				#pragma omp critical
				{
					memcpy (_contam, previous_point, start_point - previous_point);
					_contam += (start_point - previous_point);
				}
			}
		}
		else
		{
			if (mode == 'r')
			{
				// append the whole record (ID line included) to the clean output
				#pragma omp critical
				{
					memcpy (_clean, previous_point, start_point - previous_point);
					_clean += (start_point - previous_point);
				}
			}
		}
	}	// outside while
}