/*
 * Quick pre-screen of one FASTQ read against the bloom filter.
 *
 * The read is cut into consecutive, non-overlapping windows of bl->k_mer
 * characters.  Each window is copied into a scratch buffer (and passed
 * through rev_trans() first when model == 'r'), then looked up with
 * bloom_check().  On a hit the whole read is handed to fastq_full_check().
 *
 *   begin      start of the sequence line (not modified here)
 *   length     number of characters in the sequence line
 *   model      'n' scans the read as given; 'r' scans with rev_trans()
 *              applied to every window.  Any model other than 'r' is
 *              followed by a second pass with model 'r'.
 *   bl         bloom filter; bl->k_mer is the window size
 *   tole_rate  forwarded unchanged to fastq_full_check()
 *   File_head  forwarded unchanged to fastq_full_check()
 *
 * Returns the first positive value produced by fastq_full_check(), or 0
 * when neither pass produces one.  Also returns 0 if the scratch buffer
 * cannot be allocated.
 *
 * NOTE(review): the loop bound is strictly "greater than", so a trailing
 * window of exactly k_mer characters (and any shorter tail) is never
 * sampled.  That behaviour is kept as-is; confirm whether it is intended.
 */
int
fastq_read_check (char *begin, int length, char model, bloom * bl,
                  float tole_rate, F_set * File_head)
{
  char *p = begin;
  int distance = length;
  int result = 0;
  char *key = malloc ((size_t) bl->k_mer + 1);

  if (key == NULL)
    return 0;

  while (distance > bl->k_mer)
    {
      /* Take the next full window as a NUL-terminated string. */
      memcpy (key, p, (size_t) bl->k_mer);
      key[bl->k_mer] = '\0';
      p += bl->k_mer;
      distance -= bl->k_mer;

      if (model == 'r')
        rev_trans (key);

      if (bloom_check (bl, key))
        {
          result = fastq_full_check (bl, begin, length, model, tole_rate,
                                     File_head);
          if (result > 0)
            {
              free (key);
              return result;
            }
          /* In the forward pass one failed full check ends the scan and
             hands over to the reverse pass below. */
          if (model == 'n')
            break;
        }
    }

  /* Release the scratch buffer before recursing so each pass owns (and
     frees) exactly one buffer. */
  free (key);

  if (model == 'r')
    return 0;

  return fastq_read_check (begin, length, 'r', bl, tole_rate, File_head);
}
/*
 * Cut the reads out of one chunk of input text and process them one by one.
 *
 *   bl             bloom filter handed to the per-read checks
 *   info           work item; info->location is where scanning starts
 *   tail           passed to check_fmt() together with info
 *   File_head      statistics: reads_num is incremented for every read that
 *                  is processed, reads_contam for every read whose check
 *                  returns a positive value
 *   sampling_rate  in mode 'c', values below 1 let jump() skip reads
 *   tole_rate      forwarded unchanged to the per-read checks
 *   mode           'c' enables the sampling step; 'r' additionally copies
 *                  every processed record to the _contam or _clean buffer
 *   fmt_type       '@' selects FASTQ handling; anything else is handled as
 *                  FASTA (records delimited by '>')
 *
 * The end of the chunk is whatever check_fmt() returns; nothing is done if
 * it returns NULL.
 *
 * Lines are located by searching for '\n' only.
 * XXX: what about old Mac OS files that use a lone '\r' (0x0D)?
 */
void
read_process (bloom * bl, Queue * info, Queue * tail, F_set * File_head,
              float sampling_rate, float tole_rate, char mode, char fmt_type)
{
  char *start_point = info->location;
  char *next_job = NULL, *temp = NULL, *previous_point = NULL, *temp_next = NULL;
  char *line_end = NULL;
  int lines_left = 0;
  int result = 0;

  next_job = check_fmt (info, tail, start_point, fmt_type);
  if (next_job == NULL)
    return;

  while (start_point != next_job)
    {
      /* Fast/proportional scan: jump() may move the cursor to a later read,
         in which case this iteration processes nothing. */
      if (mode == 'c' && sampling_rate < 1)
        {
          temp = jump (start_point, fmt_type, sampling_rate);
          if (start_point != temp)
            {
              start_point = temp;
              continue;
            }
        }

      /* Atomic update of the shared read counter. */
#pragma omp atomic
      File_head->reads_num++;

      previous_point = start_point;     /* start of the whole record */
      start_point = get_right_sp (start_point, fmt_type);      /* skip the ID line */

      if (fmt_type == '@')
        {
          /* FASTQ: the sequence is the line that starts at start_point.  If
             that line has no terminating '\n', treat it as running to the
             end of the chunk instead of doing arithmetic on NULL. */
          line_end = strchr (start_point, '\n');
          if (line_end == NULL)
            line_end = next_job;
          result = fastq_read_check (start_point, line_end - start_point, 'n',
                                     bl, tole_rate, File_head);

          /* Step over the sequence, separator and quality lines.  A line
             without '\n' ends the record at the end of the chunk. */
          for (lines_left = 3; lines_left > 0; lines_left--)
            {
              line_end = strchr (start_point, '\n');
              if (line_end == NULL)
                {
                  start_point = next_job;
                  break;
                }
              start_point = line_end + 1;
            }
        }
      else
        {
          /* FASTA: the record runs up to the next '>' or, failing that, to
             the end of the chunk. */
          temp_next = strchr (start_point + 1, '>');
          if (temp_next == NULL)
            temp_next = next_job;
          result = fasta_read_check (start_point, temp_next - start_point, 'n',
                                     bl, tole_rate, File_head);
          start_point = temp_next;
        }

      if (result > 0)
        {
#pragma omp atomic
          File_head->reads_contam++;
          if (mode == 'r')
            {
              /* Append the whole record to the contaminated-reads buffer. */
#pragma omp critical
              {
                memcpy (_contam, previous_point, start_point - previous_point);
                _contam += (start_point - previous_point);
              }
            }
        }
      else
        {
          if (mode == 'r')
            {
              /* Append the whole record to the clean-reads buffer. */
#pragma omp critical
              {
                memcpy (_clean, previous_point, start_point - previous_point);
                _clean += (start_point - previous_point);
              }
            }
        }
    }
}