/*-------------------------------------*/
/*
 * fasta_read_check - quick k-mer screen of one FASTA record against a bloom
 * filter.
 *
 * The first line of the record (everything up to the first '\n' found after
 * begin[0]) is skipped as the header.  The remaining text is cut into
 * consecutive, non-overlapping k-mers of bl->k_mer characters, ignoring
 * '\r' and '\n'.  Scanning stops at a '>' or '\0' character, or when the
 * cursor becomes equal to `next`.  If fewer than k_mer characters remain at
 * the end, the last k-mer is completed with the tail of the previous full
 * k-mer so that it covers the final k_mer characters of the sequence.
 *
 * Every k-mer is looked up with bloom_check(); on a hit the whole record is
 * handed to fasta_full_check() and a positive result is returned at once.
 *
 * begin      start of the record (header line first)
 * next       stop position for the scan; also forwarded to fasta_full_check()
 * model      'n' for the first pass.  In the 'n' pass the scan stops at the
 *            first bloom hit that fasta_full_check() does not confirm.  Any
 *            pass other than 'r' that finds nothing ends by re-running the
 *            check with model 'r'.  In the 'r' pass each k-mer goes through
 *            rev_trans() before lookup (presumably a reverse-complement --
 *            confirm against its definition).
 * bl         bloom filter; bl->k_mer is the k-mer length
 * tole_rate  forwarded unchanged to fasta_full_check()
 * File_head  forwarded unchanged to fasta_full_check()
 *
 * Returns 1 when the header has no terminating newline or the sequence is
 * empty (the character after the header is '>'), the positive value
 * reported by fasta_full_check() when one is obtained, and 0 otherwise.
 * NOTE(review): returning 1 for an empty record is kept from the original;
 * confirm that callers really want such records counted as positive.
 *
 * Aborts the process if the k-mer buffers cannot be allocated.
 */
int
fasta_read_check (char *begin, char *next, char model, bloom * bl,
                  float tole_rate, F_set * File_head)
{
  /* Test the strchr() result itself: adding 1 first would hide a NULL. */
  char *header_end = strchr (begin + 1, '\n');
  if (header_end == NULL)
    return 1;

  char *p = header_end + 1;
  if (*p == '>')
    return 1;

  const size_t kmer = (size_t) bl->k_mer;
  char *key = malloc (kmer + 1);
  char *pre_key = malloc (kmer + 1);
  if (key == NULL || pre_key == NULL)
    abort ();                   /* explicit out-of-memory policy */
  key[kmer] = '\0';
  pre_key[kmer] = '\0';

  int have_prev = 0;            /* pre_key holds a complete k-mer */
  int result = 0;

  while (p != next)
    {
      size_t n = 0;             /* characters stored in key */
      size_t m = 0;             /* characters consumed from p */
      int at_end = 0;           /* hit '>' or '\0' while collecting */

      while (n < kmer)
        {
          char c = p[m];

          if (c == '>' || c == '\0')
            {
              at_end = 1;
              break;
            }
          if (c != '\r' && c != '\n')
            key[n++] = c;
          m++;
        }

      if (n == 0)
        break;                  /* nothing left but line breaks */

      if (n == kmer)
        {
          memcpy (pre_key, key, kmer + 1);
          have_prev = 1;
        }
      else
        {
          /* Fewer than k_mer characters remain.  Without a previous full
             k-mer the sequence is shorter than k_mer and cannot be
             queried at all. */
          if (!have_prev)
            break;
          /* Shift the new characters to the end of key and fill the front
             with the tail of the previous k-mer. */
          memmove (key + (kmer - n), key, n);
          memcpy (key, pre_key + n, kmer - n);
        }
      p += m;

      if (model == 'r')
        rev_trans (key);

      if (bloom_check (bl, key))
        {
          result = fasta_full_check (bl, begin, next, model, tole_rate,
                                     File_head);
          if (result > 0)
            {
              free (key);
              free (pre_key);
              return result;
            }
          if (model == 'n')
            break;              /* go on to the 'r' pass */
        }

      if (at_end)
        break;
    }

  free (key);
  free (pre_key);

  if (model == 'r')
    return 0;
  return fasta_read_check (begin, next, 'r', bl, tole_rate, File_head);
}
/*cut the reads from the string and process them one by one*/
/*
 * read_process - walk the reads of one work chunk and classify each of them.
 *
 * bl             bloom filter handed to the per-read checkers
 * info           chunk descriptor; info->location is where scanning starts
 * tail           passed to check_fmt() together with info
 * File_head      statistics record; reads_num and reads_contam are
 *                incremented atomically
 * sampling_rate  when mode is 'c' and the rate is below 1, jump() may move
 *                the cursor so that the current read is skipped
 * tole_rate      forwarded to the per-read checkers
 * mode           'c' enables the sampling step above; 'r' additionally
 *                copies the raw text of every read to the _contam or
 *                _clean output cursor
 * fmt_type       '@' selects FASTQ handling, anything else FASTA handling
 *
 * The chunk ends at the pointer returned by check_fmt(); nothing is done
 * when that pointer is NULL.
 * NOTE(review): check_fmt() is presumably also where DOS ("\r\n") versus
 * Unix ("\n") line endings are sorted out -- confirm.  Line skipping in
 * this function searches for '\n' only, so lone '\r' line endings are not
 * handled here.
 */
void
read_process (bloom * bl, Queue * info, Queue * tail, F_set * File_head,
              float sampling_rate, float tole_rate, char mode, char fmt_type)
{
  char *start_point = info->location;
  char *next_job = NULL, *temp = NULL, *previous_point = NULL, *temp_next = NULL;
  int result = 0;

  next_job = check_fmt (info, tail, start_point, fmt_type);
  if (next_job == NULL)
    return;

  while (start_point != next_job)
    {
      if (mode == 'c')
        {
          /* fast/proportional scan: jump() may move past this read */
          if (sampling_rate < 1)
            temp = jump (start_point, fmt_type, sampling_rate);
          else
            temp = start_point;
          if (start_point != temp)
            {
              start_point = temp;
              continue;         /* skip to the next read */
            }
        }

      /* atomic update of the shared read counter */
#pragma omp atomic
      File_head->reads_num++;

      previous_point = start_point;
      start_point = get_right_sp (start_point, fmt_type);

      if (fmt_type == '@')
        {
          /* FASTQ: the sequence is the current line. */
          char *seq_end = strchr (start_point, '\n');
          int line;

          /* No newline: the sequence runs to the end of the string. */
          if (seq_end == NULL)
            seq_end = start_point + strlen (start_point);

          result = fastq_read_check (start_point, seq_end - start_point,
                                     'n', bl, tole_rate, File_head);

          /* Skip the sequence, separator and quality lines.  A missing
             newline (e.g. last record without a trailing line break) ends
             the chunk instead of doing arithmetic on a NULL pointer. */
          for (line = 0; line < 3; line++)
            {
              char *nl = strchr (start_point, '\n');

              if (nl == NULL)
                {
                  start_point = next_job;
                  break;
                }
              start_point = nl + 1;
            }
        }
      else
        {
          /* FASTA: the record extends to the next '>' or the chunk end. */
          temp_next = strchr (start_point + 1, '>');
          if (temp_next == NULL)
            temp_next = next_job;
          /* fasta_read_check() takes the end position as a pointer, not
             a length. */
          result = fasta_read_check (start_point, temp_next, 'n', bl,
                                     tole_rate, File_head);
          start_point = temp_next;
        }

      if (result > 0)
        {
#pragma omp atomic
          File_head->reads_contam++;
          if (mode == 'r')
            {
#pragma omp critical
              {
                memcpy (_contam, previous_point, start_point - previous_point);
                _contam += (start_point - previous_point);
              }
            }
        }
      else
        {
          if (mode == 'r')
            {
#pragma omp critical
              {
                memcpy (_clean, previous_point, start_point - previous_point);
                _clean += (start_point - previous_point);
              }
            }
        }
    }                           // outside while
}