/* CGAT * read and make tables * BA -> NA */ static void CGAT(){ FILE *IN=NULL, *IN_b=NULL, *IN_rev=NULL; int **table_value=NULL; unsigned short **table_num=NULL; int state_a2, state_b=0, state_rev; Fasta *fst1 = fasta_new(), *fst2 = fasta_new(), *fst_rev = fasta_new(); clock_t start, start1, end0, end1, end2; /*----- do BA and NA for each fasta-pair-----*/ /* genomeB file open */ IN_b = my_fopen_r(idata_b->seqname); par.OUT = my_fopen_w(par.outputfile); for(idata_b->cnt=0; idata_b->cnt < idata_b->fstnum; idata_b->cnt++){ start = clock(); /* MakeTable */ read_multifasta(IN_b, fst2, FORWARD, &state_b); table_value = (int **)my_malloc(idata_b->blocknum[idata_b->cnt] * sizeof(int *), "table_b_value"); table_num = (unsigned short **)my_malloc(idata_b->blocknum[idata_b->cnt] * sizeof(unsigned short *), "table_b_num"); Make_SeedTable(fst2, table_value, table_num); end0 = clock(); if(opt.debug) printf("MakeTable time: %.2f sec.\n", (double)(end0-start)/CLOCKS_PER_SEC); IN = my_fopen_r(idata_a->seqname); IN_rev = my_fopen_r(idata_a->seqname); state_a2=0; state_rev=0; for(idata_a->cnt=0; idata_a->cnt < idata_a->fstnum; idata_a->cnt++){ printf("\ngenomeA-fasta%d (%d blocks) - genomeB-fasta%d (%d blocks)\n", idata_a->cnt+1, idata_a->blocknum[idata_a->cnt], idata_b->cnt+1, idata_b->blocknum[idata_b->cnt]); start1 = clock(); /*--- BA: the results are stored in aln_for/rev ---*/ BA(&aln_for, table_value, table_num, FORWARD); if(reverse) BA(&aln_rev, table_value, table_num, REVERSE); if(idata_a->cnt == idata_a->fstnum -1) table_b_delete(table_value, table_num, idata_b->blocknum[idata_b->cnt]); end1 = clock(); if(opt.debug) printf("BA time: %.2f sec.\n", (double)(end1-start1)/CLOCKS_PER_SEC); /*--- (if -b is on) output BA result and skip NA ---*/ if(block){ output_BAresult(); continue; } /*--- NA: detailed alignmend within colonies in bl ---*/ NA(IN, fst1, fst2, &aln_for, FORWARD, &state_a2); if(reverse) NA(IN_rev, fst_rev, fst2, &aln_rev, REVERSE, &state_rev); end2 = clock(); if(opt.debug) printf("NA time: %.2f sec.\n", (double)(end2-end1)/CLOCKS_PER_SEC); } free(fst2->head); free(fst2->body); fclose(IN); fclose(IN_rev); } if(opt.boundary) output_fastaboundary(); free(fst1); free(fst2); free(fst_rev); fclose(IN_b); fclose(par.OUT); }
// generate multifasta from fasta file multifasta* read_fasta_file (char* filename) { char ch; // open file FILE *filestream = fopen(filename, "rt"); // if file could not be opened ... if (filestream == NULL) { printf("ERROR: Can not open File %s!\n", filename); return NULL; } // initialize multifasta with 20 entries multifasta* mfast = multifasta_new(20); fasta* seq = NULL; int newline = 1; int header = 0; int sequence = 0; int comment = 0; do { // read current char ch = fgetc (filestream); // check if line just started if (newline) { // check if its just another newline newline = (ch == '\n'); // check if it is a > (header starts) if (ch == '>') { // if header => must not happen if (header) { puts ("ERROR: File is not FASTA (no header after header allowed)!"); // free multifasta, close filestream and return NULL fclose(filestream); multifasta_delete (mfast); return NULL; } // now the header follows header = 1; // is sequence preceeded => finish sequence and add to multifasta if (sequence) { multifasta_add_fasta (mfast, seq); fasta_add_base(seq, '\0'); sequence = 0; } // create new fasta struct seq = fasta_new (MAX_LINE); } // if comment line is starting else if (ch == '#') { comment = 1; } // if a base is the first character else if (isBase(ch)) { // if already a sequence is beeing read => just add base if (sequence) { fasta_add_base(seq, ch); } // else start sequence else { fasta_add_base(seq, ch); sequence = 1; // if no header preceeded => then it's not FASTA if (!header) { puts("ERROR: File is not FASTA (hader missing)!"); // free multifasta, close filestream and return NULL fclose(filestream); multifasta_delete (mfast); return NULL; } header = 0; } } // if no base character within sequence; // empty lines are ok and file may also end else if (!newline && ch != EOF) { // free multifasta, close filestream and return NULL fclose(filestream); printf("ERROR: non-fasta conform line detected! %c is not an allowed symbol!\n", ch); multifasta_delete (mfast); return NULL; } } // if we didn't start a newline else { // check if its a newline character newline = (ch == '\n'); // check if comment and if its a newline => now comment ends // all other chars are ignored (because its a comment) if (comment) { if (newline) { comment = 0; } } // if reading a header else if (header) { // if there is no newline => add it to sequence header if (!newline) fasta_add_header_char(seq, ch); else fasta_add_header_char(seq, '\0'); // do NOT set header = 0 (need for checking correct FASTA) } // thats ok because after a sequence started, there is never (header) else if (sequence) { if (isBase(ch)) fasta_add_base(seq, ch); else if (!newline && ch != EOF) { // free multifasta, close filestream and return NULL fclose(filestream); printf("ERROR: non-fasta conform line detected! %c is not an allowed symbol!\n",ch); multifasta_delete (mfast); return NULL; } } } } while (ch != EOF); // stop if EOF is reached // file may not end with header or without seuqence if (header || !sequence) { puts ("ERROR: File is not FASTA (sequence missing)!"); fclose(filestream); multifasta_delete (mfast); return NULL; } // finish last added sequence multifasta_add_fasta (mfast, seq); fasta_add_base(seq, '\0'); fclose(filestream); return mfast; }