Exemple #1
0
/* CGAT
 * Top-level driver that pairs every fasta record of genome B with every
 * fasta record of genome A.
 *
 * For each genome-B record: read it, allocate the two per-block pointer
 * arrays and fill them through Make_SeedTable().  Then, for each genome-A
 * record: run BA (forward, plus reverse when `reverse` is set) and, unless
 * `block` is set, run NA on the BA results.  With `block` set the BA result
 * is written by output_BAresult() and NA is not called.
 *
 * Takes no arguments and returns nothing; it works on the file-scope
 * objects idata_a, idata_b, par, opt, aln_for and aln_rev.
 */
static void CGAT(){
  Fasta *fa_a     = fasta_new();
  Fasta *fa_b     = fasta_new();
  Fasta *fa_a_rev = fasta_new();
  FILE *fp_b = NULL, *fp_a = NULL, *fp_a_rev = NULL;
  int **seed_value = NULL;
  unsigned short **seed_num = NULL;
  int pos_a, pos_a_rev, pos_b = 0;
  clock_t t_table, t_table_done, t_pair, t_ba_done, t_na_done;

  /* genome B is read once, front to back; the output file stays open for the whole run */
  fp_b    = my_fopen_r(idata_b->seqname);
  par.OUT = my_fopen_w(par.outputfile);

  for(idata_b->cnt = 0; idata_b->cnt < idata_b->fstnum; idata_b->cnt++){
    t_table = clock();

    /* build the seed table of the current genome-B record */
    read_multifasta(fp_b, fa_b, FORWARD, &pos_b);
    seed_value = (int **)my_malloc(idata_b->blocknum[idata_b->cnt] * sizeof(*seed_value), "table_b_value");
    seed_num   = (unsigned short **)my_malloc(idata_b->blocknum[idata_b->cnt] * sizeof(*seed_num), "table_b_num");
    Make_SeedTable(fa_b, seed_value, seed_num);

    t_table_done = clock();
    if(opt.debug){
      printf("MakeTable time: %.2f sec.\n", (double)(t_table_done - t_table)/CLOCKS_PER_SEC);
    }

    /* genome A is reopened for every genome-B record: one stream for the
       forward NA pass, a second one for the reverse NA pass */
    fp_a     = my_fopen_r(idata_a->seqname);
    fp_a_rev = my_fopen_r(idata_a->seqname);
    pos_a     = 0;
    pos_a_rev = 0;

    for(idata_a->cnt = 0; idata_a->cnt < idata_a->fstnum; idata_a->cnt++){
      printf("\ngenomeA-fasta%d (%d blocks) - genomeB-fasta%d (%d blocks)\n",
             idata_a->cnt+1, idata_a->blocknum[idata_a->cnt],
             idata_b->cnt+1, idata_b->blocknum[idata_b->cnt]);
      t_pair = clock();

      /* BA: results end up in aln_for / aln_rev */
      BA(&aln_for, seed_value, seed_num, FORWARD);
      if(reverse){
        BA(&aln_rev, seed_value, seed_num, REVERSE);
      }
      /* the table is released right after the last BA that uses it,
         i.e. before the NA step of the final genome-A record */
      if(idata_a->fstnum - 1 == idata_a->cnt){
        table_b_delete(seed_value, seed_num, idata_b->blocknum[idata_b->cnt]);
      }

      t_ba_done = clock();
      if(opt.debug){
        printf("BA time: %.2f sec.\n", (double)(t_ba_done - t_pair)/CLOCKS_PER_SEC);
      }

      if(block){
        /* -b: emit the BA result only, NA is skipped for this pair */
        output_BAresult();
      }else{
        /* NA: run on the BA results, forward and optionally reverse */
        NA(fp_a, fa_a, fa_b, &aln_for, FORWARD, &pos_a);
        if(reverse){
          NA(fp_a_rev, fa_a_rev, fa_b, &aln_rev, REVERSE, &pos_a_rev);
        }

        t_na_done = clock();
        if(opt.debug){
          printf("NA time: %.2f sec.\n", (double)(t_na_done - t_ba_done)/CLOCKS_PER_SEC);
        }
      }
    }

    /* per-record cleanup: buffers of the genome-B record and both genome-A streams */
    free(fa_b->head);
    free(fa_b->body);
    fclose(fp_a);
    fclose(fp_a_rev);
  }

  if(opt.boundary){
    output_fastaboundary();
  }

  free(fa_a);
  free(fa_b);
  free(fa_a_rev);
  fclose(fp_b);
  fclose(par.OUT);
}
Exemple #2
0
// Read a FASTA file and build a multifasta from its records.
//
// Accepted input, decided by the first character of each line:
//   '>'  starts a header line; the rest of the line is stored as the header
//   '#'  starts a comment line; the rest of the line is ignored
//   '\n' (empty line) is skipped
//   anything else must satisfy isBase() and is appended to the current
//   sequence, as must every further character of that line
//
// filename: path of the file, opened read-only in text mode.
// Returns the filled multifasta, or NULL if the file cannot be opened or is
// not well-formed (header directly after header, bases before any header,
// a non-base symbol in a sequence line, or a file ending without a
// sequence).  Every NULL return is preceded by an "ERROR: ..." line on
// stdout.  The returned object was created with multifasta_new(); releasing
// it is left to the caller (presumably via multifasta_delete -- confirm).
multifasta* read_fasta_file (char* filename)
{
  // int, not char: fgetc() returns an int so that EOF stays distinguishable
  // from every valid byte (a char would either never equal EOF or would
  // mistake byte 0xFF for it)
  int ch;

  // open file
  FILE *filestream = fopen(filename, "rt");

  // if file could not be opened ...
  if (filestream == NULL)
  {
    printf("ERROR: Can not open File %s!\n", filename);
    return NULL;
  }

  // create the result container (multifasta_new is given 20 as its argument)
  multifasta* mfast = multifasta_new(20);

  // record currently being filled; NULL until the first header is seen
  fasta* seq = NULL;

  int newline = 1;   // previous character ended a line (or nothing read yet)
  int header = 0;    // a header was read and its sequence has not started yet
  int sequence = 0;  // bases of the current record are being collected
  int comment = 0;   // inside a '#' comment line

  // stop as soon as EOF is reached; EOF itself is never processed
  while ((ch = fgetc (filestream)) != EOF)
  {
    // remember whether this character opens a line, then update the flag
    int line_start = newline;
    newline = (ch == '\n');

    if (line_start)
    {
      // a '>' starts a header
      if (ch == '>')
      {
        // two headers without a sequence in between => must not happen
        if (header)
        {
          puts ("ERROR: File is not FASTA (no header after header allowed)!");
          goto fail;
        }
        // now the header follows
        header = 1;
        // a sequence preceded => hand it to the multifasta and terminate it
        if (sequence)
        {
          multifasta_add_fasta (mfast, seq);
          fasta_add_base(seq, '\0');
          sequence = 0;
        }
        // create new fasta struct for the record that starts here
        seq = fasta_new (MAX_LINE);
      }
      // a '#' starts a comment line
      else if (ch == '#')
      {
        comment = 1;
      }
      // a base is the first character of the line
      else if (isBase((char)ch))
      {
        // first base of a record: a header must have preceded it.  Check
        // this BEFORE touching seq, which is still NULL when the file
        // starts with bases.
        if (!sequence)
        {
          if (!header)
          {
            puts("ERROR: File is not FASTA (hader missing)!");
            goto fail;
          }
          sequence = 1;
          header = 0;
        }
        fasta_add_base(seq, (char)ch);
      }
      // anything else at line start is illegal; empty lines are ok
      else if (!newline)
      {
        printf("ERROR: non-fasta conform line detected! %c is not an allowed symbol!\n", ch);
        goto fail;
      }
    }
    // inside a comment: ignore everything, the newline ends the comment
    else if (comment)
    {
      if (newline)
        comment = 0;
    }
    // inside a header line: collect characters, terminate at the newline.
    // header is deliberately NOT reset here (needed for the FASTA checks).
    else if (header)
    {
      fasta_add_header_char(seq, newline ? '\0' : (char)ch);
    }
    // inside a sequence line (header is never set while sequence is)
    else if (sequence)
    {
      if (isBase((char)ch))
        fasta_add_base(seq, (char)ch);
      else if (!newline)
      {
        printf("ERROR: non-fasta conform line detected! %c is not an allowed symbol!\n",ch);
        goto fail;
      }
    }
  }

  // file may not end with header or without sequence
  if (header || !sequence)
  {
    puts ("ERROR: File is not FASTA (sequence missing)!");
    goto fail;
  }

  // finish last added sequence
  multifasta_add_fasta (mfast, seq);
  fasta_add_base(seq, '\0');

  fclose(filestream);

  return mfast;

fail:
  // common error exit: close the stream, drop the partial result.
  // NOTE(review): a seq that was created but not yet passed to
  // multifasta_add_fasta is not released here; whether multifasta_delete
  // covers it cannot be seen from this function -- confirm.
  fclose(filestream);
  multifasta_delete (mfast);
  return NULL;
}