Пример #1
0
void scale_image(                     //scale an image
                 IMAGE &image,        //source image
                 IMAGE &target_image  //target image
                ) {
  inT32 xsize, ysize, new_xsize, new_ysize;
  IMAGELINE line, new_line;
  int *hires, *lores, *oldhires, *oldlores;
  int i, j, n, oldn, row, col;
  int offset = 0;                //not used here
  float factor;
  uinT8 curr_colour, new_colour;
  int dummy = -1;
  IMAGE image2;                  //horiz scaled image

  xsize = image.get_xsize ();
  ysize = image.get_ysize ();
  new_xsize = target_image.get_xsize ();
  new_ysize = target_image.get_ysize ();
  if (new_ysize > new_xsize)
    new_line.init (new_ysize);
  else
    new_line.init (new_xsize);

  factor = (float) xsize / (float) new_xsize;

  hires = (int *) calloc (xsize, sizeof (int));
  lores = (int *) calloc (new_xsize, sizeof (int));
  oldhires = (int *) calloc (xsize, sizeof (int));
  oldlores = (int *) calloc (new_xsize, sizeof (int));
  if ((hires == NULL) || (lores == NULL) || (oldhires == NULL)
  || (oldlores == NULL)) {
    fprintf (stderr, "Calloc error in scale_image\n");
    err_exit();
  }

  image2.create (new_xsize, ysize, image.get_bpp ());

  oldn = 0;
  /* do first row separately because hires[col-1] doesn't make sense here */
  image.fast_get_line (0, 0, xsize, &line);
  /* each line nominally begins with white */
  curr_colour = 1;
  n = 0;
  for (i = 0; i < xsize; i++) {
    new_colour = *(line.pixels + i);
    if (new_colour != curr_colour) {
      hires[n] = i;
      n++;
      curr_colour = new_colour;
    }
  }
  if (offset != 0)
    for (i = 0; i < n; i++)
      hires[i] += offset;

  if (n > new_xsize) {
    tprintf ("Too many transitions (%d) on line 0\n", n);
    scale_image_cop_out(image,
                        target_image,
                        factor,
                        hires,
                        lores,
                        oldhires,
                        oldlores);
    return;
  }
  else if (n > 0)
    dyn_prog (n, hires, lores, new_xsize, &dummy, &dummy, 0, factor);
  else
    lores[0] = new_xsize;

  curr_colour = 1;
  j = 0;
  for (i = 0; i < new_xsize; i++) {
    if (lores[j] == i) {
      curr_colour = 1 - curr_colour;
      j++;
    }
    *(new_line.pixels + i) = curr_colour;
  }
  image2.put_line (0, 0, new_xsize, &new_line, 0);

  for (i = 0; i < n; i++) {
    oldhires[i] = hires[i];
    oldlores[i] = lores[i];
  }

  for (i = n; i < oldn; i++) {
    oldhires[i] = 0;
    oldlores[i] = 0;
  }
  oldn = n;

  for (row = 1; row < ysize; row++) {
    image.fast_get_line (0, row, xsize, &line);
    /* each line nominally begins with white */
    curr_colour = 1;
    n = 0;
    for (i = 0; i < xsize; i++) {
      new_colour = *(line.pixels + i);
      if (new_colour != curr_colour) {
        hires[n] = i;
        n++;
        curr_colour = new_colour;
      }
    }
    for (i = n; i < oldn; i++) {
      hires[i] = 0;
      lores[i] = 0;
    }
    if (offset != 0)
      for (i = 0; i < n; i++)
        hires[i] += offset;

    if (n > new_xsize) {
      tprintf ("Too many transitions (%d) on line %d\n", n, row);
      scale_image_cop_out(image,
                          target_image,
                          factor,
                          hires,
                          lores,
                          oldhires,
                          oldlores);
      return;
    }
    else if (n > 0)
      dyn_prog(n, hires, lores, new_xsize, oldhires, oldlores, oldn, factor);
    else
      lores[0] = new_xsize;

    curr_colour = 1;
    j = 0;
    for (i = 0; i < new_xsize; i++) {
      if (lores[j] == i) {
        curr_colour = 1 - curr_colour;
        j++;
      }
      *(new_line.pixels + i) = curr_colour;
    }
    image2.put_line (0, row, new_xsize, &new_line, 0);

    for (i = 0; i < n; i++) {
      oldhires[i] = hires[i];
      oldlores[i] = lores[i];
    }
    for (i = n; i < oldn; i++) {
      oldhires[i] = 0;
      oldlores[i] = 0;
    }
    oldn = n;
  }

  free(hires);
  free(lores);
  free(oldhires);
  free(oldlores);

  /* NOW DO THE VERTICAL SCALING from image2 to target_image*/

  xsize = new_xsize;
  factor = (float) ysize / (float) new_ysize;
  offset = 0;

  hires = (int *) calloc (ysize, sizeof (int));
  lores = (int *) calloc (new_ysize, sizeof (int));
  oldhires = (int *) calloc (ysize, sizeof (int));
  oldlores = (int *) calloc (new_ysize, sizeof (int));
  if ((hires == NULL) || (lores == NULL) || (oldhires == NULL)
  || (oldlores == NULL)) {
    fprintf (stderr, "Calloc error in scale_image (vert)\n");
    err_exit();
  }

  oldn = 0;
  /* do first col separately because hires[col-1] doesn't make sense here */
  image2.get_column (0, 0, ysize, &line, 0);
  /* each line nominally begins with white */
  curr_colour = 1;
  n = 0;
  for (i = 0; i < ysize; i++) {
    new_colour = *(line.pixels + i);
    if (new_colour != curr_colour) {
      hires[n] = i;
      n++;
      curr_colour = new_colour;
    }
  }

  if (offset != 0)
    for (i = 0; i < n; i++)
      hires[i] += offset;

  if (n > new_ysize) {
    tprintf ("Too many transitions (%d) on column 0\n", n);
    scale_image_cop_out(image,
                        target_image,
                        factor,
                        hires,
                        lores,
                        oldhires,
                        oldlores);
    return;
  }
  else if (n > 0)
    dyn_prog (n, hires, lores, new_ysize, &dummy, &dummy, 0, factor);
  else
    lores[0] = new_ysize;

  curr_colour = 1;
  j = 0;
  for (i = 0; i < new_ysize; i++) {
    if (lores[j] == i) {
      curr_colour = 1 - curr_colour;
      j++;
    }
    *(new_line.pixels + i) = curr_colour;
  }
  target_image.put_column (0, 0, new_ysize, &new_line, 0);

  for (i = 0; i < n; i++) {
    oldhires[i] = hires[i];
    oldlores[i] = lores[i];
  }
  for (i = n; i < oldn; i++) {
    oldhires[i] = 0;
    oldlores[i] = 0;
  }
  oldn = n;

  for (col = 1; col < xsize; col++) {
    image2.get_column (col, 0, ysize, &line, 0);
    /* each line nominally begins with white */
    curr_colour = 1;
    n = 0;
    for (i = 0; i < ysize; i++) {
      new_colour = *(line.pixels + i);
      if (new_colour != curr_colour) {
        hires[n] = i;
        n++;
        curr_colour = new_colour;
      }
    }
    for (i = n; i < oldn; i++) {
      hires[i] = 0;
      lores[i] = 0;
    }

    if (offset != 0)
      for (i = 0; i < n; i++)
        hires[i] += offset;

    if (n > new_ysize) {
      tprintf ("Too many transitions (%d) on column %d\n", n, col);
      scale_image_cop_out(image,
                          target_image,
                          factor,
                          hires,
                          lores,
                          oldhires,
                          oldlores);
      return;
    }
    else if (n > 0)
      dyn_prog(n, hires, lores, new_ysize, oldhires, oldlores, oldn, factor);
    else
      lores[0] = new_ysize;

    curr_colour = 1;
    j = 0;
    for (i = 0; i < new_ysize; i++) {
      if (lores[j] == i) {
        curr_colour = 1 - curr_colour;
        j++;
      }
      *(new_line.pixels + i) = curr_colour;
    }
    target_image.put_column (col, 0, new_ysize, &new_line, 0);

    for (i = 0; i < n; i++) {
      oldhires[i] = hires[i];
      oldlores[i] = lores[i];
    }
    for (i = n; i < oldn; i++) {
      oldhires[i] = 0;
      oldlores[i] = 0;
    }
    oldn = n;
  }
  free(hires);
  free(lores);
  free(oldhires);
  free(oldlores);
}
Пример #2
0
/* reiterate_assembly
   Args: (1) a pointer to a sequence to be used as the new reference
         (2) the iteration number; used to build the "ConsAssem.<n>"
             reference ID. The generic ID/description and the
             strand-discovery realignment are only applied when it
             is greater than 1
         (3) a MapAlignmentP big enough to store all the alignments
         (4) a FSDB with sequences to be realigned
         (5) a AlignmentP big enough for the alignments
         (6) a front PWAlnFragP for storing front alignments
         (7) a back PWAlnFragP for storing back alignments
         (8) a PSSMP with the forward substitution matrices
         (9) a PSSMP with the revcom substitution matrices
   Aligns all the FragSeqs from fsdb to the new reference, using the
   as and ae fields to narrow down where the alignment happens
   Resets the maln and writes all the results there
   FragSeqs whose strand is still unknown after the optional
   strand-discovery step are skipped
   Returns void
*/
void reiterate_assembly( char* new_ref_seq, int iter_num,
                         MapAlignmentP maln,
                         FSDB fsdb, AlignmentP a,
                         PWAlnFragP front_pwaln,
                         PWAlnFragP back_pwaln,
                         PSSMP ancsubmat,
                         PSSMP rcancsubmat ) {
  int i, j,
    ref_len,
    ref_start,
    ref_end,
    ref_frag_len,
    max_score,
    aln_seq_len;
  FragSeqP fs;
  char iter_ref_id[MAX_ID_LEN + 1];
  char tmp_rc[INIT_ALN_SEQ_LEN + 1];
  char iter_ref_desc[] = "iteration assembly";

  /* Set up maln->ref
     Keep his seq separate from the external assembly because that
     is malloced and freed elsewhere
  */
  /* Bounded write so a large iter_num can never overrun iter_ref_id */
  snprintf( iter_ref_id, sizeof iter_ref_id, "ConsAssem.%d", iter_num );
  free( maln->ref->seq );
  if ( maln->ref->rcseq != NULL ) {
    free( maln->ref->rcseq );
  }
  free( maln->ref->gaps );

  ref_len = strlen( new_ref_seq );
  maln->ref->seq = (char*)save_malloc((ref_len + 1)* sizeof(char));
  strcpy( maln->ref->seq, new_ref_seq );
  maln->ref->rcseq = NULL; // never again!
  /* Keep the ID and description the same if this is the 1st
     iteration. Otherwise, set it to the generic ones */
  if ( iter_num > 1 ) {
    strcpy( maln->ref->id, iter_ref_id );
    strcpy( maln->ref->desc, iter_ref_desc );
  }

  maln->ref->seq_len = ref_len;
  maln->ref->size = (ref_len+1);

  /* wrap_seq_len is set explicitly only for linear references.
     NOTE(review): presumably add_ref_wrap sets it for circular
     ones - confirm against its definition */
  if ( maln->ref->circular ) {
    add_ref_wrap( maln->ref );
  }
  else {
    maln->ref->wrap_seq_len = maln->ref->seq_len;
  }
  /* One zeroed gap counter per reference position, plus one */
  maln->ref->gaps =
    (int*)save_malloc((maln->ref->wrap_seq_len+1) * sizeof(int));
  for( i = 0; i <= maln->ref->wrap_seq_len; i++ ) {
    maln->ref->gaps[i] = 0;
  }

  /* Reset its AlnSeqArray ->ins to all point to null */
  for ( i = 0; i < maln->num_aln_seqs; i++ ) {
    aln_seq_len = strlen(maln->AlnSeqArray[i]->seq);
    for ( j = 0; j < aln_seq_len; j++ ) {
      /* We couldn't have malloced any sequence for
         inserts past our length; anything non-NULL
         out there is cruft */
      if ( maln->AlnSeqArray[i]->ins[j] != NULL ) {
        free( maln->AlnSeqArray[i]->ins[j] );
        maln->AlnSeqArray[i]->ins[j] = NULL;
      }
    }
  }

  /* Now, remake the hpcl and hpcs arrays if hp_special */
  if ( a->hp ) {
    free( a->hpcl );
    free( a->hpcs );
    a->hpcl = (int*)save_malloc(maln->ref->wrap_seq_len*sizeof(int));
    a->hpcs = (int*)save_malloc(maln->ref->wrap_seq_len*sizeof(int));
    pop_hpl_and_hps( maln->ref->seq,
                     maln->ref->wrap_seq_len,
                     a->hpcl, a->hpcs );
  }

  /* Reset the number of aligned sequences in the maln */
  maln->num_aln_seqs = 0;

  /* OK, ref is set up. Let's go through all the sequences in fsdb
     and re-align them to the new reference.
     If it's a revcom alignment,
     just use the rcancsubmat */
  for( i = 0; i < fsdb->num_fss; i++ ) {
    fs = fsdb->fss[i];

    /* Special case of distant reference and
       !fs->strand_known => try to realign both strands
       against the entire reference to learn the
       strand and alignment region
    */
    if ( maln->distant_ref &&
         (fs->strand_known == 0 ) &&
         (iter_num > 1) ) {
      /* The forward trial must use the forward matrices. Without
         this, a->submat would still hold whatever the previous
         fragment left behind (rcancsubmat after any earlier revcom
         trial), making the forward/revcom comparison depend on the
         order of the fragments */
      a->submat = ancsubmat;
      ref_start = 0;
      ref_end = maln->ref->wrap_seq_len;
      ref_frag_len = ref_end - ref_start;
      a->seq1 = &maln->ref->seq[0];
      a->len1 = ref_frag_len;
      pop_s1c_in_a( a );
      a->seq2 = fs->seq;
      a->len2 = strlen( a->seq2 );
      pop_s2c_in_a( a );
      if ( a->hp ) {
        pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs );
        pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs );
      }
      /* Align it! */
      dyn_prog( a );
      /* Find the best forward score */
      max_score = max_sg_score( a );
      if ( max_score > FIRST_ROUND_SCORE_CUTOFF ) {
        fs->strand_known = 1;
        fs->rc = 0;
        find_align_begin( a );
        fs->as = a->abc;
        fs->ae = a->aec;
        fs->score = max_score;
      }

      /* Now, try reverse complement */
      aln_seq_len = strlen( fs->seq );
      a->submat = rcancsubmat;
      for ( j = 0; j < aln_seq_len; j++ ) {
        tmp_rc[j] = revcom_char(fs->seq[aln_seq_len-(j+1)]);
      }
      tmp_rc[aln_seq_len] = '\0';
      /* a->len2 is left as is: the revcom has the same length */
      a->seq2 = tmp_rc;
      pop_s2c_in_a( a );
      if ( a->hp ) {
        pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs );
        pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs );
      }
      dyn_prog( a );
      max_score = max_sg_score( a );
      /* Revcom wins only if it passes the cutoff AND beats the score
         currently stored on the fragment */
      if ( (max_score > FIRST_ROUND_SCORE_CUTOFF) &&
           (max_score > fs->score) ) {
        fs->strand_known = 1;
        fs->rc = 1;
        find_align_begin( a );
        fs->as = a->abc;
        fs->ae = a->aec;
        fs->score = max_score;
        /* From now on the fragment is stored in revcom orientation */
        strcpy( fs->seq, tmp_rc );
      }
    }

    /* Do we know the strand (either because we've always
       known it or we just learned it, doesn't matter) */
    if ( fs->strand_known ) {
      if ( fs->rc ) {
        a->submat = rcancsubmat;
      }
      else {
        a->submat = ancsubmat;
      }

      a->seq2 = fs->seq;
      a->len2 = strlen( a->seq2 );
      pop_s2c_in_a( a );

      /* Set up the alignment limits on the reference */
      if ( ((fs->as - REALIGN_BUFFER) < 0 ) ) {
        ref_start = 0;
      }
      else {
        ref_start = (fs->as - REALIGN_BUFFER);
      }
      if ( (fs->ae + REALIGN_BUFFER + 1) >
           maln->ref->wrap_seq_len ) {
        ref_end = maln->ref->wrap_seq_len;
      }
      else {
        ref_end = fs->ae + REALIGN_BUFFER;
      }

      /* Check to make sure the regions encompassed by ref_start to
         ref_end is reasonable given how long this fragment is. If
         not, just realign this whole mofo again because the reference
         has probably changed a lot between iterations */
      if ( (ref_start + a->len2) > ref_end ) {
        ref_start = 0;
        ref_end = maln->ref->wrap_seq_len;
      }

      ref_frag_len = ref_end - ref_start;
      a->seq1 = &maln->ref->seq[ref_start];
      a->len1 = ref_frag_len;
      pop_s1c_in_a( a );

      /* If we want the homopolymer discount, the necessary arrays of
         hp starts and lengths must be set up anew */
      if ( a->hp ) {
        pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs );
        pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs );
      }

      /* Align it! */
      dyn_prog( a );

      /* Find the best score. The returned value is not used below;
         a->best_score is read instead.
         NOTE(review): presumably max_sg_score also records
         a->best_score - confirm before removing this call */
      max_score = max_sg_score( a );

      find_align_begin( a );

      /* First, put all alignment in front_pwaln */
      populate_pwaln_to_begin( a, front_pwaln );

      /* Load up front_pwaln */
      strcpy( front_pwaln->ref_id, maln->ref->id );
      strcpy( front_pwaln->ref_desc, maln->ref->desc );

      strcpy( front_pwaln->frag_id, fs->id );
      strcpy( front_pwaln->frag_desc, fs->desc );

      front_pwaln->trimmed = fs->trimmed;
      front_pwaln->revcom  = fs->rc;
      front_pwaln->num_inputs = fs->num_inputs;
      front_pwaln->segment = 'a';
      front_pwaln->score = a->best_score;

      /* a->abc / a->aec are relative to the aligned window, so shift
         them by ref_start to get reference coordinates */
      front_pwaln->start = a->abc + ref_start;
      front_pwaln->end   = a->aec + ref_start;

      /* Update stats for this FragSeq */
      fs->as = a->abc + ref_start;
      fs->ae = a->aec + ref_start;
      fs->unique_best = 1;
      fs->score = a->best_score;

      if ( front_pwaln->end > maln->ref->seq_len ) {
        /* This alignment wraps around - adjust the end to
           demonstrate this for split_maln check */
        front_pwaln->end = front_pwaln->end - maln->ref->seq_len;
      }

      if ( front_pwaln->start > front_pwaln->end ) {
        /* Move wrapped bit to back_pwaln */
        split_pwaln( front_pwaln, back_pwaln, maln->ref->seq_len );
        merge_pwaln_into_maln( front_pwaln, maln );
        fs->front_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1];
        merge_pwaln_into_maln( back_pwaln, maln );
        fs->back_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1];
      }
      else {
        merge_pwaln_into_maln( front_pwaln, maln );
        fs->front_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1];
      }
    }
  }
}