void scale_image( //scale an image IMAGE &image, //source image IMAGE &target_image //target image ) { inT32 xsize, ysize, new_xsize, new_ysize; IMAGELINE line, new_line; int *hires, *lores, *oldhires, *oldlores; int i, j, n, oldn, row, col; int offset = 0; //not used here float factor; uinT8 curr_colour, new_colour; int dummy = -1; IMAGE image2; //horiz scaled image xsize = image.get_xsize (); ysize = image.get_ysize (); new_xsize = target_image.get_xsize (); new_ysize = target_image.get_ysize (); if (new_ysize > new_xsize) new_line.init (new_ysize); else new_line.init (new_xsize); factor = (float) xsize / (float) new_xsize; hires = (int *) calloc (xsize, sizeof (int)); lores = (int *) calloc (new_xsize, sizeof (int)); oldhires = (int *) calloc (xsize, sizeof (int)); oldlores = (int *) calloc (new_xsize, sizeof (int)); if ((hires == NULL) || (lores == NULL) || (oldhires == NULL) || (oldlores == NULL)) { fprintf (stderr, "Calloc error in scale_image\n"); err_exit(); } image2.create (new_xsize, ysize, image.get_bpp ()); oldn = 0; /* do first row separately because hires[col-1] doesn't make sense here */ image.fast_get_line (0, 0, xsize, &line); /* each line nominally begins with white */ curr_colour = 1; n = 0; for (i = 0; i < xsize; i++) { new_colour = *(line.pixels + i); if (new_colour != curr_colour) { hires[n] = i; n++; curr_colour = new_colour; } } if (offset != 0) for (i = 0; i < n; i++) hires[i] += offset; if (n > new_xsize) { tprintf ("Too many transitions (%d) on line 0\n", n); scale_image_cop_out(image, target_image, factor, hires, lores, oldhires, oldlores); return; } else if (n > 0) dyn_prog (n, hires, lores, new_xsize, &dummy, &dummy, 0, factor); else lores[0] = new_xsize; curr_colour = 1; j = 0; for (i = 0; i < new_xsize; i++) { if (lores[j] == i) { curr_colour = 1 - curr_colour; j++; } *(new_line.pixels + i) = curr_colour; } image2.put_line (0, 0, new_xsize, &new_line, 0); for (i = 0; i < n; i++) { oldhires[i] = hires[i]; oldlores[i] = lores[i]; } for (i = n; i < oldn; i++) { oldhires[i] = 0; oldlores[i] = 0; } oldn = n; for (row = 1; row < ysize; row++) { image.fast_get_line (0, row, xsize, &line); /* each line nominally begins with white */ curr_colour = 1; n = 0; for (i = 0; i < xsize; i++) { new_colour = *(line.pixels + i); if (new_colour != curr_colour) { hires[n] = i; n++; curr_colour = new_colour; } } for (i = n; i < oldn; i++) { hires[i] = 0; lores[i] = 0; } if (offset != 0) for (i = 0; i < n; i++) hires[i] += offset; if (n > new_xsize) { tprintf ("Too many transitions (%d) on line %d\n", n, row); scale_image_cop_out(image, target_image, factor, hires, lores, oldhires, oldlores); return; } else if (n > 0) dyn_prog(n, hires, lores, new_xsize, oldhires, oldlores, oldn, factor); else lores[0] = new_xsize; curr_colour = 1; j = 0; for (i = 0; i < new_xsize; i++) { if (lores[j] == i) { curr_colour = 1 - curr_colour; j++; } *(new_line.pixels + i) = curr_colour; } image2.put_line (0, row, new_xsize, &new_line, 0); for (i = 0; i < n; i++) { oldhires[i] = hires[i]; oldlores[i] = lores[i]; } for (i = n; i < oldn; i++) { oldhires[i] = 0; oldlores[i] = 0; } oldn = n; } free(hires); free(lores); free(oldhires); free(oldlores); /* NOW DO THE VERTICAL SCALING from image2 to target_image*/ xsize = new_xsize; factor = (float) ysize / (float) new_ysize; offset = 0; hires = (int *) calloc (ysize, sizeof (int)); lores = (int *) calloc (new_ysize, sizeof (int)); oldhires = (int *) calloc (ysize, sizeof (int)); oldlores = (int *) calloc (new_ysize, sizeof (int)); if ((hires == NULL) || (lores == NULL) || (oldhires == NULL) || (oldlores == NULL)) { fprintf (stderr, "Calloc error in scale_image (vert)\n"); err_exit(); } oldn = 0; /* do first col separately because hires[col-1] doesn't make sense here */ image2.get_column (0, 0, ysize, &line, 0); /* each line nominally begins with white */ curr_colour = 1; n = 0; for (i = 0; i < ysize; i++) { new_colour = *(line.pixels + i); if (new_colour != curr_colour) { hires[n] = i; n++; curr_colour = new_colour; } } if (offset != 0) for (i = 0; i < n; i++) hires[i] += offset; if (n > new_ysize) { tprintf ("Too many transitions (%d) on column 0\n", n); scale_image_cop_out(image, target_image, factor, hires, lores, oldhires, oldlores); return; } else if (n > 0) dyn_prog (n, hires, lores, new_ysize, &dummy, &dummy, 0, factor); else lores[0] = new_ysize; curr_colour = 1; j = 0; for (i = 0; i < new_ysize; i++) { if (lores[j] == i) { curr_colour = 1 - curr_colour; j++; } *(new_line.pixels + i) = curr_colour; } target_image.put_column (0, 0, new_ysize, &new_line, 0); for (i = 0; i < n; i++) { oldhires[i] = hires[i]; oldlores[i] = lores[i]; } for (i = n; i < oldn; i++) { oldhires[i] = 0; oldlores[i] = 0; } oldn = n; for (col = 1; col < xsize; col++) { image2.get_column (col, 0, ysize, &line, 0); /* each line nominally begins with white */ curr_colour = 1; n = 0; for (i = 0; i < ysize; i++) { new_colour = *(line.pixels + i); if (new_colour != curr_colour) { hires[n] = i; n++; curr_colour = new_colour; } } for (i = n; i < oldn; i++) { hires[i] = 0; lores[i] = 0; } if (offset != 0) for (i = 0; i < n; i++) hires[i] += offset; if (n > new_ysize) { tprintf ("Too many transitions (%d) on column %d\n", n, col); scale_image_cop_out(image, target_image, factor, hires, lores, oldhires, oldlores); return; } else if (n > 0) dyn_prog(n, hires, lores, new_ysize, oldhires, oldlores, oldn, factor); else lores[0] = new_ysize; curr_colour = 1; j = 0; for (i = 0; i < new_ysize; i++) { if (lores[j] == i) { curr_colour = 1 - curr_colour; j++; } *(new_line.pixels + i) = curr_colour; } target_image.put_column (col, 0, new_ysize, &new_line, 0); for (i = 0; i < n; i++) { oldhires[i] = hires[i]; oldlores[i] = lores[i]; } for (i = n; i < oldn; i++) { oldhires[i] = 0; oldlores[i] = 0; } oldn = n; } free(hires); free(lores); free(oldhires); free(oldlores); }
/* reiterate_assembly Args: (1) a pointer to a sequence to be used as the new reference (2) a MapAlignmentP big enough to store all the alignments (3) a FSDB with sequences to be realigned (4) a AlignmentP big enough for the alignments (5) a front PWAlnFragP for storing front alignments (6) a back PWAlnFragP for storing back alignments (7) a PSSMP with the forward substitution matrices (8) a PSSMP with the revcom substitution matrices Aligns all the FragSeqs from fsdb to the new reference, using the as and ae fields to narrow down where the alignment happens Resets the maln and writes all the results there Returns void */ void reiterate_assembly( char* new_ref_seq, int iter_num, MapAlignmentP maln, FSDB fsdb, AlignmentP a, PWAlnFragP front_pwaln, PWAlnFragP back_pwaln, PSSMP ancsubmat, PSSMP rcancsubmat ) { int i, j, ref_len, ref_start, ref_end, ref_frag_len, max_score, rc_score, aln_seq_len; FragSeqP fs; char iter_ref_id[MAX_ID_LEN + 1]; char tmp_rc[INIT_ALN_SEQ_LEN + 1]; char iter_ref_desc[] = "iteration assembly"; /* Set up maln->ref Keep his seq separate from the external assembly because that is malloced and freed elsewhere */ sprintf( iter_ref_id, "ConsAssem.%d", iter_num ); free( maln->ref->seq ); if ( maln->ref->rcseq != NULL ) { free( maln->ref->rcseq ); } free( maln->ref->gaps ); ref_len = strlen( new_ref_seq ); maln->ref->seq = (char*)save_malloc((ref_len + 1)* sizeof(char)); strcpy( maln->ref->seq, new_ref_seq ); maln->ref->rcseq = NULL; // never again! /* Keep the ID and description the same if this is the 1st iteration. Otherwise, set it to the generic ones */ if ( iter_num > 1 ) { strcpy( maln->ref->id, iter_ref_id ); strcpy( maln->ref->desc, iter_ref_desc ); } maln->ref->seq_len = ref_len; maln->ref->size = (ref_len+1); if ( maln->ref->circular ) { add_ref_wrap( maln->ref ); } else { maln->ref->wrap_seq_len = maln->ref->seq_len; } maln->ref->gaps = (int*)save_malloc((maln->ref->wrap_seq_len+1) * sizeof(int)); for( i = 0; i <= maln->ref->wrap_seq_len; i++ ) { maln->ref->gaps[i] = 0; } /* Reset its AlnSeqArray ->ins to all point to null */ for ( i = 0; i < maln->num_aln_seqs; i++ ) { aln_seq_len = strlen(maln->AlnSeqArray[i]->seq); for ( j = 0; j < aln_seq_len; j++ ) { /* We couldn't have malloced any sequence for inserts past our length; anything non-NULL out there is cruft */ if ( maln->AlnSeqArray[i]->ins[j] != NULL ) { free( maln->AlnSeqArray[i]->ins[j] ); maln->AlnSeqArray[i]->ins[j] = NULL; } } } /* Now, remake the hpcl and hprl arrays if hp_special */ if ( a->hp ) { free( a->hpcl ); free( a->hpcs ); a->hpcl = (int*)save_malloc(maln->ref->wrap_seq_len*sizeof(int)); a->hpcs = (int*)save_malloc(maln->ref->wrap_seq_len*sizeof(int)); pop_hpl_and_hps( maln->ref->seq, maln->ref->wrap_seq_len, a->hpcl, a->hpcs ); } /* Reset the number of aligned sequences in the maln */ maln->num_aln_seqs = 0; /* OK, ref is set up. Let's go through all the sequences in fsdb and re-align them to the new reference. If it's a revcom alignment, just use the rcancsubmat */ for( i = 0; i < fsdb->num_fss; i++ ) { fs = fsdb->fss[i]; /* Special case of distant reference and !fs->strand_known => try to realign both strands against the entire reference to learn the strand and alignment region */ if ( maln->distant_ref && (fs->strand_known == 0 ) && (iter_num > 1) ) { ref_start = 0; ref_end = maln->ref->wrap_seq_len; ref_frag_len = ref_end - ref_start; a->seq1 = &maln->ref->seq[0]; a->len1 = ref_frag_len; pop_s1c_in_a( a ); a->seq2 = fs->seq; a->len2 = strlen( a->seq2 ); pop_s2c_in_a( a ); if ( a->hp ) { pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs ); pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs ); } /* Align it! */ dyn_prog( a ); /* Find the best forward score */ max_score = max_sg_score( a ); if ( max_score > FIRST_ROUND_SCORE_CUTOFF ) { fs->strand_known = 1; fs->rc = 0; find_align_begin( a ); fs->as = a->abc; fs->ae = a->aec; fs->score = max_score; } /* Now, try reverse complement */ aln_seq_len = strlen( fs->seq ); a->submat = rcancsubmat; for ( j = 0; j < aln_seq_len; j++ ) { tmp_rc[j] = revcom_char(fs->seq[aln_seq_len-(j+1)]); } tmp_rc[aln_seq_len] = '\0'; a->seq2 = tmp_rc; pop_s2c_in_a( a ); if ( a->hp ) { pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs ); pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs ); } dyn_prog( a ); max_score = max_sg_score( a ); if ( (max_score > FIRST_ROUND_SCORE_CUTOFF) && (max_score > fs->score) ) { fs->strand_known = 1; fs->rc = 1; find_align_begin( a ); fs->as = a->abc; fs->ae = a->aec; fs->score = max_score; strcpy( fs->seq, tmp_rc ); } } /* Do we know the strand (either because we've always known it or we just learned it, doesn't matter) */ if ( fs->strand_known ) { if ( fs->rc ) { a->submat = rcancsubmat; } else { a->submat = ancsubmat; } a->seq2 = fs->seq; a->len2 = strlen( a->seq2 ); pop_s2c_in_a( a ); /* Set up the alignment limits on the reference */ if ( ((fs->as - REALIGN_BUFFER) < 0 ) ) { ref_start = 0; } else { ref_start = (fs->as - REALIGN_BUFFER); } if ( (fs->ae + REALIGN_BUFFER + 1) > maln->ref->wrap_seq_len ) { ref_end = maln->ref->wrap_seq_len; } else { ref_end = fs->ae + REALIGN_BUFFER; } /* Check to make sure the regions encompassed by ref_start to ref_end is reasonable given how long this fragment is. If not, just realign this whole mofo again because the reference has probably changed a lot between iterations */ if ( (ref_start + a->len2) > ref_end ) { ref_start = 0; ref_end = maln->ref->wrap_seq_len; } ref_frag_len = ref_end - ref_start; a->seq1 = &maln->ref->seq[ref_start]; a->len1 = ref_frag_len; pop_s1c_in_a( a ); /* If we want the homopolymer discount, the necessary arrays of hp starts and lengths must be set up anew */ if ( a->hp ) { pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs ); pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs ); } /* Align it! */ dyn_prog( a ); /* Find the best score */ max_score = max_sg_score( a ); find_align_begin( a ); /* First, put all alignment in front_pwaln */ populate_pwaln_to_begin( a, front_pwaln ); /* Load up front_pwaln */ strcpy( front_pwaln->ref_id, maln->ref->id ); strcpy( front_pwaln->ref_desc, maln->ref->desc ); strcpy( front_pwaln->frag_id, fs->id ); strcpy( front_pwaln->frag_desc, fs->desc ); front_pwaln->trimmed = fs->trimmed; front_pwaln->revcom = fs->rc; front_pwaln->num_inputs = fs->num_inputs; front_pwaln->segment = 'a'; front_pwaln->score = a->best_score; front_pwaln->start = a->abc + ref_start; front_pwaln->end = a->aec + ref_start; /* Update stats for this FragSeq */ fs->as = a->abc + ref_start; fs->ae = a->aec + ref_start; fs->unique_best = 1; fs->score = a->best_score; if ( front_pwaln->end > maln->ref->seq_len ) { /* This alignment wraps around - adjust the end to demonstrate this for split_maln check */ front_pwaln->end = front_pwaln->end - maln->ref->seq_len; } if ( front_pwaln->start > front_pwaln->end ) { /* Move wrapped bit to back_pwaln */ split_pwaln( front_pwaln, back_pwaln, maln->ref->seq_len ); merge_pwaln_into_maln( front_pwaln, maln ); fs->front_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1]; merge_pwaln_into_maln( back_pwaln, maln ); fs->back_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1]; } else { merge_pwaln_into_maln( front_pwaln, maln ); fs->front_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1]; } } } return; }