/*
  grow_ids_list
  Args: (1) IDsListP (ids) whose capacity is to be doubled
  Returns: void
  Doubles the capacity of an IDsList. A pointer array twice as long as
  the current one is allocated, plus one new character block holding
  ids->size slots of MAX_ID_LEN chars each. The existing slot pointers
  are carried over as they are (the storage they point at is kept, not
  copied), the new slots are pointed into the new block, and the old
  pointer array is freed. ids->size is updated; ids->num_ids is left
  untouched.
*/
void grow_ids_list(IDsListP ids) {
  int doubled;
  int slot;
  char** bigger_array;
  char* fresh_block;
  char* next_id;

  doubled      = (ids->size) * 2;
  bigger_array = (char**)save_malloc(doubled * sizeof(char*));
  fresh_block  = (char*)save_malloc(ids->size * MAX_ID_LEN * sizeof(char));

  /* Lower half: keep pointing at the ID storage that already exists */
  slot = 0;
  while (slot < ids->size) {
    bigger_array[slot] = ids->ids[slot];
    slot++;
  }

  /* Upper half: hand out consecutive MAX_ID_LEN-sized pieces of the
     freshly allocated block */
  next_id = fresh_block;
  for (slot = ids->size; slot < doubled; slot++) {
    bigger_array[slot] = next_id;
    next_id += MAX_ID_LEN;
  }

  /* Only the old array of pointers is released; the ID storage it
     pointed at is still referenced from bigger_array */
  free(ids->ids);
  ids->ids  = bigger_array;
  ids->size = doubled;
}
/* init_FSDB Arguments: void Returns: FSDB (pointer to struct fragseqdb) / NULL if not enough memories Used for initializing a new database of FragSeqs. Allocates enough memoery for INIT_NUM_ALN_SEQS of these */ FSDB init_FSDB ( void ) { int i; FSDB fsdb; FragSeqP first_seq; /* First, allocate the memories */ fsdb = (FSDB)save_malloc(sizeof(FragSeqDB)); if ( fsdb == NULL ) { return NULL; } first_seq = (FragSeqP)save_malloc(INIT_NUM_ALN_SEQS * sizeof(FragSeq)); if ( first_seq == NULL ) { return NULL; } fsdb->fss = (FragSeqP*)save_malloc(INIT_NUM_ALN_SEQS * sizeof( FragSeqP )); if ( fsdb->fss == NULL ) { return NULL; } for ( i = 0; i < INIT_NUM_ALN_SEQS; i++ ) { fsdb->fss[i] = &first_seq[i]; } fsdb->size = INIT_NUM_ALN_SEQS; fsdb->num_fss = 0; return fsdb; }
IntSet *intset_new(unsigned long maxvalue, unsigned long nofelements) { IntSet* set = NULL; assert(nofelements <= maxvalue); save_malloc(set, sizeof(*set)); save_malloc(set->elements, sizeof(*set->elements) * nofelements); set->maxvalue = maxvalue; set->count = nofelements; set->current = 0; return set; }
/*
 * add_mem_list
 * Records an allocated pointer at the head of the singly linked list
 * *header, together with its size and the file/line of the caller.
 *
 *   header - address of the list head; updated to the new node
 *   ptr    - pointer to record; NULL is silently ignored
 *   isize  - size stored with the entry
 *   iline  - caller's line number stored with the entry
 *   pcfile - caller's file name; a copy made with save_strdup is stored
 *
 * The whole operation runs with g_ptmmem locked. If ptr is already in
 * the list, or the list node cannot be allocated, a message is written
 * to stderr, the mutex is released and the process exits with -1.
 *
 * The function returns nothing; the return type is spelled out as void
 * because implicit int is not valid C99 and later.
 */
static void add_mem_list(my_mem_list_struct_t **header, char *ptr, int isize,
                         int iline, char *pcfile)
{
    my_mem_list_struct_t *p;

    if (ptr == NULL)
        return;

    my_pthread_mutex_lock(&g_ptmmem);

    /* Refuse to register the same pointer twice */
    for (p = *header; p != NULL; p = p->pnext) {
        if (p->ptr == ptr) {
            fprintf(stderr, "add_mem_list error in %s(%s %d).!", __FILE__, pcfile, iline);
            my_pthread_mutex_unlock(&g_ptmmem);
            exit(-1);
        }
    }

    p = (my_mem_list_struct_t *)save_malloc(sizeof(my_mem_list_struct_t));
    if (p == NULL) {
        fprintf(stderr, "not enough memory in %s.\n", __FILE__);
        my_pthread_mutex_unlock(&g_ptmmem);
        exit(-1);
    }

    p->ptr    = ptr;
    p->isize  = isize;
    p->iline  = iline;
    p->pcfile = save_strdup(pcfile);

    /* Push the new node on the front of the list */
    p->pnext = *header;
    *header  = p;

    my_pthread_mutex_unlock(&g_ptmmem);
}
/*
 * add_fp_list
 * Records an open FILE* at the head of the singly linked list *header,
 * together with the file/line of the caller.
 *
 *   header - address of the list head; updated to the new node
 *   fp     - stream to record; NULL is silently ignored
 *   iline  - caller's line number stored with the entry
 *   pcfile - caller's file name; a copy made with save_strdup is stored
 *
 * The whole operation runs with g_ptmfp locked. If an entry with the
 * same fp and pid already exists, or the list node cannot be allocated,
 * a message is written to stderr, the mutex is released and the process
 * exits with -1.
 *
 * The function returns nothing; the return type is spelled out as void
 * because implicit int is not valid C99 and later.
 */
static void add_fp_list(my_fp_list_struct_t **header, FILE *fp, int iline,
                        char *pcfile)
{
    pthread_t pid;
    my_fp_list_struct_t *p;

    if (fp == NULL)
        return;

    /* The thread id is not recorded: every entry is stored with pid 0
       (the pthread_self() call is disabled), so the duplicate check
       below effectively compares fp only. */
    pid = 0;

    my_pthread_mutex_lock(&g_ptmfp);

    for (p = *header; p != NULL; p = p->pnext) {
        if (p->fp == fp && p->pid == pid) {
            fprintf(stderr, "add_fp_list error in %s(%s %d).!", __FILE__, pcfile, iline);
            my_pthread_mutex_unlock(&g_ptmfp);
            exit(-1);
        }
    }

    p = (my_fp_list_struct_t *)save_malloc(sizeof(my_fp_list_struct_t));
    if (p == NULL) {
        fprintf(stderr, "not enough memory in %s.\n", __FILE__);
        my_pthread_mutex_unlock(&g_ptmfp);
        exit(-1);
    }

    p->fp     = fp;
    p->iline  = iline;
    p->pcfile = save_strdup(pcfile);
    p->pnext  = *header;
    p->pid    = pid;
    *header   = p;

    my_pthread_mutex_unlock(&g_ptmfp);
}
/* grow_FSDB
   Args: (1) FSDB (fsdb) to be made twice as big
   Returns: 1 if success; 0 if failure (not enough memories)
   Grows an FSDB by allocating another chunk of memory for the FragSeqs
   as big as the one it already has. Note, it *DOES NOT* throw away the
   one it already has. Then, the fsdb->fss array is replaced by one
   twice as big. The pointers to all the existing FragSeqs are copied
   over and the new ones are set up. The size is reset, too. The old
   fsdb->fss array is freed.
   On failure the fsdb is left exactly as it was, and any chunk that
   was allocated during this call is freed again.
*/
int grow_FSDB( FSDB fsdb ) {
  int i, j, new_size;
  FragSeqP first_seq;
  FragSeqP* new_fss;

  new_size = fsdb->size * 2;

  /* DEBUG INFO */
  if ( DEBUG ) {
    fprintf( stderr, "Growing fsdb from %d to %d\n", (int)fsdb->size, new_size );
  }

  /* Allocate another chunk of memories as big as the one it has now,
     doubling its size */
  first_seq = (FragSeqP)save_malloc(fsdb->size * sizeof(FragSeq));
  if ( first_seq == NULL ) {
    return 0;
  }

  /* Now, allocate the *new* array of pointers for fsdb->fss.
     But, assign this to new_fss for now because we need to keep
     fsdb->fss so we can copy over the pointers it already has! */
  new_fss = (FragSeqP*)save_malloc(new_size * sizeof(FragSeqP));
  if ( new_fss == NULL ) {
    /* Nothing references first_seq yet, so give it back */
    free( first_seq );
    return 0;
  }

  /* Point the pointers to the pointees: old ones first ... */
  for( i = 0; i < fsdb->size; i++ ) {
    new_fss[i] = fsdb->fss[i];
  }

  /* ... then the new FragSeqs from the chunk just allocated */
  j = 0;
  for( i = fsdb->size; i < new_size; i++ ) {
    new_fss[i] = &first_seq[j++];
  }

  /* Now, free the old fsdb->fss and slot in the new one */
  free( fsdb->fss );
  fsdb->fss  = new_fss;
  fsdb->size = new_size;
  return 1;
}
/*
 * my_malloc
 * Allocates nbytes via save_malloc and registers the resulting pointer
 * in the list headed by g_pmemheader through add_mem_list, tagged with
 * the caller's line number and file name.
 *
 *   nbytes - number of bytes requested
 *   iline  - caller's line number, passed on to add_mem_list
 *   pcfile - caller's file name, passed on to add_mem_list
 *
 * Returns whatever save_malloc returned.
 */
void *my_malloc(size_t nbytes, int iline, char *pcfile)
{
    void *block = save_malloc(nbytes);

    add_mem_list(&g_pmemheader, block, nbytes, iline, pcfile);

    return block;
}
/* grow_seq
   Args: (1) char* seq - currently allocated sequence buffer
         (2) int size  - its current size in bytes
   Returns: pointer to a new buffer of 2 * size bytes
   The first `size` bytes of seq are copied into the new buffer; the
   remaining bytes are left as allocated. The old buffer is freed, so
   the caller must continue with the returned pointer only.
*/
char* grow_seq(char* seq, int size) {
  char* bigger;
  char* dst;
  const char* src;
  int remaining;

  bigger = (char*)save_malloc( 2 * size );

  /* Byte-wise copy of the old contents */
  src = seq;
  dst = bigger;
  remaining = size;
  while (remaining > 0) {
    *dst++ = *src++;
    remaining--;
  }

  free(seq);
  return bigger;
}
/* This IDsList */ IDsListP init_ids_list(void) { IDsListP ids; char** ids_array; char* first_id; int i; // allocate the IDsList ids = (IDsListP)save_malloc(sizeof(IDsList)); ids_array = (char**)save_malloc(INIT_NUM_IDS * sizeof( char* )); first_id = (char*)save_malloc(INIT_NUM_IDS * MAX_ID_LEN * sizeof(char)); for (i = 0; i < INIT_NUM_IDS; i++) { ids_array[i] = &first_id[i*MAX_ID_LEN]; } ids->num_ids = 0; ids->sorted = 0; ids->ids = ids_array; return ids; }
/* permutateAlphabet: build all permutations out of a given alphabet.
   string:  the alphabet which should be cycled through
   strsize: the size of the alphabet
   k:       the number of entries in the counter array used for the
            sub-alphabets

   A counter array of k entries is allocated and zeroed. Then, until
   maximumReached() reports the end, the current state is printed with
   printArray() and advanced with increment(). The array is freed
   before returning.

   The result of save_malloc is assigned explicitly, the same way every
   other allocation in this file is written. The previous form passed
   the uninitialized pointer as a first argument and never stored the
   returned block. A negative k is rejected up front because it would
   wrap around in the size computation and in the unsigned loop bound.
*/
void permutateAlphabet(const char* string, const long strsize, const long k)
{
  unsigned long i;
  unsigned long* array;

  if (k < 0) {
    return;
  }

  array = (unsigned long*)save_malloc(sizeof(unsigned long) * (unsigned long)k);

  for (i = 0; i < (unsigned long)k; ++i) {
    array[i] = 0;
  }

  while (!maximumReached(array, strsize)) {
    printArray(string, array, k);
    increment(array, strsize, k);
  }

  free(array);
}
/* Takes a pointer to a populated PWAlnFrag (pwaln) and a pointer to a
   populated MapAlignent (maln)
   Does: 1. Adds this aligned sequence, without gaps, to the next free
            slot of maln->AlnSeqArray, growing this array if necessary
	 2. Stores, for every position of the fragment, the sequence
	    inserted just upstream of it relative to the reference
	    (asp->ins[pos], NULL if there is none)
	 3. Updates the gaps array of the reference sequence
	    (maln->ref->gaps[]) wherever this fragment needs a longer
	    gap than was known before
   Returns: 1 (TRUE) if success
            0 (FALSE) if failure: the AlnSeqArray could not be grown, an
	    insertion does not fit into MAX_INS_LEN chars (including the
	    terminator), or the fragment has more aligned positions than
	    the local gap table can hold (2*INIT_ALN_SEQ_LEN).
	    On failure maln->num_aln_seqs is not incremented and every
	    insert string stored by this call has been freed again.
   Notes: An insertion at the very end of the alignment (no aligned base
          follows it) is not attached to any position; its buffer is
	  released instead of being leaked.
*/
int merge_pwaln_into_maln(PWAlnFragP pwaln, MapAlignmentP maln) {
  int i, j, aln_len, ref_frag_len, ref_pos, gap_compare,
    mind_the_gap, seq_pos;
  char c, f;
  char* ins_seq = NULL;  /* insertion currently being collected, if any */
  AlnSeqP asp;
  int this_ref_gaps[(2*INIT_ALN_SEQ_LEN) + 1];

  // Grow array of aligned sequences if necessary
  if (maln->num_aln_seqs >= maln->size) {
    if ( !(grow_alns_map_alignment(maln))) {
      return 0;
    }
  }

  // Get a pointer to this next AlnSeq
  asp = maln->AlnSeqArray[maln->num_aln_seqs];

  // Copy over all the details thusfar
  strcpy(asp->id, pwaln->frag_id);
  strcpy(asp->desc, pwaln->frag_desc);
  asp->score      = pwaln->score;
  asp->start      = pwaln->start;
  asp->end        = pwaln->end;
  asp->revcom     = pwaln->revcom;
  asp->trimmed    = pwaln->trimmed;
  asp->segment    = pwaln->segment;
  asp->num_inputs = pwaln->num_inputs;

  aln_len = strlen(pwaln->frag_seq);

  /* Walk the pairwise alignment column by column. Columns where the
     reference has '-' are collected into ins_seq; all other columns
     put the fragment character into asp->seq */
  mind_the_gap = 0;
  j = 0;
  seq_pos = 0;
  this_ref_gaps[seq_pos] = 0;
  for (i = 0; i < aln_len; i++) {
    c = pwaln->ref_seq[i];
    f = pwaln->frag_seq[i];
    if (c == '-') {
      this_ref_gaps[seq_pos]++;
      if (!mind_the_gap) { // Starting a new gap
	ins_seq = (char*)save_malloc(MAX_INS_LEN * sizeof(char));
	j = 0;
	mind_the_gap = 1;
      }
      /* Always leave room for the terminating \0 */
      if (j >= (MAX_INS_LEN) - 1) {
	goto fail;
      }
      ins_seq[j++] = f;
    }
    else { // Not a gap
      /* this_ref_gaps[seq_pos + 1] is written below, so seq_pos + 1
	 must still be a valid index */
      if (seq_pos >= (2*INIT_ALN_SEQ_LEN)) {
	goto fail;
      }
      if (mind_the_gap) {
	// Just finished a gap, add \0 to inserted sequence
	ins_seq[j] = '\0';
	asp->ins[seq_pos] = ins_seq;
	ins_seq = NULL; // now owned by asp->ins[]
      }
      else { // Not a gap here
	asp->ins[seq_pos] = NULL;
      }
      asp->seq[seq_pos++] = f;
      this_ref_gaps[seq_pos] = 0;
      mind_the_gap = 0;
    }
  }

  /* An insertion still open here was never attached to a position */
  free(ins_seq);
  ins_seq = NULL;

  /* Add string terminator, just in case */
  asp->seq[seq_pos] = '\0';

  // Now, go through these ref seq gaps and see if they were already
  // known before. Only this_ref_gaps[0..seq_pos] have been filled in.
  ref_frag_len = asp->end - asp->start + 1;
  for (i = 0; (i < ref_frag_len) && (i <= seq_pos); i++) {
    ref_pos = asp->start + i;
    gap_compare = this_ref_gaps[i] - maln->ref->gaps[ref_pos];
    if (gap_compare > 0) {
      /* Longer gap in this fragment than known before so we must make
	 maln->ref->gaps[ref_pos] longer to accomodate it */
      maln->ref->gaps[ref_pos] += gap_compare;
    }
  }

  maln->num_aln_seqs++;
  return 1;

 fail:
  /* Release the open insertion and everything stored so far; the slot
     is not counted in num_aln_seqs, so it gets reused by the next call */
  free(ins_seq);
  for (i = 0; i < seq_pos; i++) {
    free(asp->ins[i]);
    asp->ins[i] = NULL;
  }
  return 0;
}
/* col_print_cons
   Args: (1) consensus - assembly consensus string (defines the number
                         of columns considered)
         (2) aln_ref   - aligned reference string
	 (3) cov       - coverage per column
	 (4) ref_poss  - 0-based reference coordinate per column
	 (5) maln      - the MapAlignment the fragments are taken from
   Returns: void
   Prints a tab-separated, column-wise table to stdout, preceded by a
   commented header explaining the columns. Columns where both the
   consensus and the reference are '-' are skipped; a consensus
   character of ' ' is shown as 'X'. The start/end counts are indexed
   by the fragment's start/end fields and looked up via ref_poss[].
   For 'f' (front) fragments only the start is counted, for 'b' (back)
   fragments only the end; everything else counts both.
   The four temporary count arrays are freed before returning.
*/
void col_print_cons(char* consensus, char* aln_ref, int* cov,
		    int* ref_poss, MapAlignmentP maln) {
  int len, i;
  char c;
  int* starts_f;
  int* starts_r;
  int* ends_f;
  int* ends_r;
  int* starts;
  int* ends;
  AlnSeqP as;

  len = strlen(consensus);
  starts_f = (int* )save_malloc(len * sizeof(int));
  starts_r = (int* )save_malloc(len * sizeof(int));
  ends_f   = (int* )save_malloc(len * sizeof(int));
  ends_r   = (int* )save_malloc(len * sizeof(int));

  /* Initialize everything to zero */
  for (i = 0; i < len; i++) {
    starts_f[i] = 0;
    starts_r[i] = 0;
    ends_f[i]   = 0;
    ends_r[i]   = 0;
  }

  /* Now, go through all the aligned fragments and update the starts
     and ends arrays based on where each fragment... starts and ends! */
  for (i = 0; i < maln->num_aln_seqs; i++) {
    as = maln->AlnSeqArray[i];

    /* Pick the counters for this fragment's strand */
    if (as->revcom) {
      starts = starts_r;
      ends   = ends_r;
    }
    else {
      starts = starts_f;
      ends   = ends_f;
    }

    switch (as->segment) {
    case 'f': /* only the start is correct for front fragments */
      starts[as->start]++;
      break;
    case 'b': /* only the end is correct for back fragments */
      ends[as->end]++;
      break;
    default:
      starts[as->start]++;
      ends[as->end]++;
      break;
    }
  }

  printf("# Columns:\n");
  printf("# 1. Assembly consensus base\n");
  printf("# 2. Reference %s base\n", maln->ref->id);
  printf("# 3. Coverage (number of reads overlapping this position)\n");
  printf("# 4. Coordinate on reference sequence (1-based)\n");
  printf("# 5. Number of fragments on forward strand that start here\n");
  printf("# 6. Number of fragments on reverse strand that start here\n");
  printf("# 7. Number of fragments on forward strand that end here\n");
  printf("# 8. Number of fragments on reverse strand that end here\n");

  for (i = 0; i < len; i++) {
    if ( !((consensus[i] == '-') && (aln_ref[i] == '-') )) {
      if (consensus[i] == ' ') {
	c = 'X';
      }
      else {
	c = consensus[i];
      }
      printf("%c\t%c\t%d\t%d\t%d\t%d\t%d\t%d\n",
	     c, aln_ref[i], cov[i], (ref_poss[i]+1),
	     starts_f[ref_poss[i]], starts_r[ref_poss[i]],
	     ends_f[ref_poss[i]], ends_r[ref_poss[i]]);
    }
  }

  free(starts_f);
  free(starts_r);
  free(ends_f);
  free(ends_r);
}
/* For a given region, defined by reg_start and reg_end (1-based,
   inclusive), show the reference sequence, the consensus sequence, and
   the sequence of all the fragments that overlap this region at all.
   Args: (1) maln       - the MapAlignment to show
         (2) reg_start  - first position; values < 1 are raised to 1
	 (3) reg_end    - last position; values beyond the reference
	                  length are lowered to maln->ref->seq_len
	 (4) out_format - 61 prints everything through fasta_aln_print;
	                  6 additionally prefixes each read line with
			  its decorated ID; other values print the
			  plain (or colored) strings
	 (5) in_color   - if true (and out_format is not 61), lines are
	                  printed through color_print
   Returns: void
   The decorated read ID is 't' or '_' (trimmed or not), 'r' or '_'
   (reverse complemented or not), two characters from the formatted
   number of input sequences, then the regular ID.
   In the read lines '.' marks columns outside the read and '-' pads an
   insert that is shorter than the gap in the reference.
*/
void print_region( MapAlignmentP maln, int reg_start, int reg_end,
		   int out_format, int in_color ) {
  int i, ref_pos, ref_gaps, j, cons_pos, ins_len;
  int num_gaps = 0;
  int read_out_pos;
  char* consensus;
  char* aln_ref;
  char* ins_cons;
  char* read_str;
  char* read_id;
  char* ins_seq;
  int* ins_cov;
  BaseCountsP bcs;
  AlnSeqP aln_seq;
  PSSMP psm;

  /* Make sure region doesn't go off edge */
  if (reg_start < 1) {
    reg_start = 1;
  }
  if (reg_end > maln->ref->seq_len) {
    reg_end = maln->ref->seq_len;
  }

  bcs = (BaseCountsP)save_malloc(sizeof(BaseCounts));
  reset_base_counts(bcs);

  /* Find how many gaps are in this region */
  for (i = reg_start-1; i <= reg_end; i++) {
    num_gaps += maln->ref->gaps[i];
  }

  /* Make char arrays long enough for the sequence plus gaps for the
     reference and the consensus. These will be populated and output
     by the rest of this function. */
  consensus = (char*)save_malloc((num_gaps + (reg_end-reg_start+1) + 10) * sizeof(char));
  aln_ref   = (char*)save_malloc((num_gaps + (reg_end-reg_start+1) + 10) * sizeof(char));

  /* Make char and int array for insert consensus and insert coverage
     to be used whenever needed */
  ins_cons = (char*)save_malloc(MAX_INS_LEN * sizeof(char));
  ins_cov  = (int* )save_malloc(MAX_INS_LEN * sizeof(int));

  cons_pos = 0;
  for (ref_pos = reg_start - 1; ref_pos < reg_end; ref_pos++) {
    ref_gaps = maln->ref->gaps[ref_pos];

    /* Add these gaps to the reference aligned string and the inserted
       sequence to the consensus[] */
    if (ref_gaps > 0) {
      find_ins_cons(maln, ref_pos, ins_cons, ins_cov, out_format);
      for (j = 0; j < ref_gaps; j++) {
	aln_ref[cons_pos]   = '-';
	consensus[cons_pos] = ins_cons[j];
	cons_pos++;
      }
    }

    /* Re-zero all the base counts */
    reset_base_counts(bcs);

    /* Find all the aligned fragments that include this position and
       make a consensus from it */
    for (j = 0; j < maln->num_aln_seqs; j++) {
      aln_seq = maln->AlnSeqArray[j];
      /* Does this aligned fragment cover this position? */
      if ( (aln_seq->start <= ref_pos) &&
	   (aln_seq->end   >= ref_pos)) {
	if (aln_seq->revcom) {
	  psm = maln->rpsm;
	}
	else {
	  psm = maln->fpsm;
	}
	add_base(aln_seq->seq[ref_pos - aln_seq->start], bcs, psm,
		 aln_seq->smp[ref_pos - aln_seq->start]);
      }
    }
    consensus[cons_pos] = find_consensus(bcs, maln->cons_code);
    aln_ref[cons_pos]   = maln->ref->seq[ref_pos];
    cons_pos++;
  }
  consensus[cons_pos] = '\0';
  aln_ref[cons_pos]   = '\0';

  /* Now print the reference and the consensus */
  if (out_format == 61) {
    fasta_aln_print(aln_ref, maln->ref->id);
    fasta_aln_print(consensus, "Consensus");
  }
  else {
    if (in_color) {
      printf("%-20.20s ", maln->ref->id);
      color_print(aln_ref);
      printf("%-20.20s ", "Consensus");
      color_print(consensus);
    }
    else
      printf("%-20.20s %s\n%-20s %s\n", maln->ref->id, aln_ref, "Consensus", consensus);
  }

  /* Alloc memories for the string to hold each read (plus .'s outside)
     and alloc memories for the special id which is the regular ID plus
     the code for whether it's truncated, reverse complemented, and the
     number of input sequence */
  read_str = (char*)save_malloc(strlen(aln_ref) * sizeof(char) + 1);
  read_id  = (char*)save_malloc((MAX_ID_LEN + 4) * sizeof(char) + 1);

  /* Find every sequence that overlaps this region and print the
     overlapping segment */
  for (j = 0; j < maln->num_aln_seqs; j++) {
    aln_seq = maln->AlnSeqArray[j];
    if (alnseq_ol_reg(aln_seq, (reg_start-1), (reg_end-1)) ) {
      read_out_pos = 0;

      /* Build the decorated ID */
      if (aln_seq->trimmed) {
	read_id[0] = 't';
      }
      else {
	read_id[0] = '_';
      }
      if (aln_seq->revcom) {
	read_id[1] = 'r';
      }
      else {
	read_id[1] = '_';
      }
      sprintf( &read_id[2], "%0.2d", aln_seq->num_inputs );
      read_id[4] = '\0';
      strcat(read_id, aln_seq->id);

      if (out_format == 6) {
	printf("%-20.20s ", read_id);
      }

      for (ref_pos = reg_start - 1; ref_pos < reg_end; ref_pos++) {
	ref_gaps = maln->ref->gaps[ref_pos];
	/* Check to make sure that this fragment has started and not
	   ended by this ref_pos */
	if ( (aln_seq->start <= ref_pos) &&
	     (aln_seq->end   >= ref_pos)) {
	  if (ref_gaps > 0) {
	    ins_seq = aln_seq->ins[ref_pos - aln_seq->start];
	    if (ins_seq == NULL) {
	      ins_len = 0;
	    }
	    else {
	      ins_len = strlen(ins_seq);
	    }
	    /* read_str has room for ref_gaps characters per gapped
	       column, so never emit more than that */
	    if (ins_len > ref_gaps) {
	      ins_len = ref_gaps;
	    }

	    if (aln_seq->start == ref_pos) {
	      // Exactly at the beginning of this frag
	      for (i = 0; i < ref_gaps; i++) {
		read_str[read_out_pos++] = '.';
	      }
	    }
	    else {
	      // Just a normal, interior gapped position
	      for (i = 0; i < ins_len; i++) {
		read_str[read_out_pos++] = ins_seq[i];
	      }
	      for (i = 0; i < (ref_gaps - ins_len); i++) {
		read_str[read_out_pos++] = '-';
	      }
	    }
	  }
	  read_str[read_out_pos++] = aln_seq->seq[ref_pos - aln_seq->start];
	}
	else {
	  // This fragment doesn't actually cover this base
	  for (i = 0; i < ref_gaps; i++) {
	    // print this . for all ref gaps
	    read_str[read_out_pos++] = '.';
	  }
	  read_str[read_out_pos++] = '.';
	}
      }
      read_str[read_out_pos] = '\0';

      if (out_format == 61) {
	fasta_aln_print(read_str, read_id);
      }
      else {
	if (in_color) {
	  color_print(read_str);
	}
	else
	  printf("%s\n", read_str);
      }
    }
  }

  free(bcs);
  free(consensus);
  free(aln_ref);
  free(ins_cons);
  free(ins_cov);
  free(read_str);
  free(read_id);
}
/* find_ins_cons
   Args: (1) maln       - the MapAlignment
         (2) pos        - a position where maln->ref->gaps[pos] > 0,
	                  i.e. some fragments carry an insert there
	 (3) ins_cons   - filled with one consensus character per
	                  inserted column (not \0-terminated here)
	 (4) cons_cov   - filled with the coverage of each of these
	                  columns
	 (5) out_format - 4: show every column whose consensus is not
	                  '-' via show_single_pos; 41: show every
			  column; anything else: show nothing
   Returns: void
   ins_cons and cons_cov must have room for maln->ref->gaps[pos]
   entries; they are sized by the caller.
   Only fragments that start strictly before pos and end at or after it
   contribute: by convention the gap sits just upstream of pos, so a
   fragment starting exactly at pos does not span it. A contributing
   fragment whose insert is missing or shorter than the gap is counted
   as '-' in the columns it does not fill.
*/
void find_ins_cons(MapAlignmentP maln, int pos, char* ins_cons,
		   int* cons_cov, int out_format) {
  int frag_idx, col, ins_len, this_frag_ins_len, offset;
  char base;
  char* ins_seq;
  AlnSeqP aln_seq;
  BaseCountsP* bcs_array;
  BaseCountsP first_bcs;
  PSSMP psm;

  ins_len = maln->ref->gaps[pos];

  /* One BaseCounts per inserted column, kept in a single chunk */
  bcs_array = (BaseCountsP*)save_malloc(ins_len * sizeof(BaseCountsP));
  first_bcs = (BaseCountsP)save_malloc(ins_len * sizeof(BaseCounts));
  for (col = 0; col < ins_len; col++) {
    bcs_array[col] = first_bcs + col;
    reset_base_counts(bcs_array[col]);
  }

  for (frag_idx = 0; frag_idx < maln->num_aln_seqs; frag_idx++) {
    aln_seq = maln->AlnSeqArray[frag_idx];

    /* Skip fragments that do not span the gap */
    if ( (aln_seq->start >= pos) || (aln_seq->end < pos) ) {
      continue;
    }

    psm    = aln_seq->revcom ? maln->rpsm : maln->fpsm;
    offset = pos - aln_seq->start;

    /* How much actual inserted sequence does this fragment have? */
    ins_seq = aln_seq->ins[offset];
    this_frag_ins_len = 0;
    if (ins_seq != NULL) {
      this_frag_ins_len = strlen(ins_seq);
    }

    for (col = 0; col < ins_len; col++) {
      base = (col < this_frag_ins_len) ? ins_seq[col] : '-';
      add_base(base, bcs_array[col], psm, aln_seq->smp[offset]);
    }
  }

  for (col = 0; col < ins_len; col++) {
    ins_cons[col] = find_consensus(bcs_array[col], maln->cons_code);
    cons_cov[col] = bcs_array[col]->cov;
    if ( ((out_format == 4) && (ins_cons[col] != '-')) ||
	 (out_format == 41) ) {
      show_single_pos(pos, '-', ins_cons[col], bcs_array[col]);
    }
  }

  free(first_bcs);
  free(bcs_array);
}
int main( int argc, char* argv[] ) { char mat_fn[MAX_FN_LEN+1]; char maln_fn[MAX_FN_LEN+1]; char fastq_out_fn[MAX_FN_LEN+1]; char maln_root[MAX_FN_LEN+1]; char ref_fn[MAX_FN_LEN+1]; char frag_fn[MAX_FN_LEN+1]; char adapter_code[2]; // place to keep the argument for -a (which adapter to trim) char* c_time; // place to keep asctime string char* test_id; int ich; int any_arg = 0; int Hard_cut = 0; // If 0 => use dynamic score cutoff, if > 0 use this instead int circular = 0; // Boolean, TRUE if reference sequence is circular int make_fastq = 0; // Boolean, TRUE if we should also output fastq database of seqs in assembly int seq_code = 0; // code to indicate sequence input format; 0 => fasta; 1 => fastq int do_adapter_trimming = 0; // Boolean, TRUE if we should try to trim // adapter from input sequences int iterate = 0; //Boolean, TRUE means interate the assembly until convergence // on an assembled sequence int FINAL_ONLY = 0; //Boolean, TRUE means only write out the final assembly maln file // FALSE (default) means write out each one int ids_rest = 0; // Boolean, TRUE means restrict analysis to IDs in input file int repeat_filt = 0; //Boolean, TRUE means remove sequences that are repeats, // keeping best-scoring representative int repeat_qual_filt = 0; //Boolean, TRUE means remove sequences that are repeats, // keeping best quality score sum representative int just_outer_coords = 1; // Boolean, TRUE means just use strand, start, and end to // determine if sequences are redundant int SCORE_CUT_SET = 0; //Boolean, TRUE means user has set a length/score cutoff line int seen_seqs = 0; int hp_special = 0; // Boolean, TRUE means user wants hp gap special discount int distant_ref = 0; // Boolean, TRUE means the initial reference sequence is // known to be distantly related so keep trying to align all // sequences each round int kmer_filt_len = -1; // length of kmer filtering, if user wants it; otherwise // special value of -1 indicates this is unset int soft_mask = 0; 
//Boolean; TRUE => do not use kmers that are all lower-case // FALSE => DO use all kmers, regardless of case int iter_num; // Number of iterations of assembly done int collapse = 0; // Boolean; TRUE => collapse input sequences in FSDB to improve // sequence quality // FALSE => (default) keep all sequences double slope = DEF_S; // Set these to default unless, until user changes double intercept = DEF_N; // them MapAlignmentP maln, // Contains all fragments initially better // than FIRST_ROUND_SCORE_CUTOFF culled_maln; // Contains all fragments with scores // better than SCORE_CUTOFF AlignmentP fw_align, rc_align, adapt_align; PSSMP ancsubmat = init_flatsubmat(); PSSMP rcancsubmat = revcom_submat(ancsubmat); const PSSMP flatsubmat = init_flatsubmat(); KPL* fkpa; // Place to keep forward kmer array if user requested kmer KPL* rkpa; // Place to keep reverse kmer array if user requested kmer IDsListP good_ids; FragSeqP frag_seq; PWAlnFragP front_pwaln, back_pwaln; FSDB fsdb; // Database to hold sequences to iterate over FILE* FF; time_t curr_time; char maln_root_def[] = "assembly.maln.iter"; extern int optind; extern char* optarg; char neand_adapt[] = "GTCAGACACGCAACAGGGGATAGGCAAGGCACACAGGGGATAGG"; char stand_adapt[] = "CTGAGACACGCAACAGGGGATAGGCAAGGCACACAGGGGATAGG"; char user_def_adapt[128]; char* adapter; // set to either neand_adapt or stand_adapt based on user preference adapter = neand_adapt; // Default is Neandertal char* assembly_cons; char* last_assembly_cons; int cc = 1; // consensus code for calling consensus base int i; /* Set the default output filename until the user overrides it */ strcpy( maln_root, maln_root_def ); /* Process command line arguments */ while( (ich=getopt( argc, argv, "s:r:f:m:a:p:H:I:S:N:k:q:FTciuhDMUAC" )) != -1 ) { switch(ich) { case 'c' : circular = 1; break; case 'q' : make_fastq = 1; strcpy( fastq_out_fn, optarg ); case 'C' : collapse = 1; break; case 'i' : iterate = 1; break; case 'h' : hp_special = 1; break; case 'u' : repeat_filt = 
1; break; case 'A' : just_outer_coords = 0; break; case 'U' : repeat_qual_filt = 1; break; case 'D' : distant_ref = 1; break; case 'p' : cc = atoi( optarg ); any_arg = 1; break; case 'I' : good_ids = parse_ids( optarg ); ids_rest = 1; break; case 'H' : Hard_cut = atoi( optarg ); if ( Hard_cut <= 0 ) { fprintf( stderr, "Hard cutoff (-H) must be positive\n" ); help(); exit( 0 ); } any_arg = 1; break; case 'M' : soft_mask = 1; break; case 's' : strcpy( mat_fn, optarg ); free( ancsubmat ); // trash the flat submat we initialized with ancsubmat = read_pssm( mat_fn ); free( rcancsubmat ); // trash the init rcsubmat, too rcancsubmat = revcom_submat( ancsubmat ); any_arg = 1; break; case 'r' : strcpy( ref_fn, optarg ); any_arg = 1; break; case 'k' : kmer_filt_len = atoi( optarg ); any_arg = 1; break; case 'f' : strcpy( frag_fn, optarg ); any_arg = 1; break; case 'm' : strcpy( maln_root, optarg ); any_arg = 1; break; case 'T' : do_adapter_trimming = 1; break; case 'a' : if ( strlen( optarg ) > 127 ) { fprintf( stderr, "That adapter is too big!\nMIA will use the standard adapter.\n" ); adapter = stand_adapt; } else { strcpy( user_def_adapt, optarg ); if ( strlen( user_def_adapt ) > 1 ) { adapter = user_def_adapt; } else { if ( !( (user_def_adapt[0] == 'n') || (user_def_adapt[0] == 'N') ) ) { adapter = stand_adapt; } else { adapter = neand_adapt; } } } break; case 'S' : slope = atof( optarg ); SCORE_CUT_SET = 1; break; case 'N' : intercept = atof( optarg ); SCORE_CUT_SET = 1; break; case 'F' : FINAL_ONLY = 1; break; default : help(); exit( 0 ); } } if ( !any_arg ) { help(); exit( 0 ); } if ( optind != argc ) { fprintf( stderr, "There seems to be some extra cruff on the command line that mia does not understand.\n" ); } /* Start the clock... 
*/ curr_time = time(NULL); // c_time = (char*)save_malloc(64*sizeof(char)); // c_time = asctime(localtime(&curr_time)); /* Announce that we're starting */ fprintf( stderr, "Starting assembly of %s\nusing %s\nas reference at %s\n", frag_fn, ref_fn, asctime(localtime(&curr_time)) ); /* Set up the maln structure */ maln = (MapAlignmentP)init_map_alignment(); maln->cons_code = cc; if ( maln == NULL ) { fprintf( stderr, "Not enough memories for this\n" ); exit( 1 ); } /* Set the distant_ref flag */ maln->distant_ref = distant_ref; /* Set up the FSDB for keeping good-scoring sequence in memory */ fsdb = init_FSDB(); if ( fsdb == NULL ) { fprintf( stderr, "Not enough memories for holding sequences\n" ); exit( 1 ); } /* Read in the reference sequence and make reverse complement, too*/ if ( read_fasta_ref( maln->ref, ref_fn ) != 1 ) { fprintf( stderr, "Problem reading reference sequence file %s\n", ref_fn ); exit( 1 ); } /* Add wrap-around sequence (rc, too) and set maln->ref->circular if it's circular */ if ( circular ) { add_ref_wrap( maln->ref ); } else { maln->ref->wrap_seq_len = maln->ref->seq_len; } /* Add space for the gaps array */ maln->ref->gaps = (int*)save_malloc((maln->ref->wrap_seq_len+1) * sizeof(int)); for( i = 0; i <= maln->ref->wrap_seq_len; i++ ) { maln->ref->gaps[i] = 0; } /* Set up fkpa and rkpa for list of kmers in the reference (forward and revcom strand) if user wants kmer filtering */ if ( kmer_filt_len > 0 ) { fprintf( stderr, "Making kmer list for k-mer filtering...\n" ); fkpa = init_kpa(kmer_filt_len); rkpa = init_kpa(kmer_filt_len); /* kmer_list = (KmersP)pop_kmers( maln->ref, kmer_filt_len ); */ populate_kpa( fkpa, maln->ref->seq, maln->ref->wrap_seq_len, kmer_filt_len, soft_mask ); populate_kpa( rkpa, maln->ref->rcseq, maln->ref->wrap_seq_len, kmer_filt_len, soft_mask ); } /* Now kmer arrays have been made if requested. We can upper case the reference sequences. 
*/ make_ref_upper( maln->ref ); /* Set up FragSeqP to point to a FragSeq */ frag_seq = (FragSeqP)save_malloc(sizeof(FragSeq)); /* Set up the alignment structures for forward and reverse complement alignments */ fw_align = (AlignmentP)init_alignment( INIT_ALN_SEQ_LEN, (maln->ref->wrap_seq_len + (2*INIT_ALN_SEQ_LEN)), 0, hp_special ); rc_align = (AlignmentP)init_alignment( INIT_ALN_SEQ_LEN, (maln->ref->wrap_seq_len + (2*INIT_ALN_SEQ_LEN)), 1, hp_special ); /* Set up the alignment structure for adapter trimming, if user wants that */ if ( do_adapter_trimming ) { adapt_align = (AlignmentP)init_alignment( INIT_ALN_SEQ_LEN, INIT_ALN_SEQ_LEN, 0, hp_special ); /* Setup the flatsubmat */ //flatsubmat = init_flatsubmat(); adapt_align->submat = flatsubmat; adapt_align->seq2 = adapter; adapt_align->len2 = strlen( adapt_align->seq2 ); pop_s2c_in_a( adapt_align ); if ( hp_special ) { pop_hpl_and_hps( adapt_align->seq2, adapt_align->len2, adapt_align->hprl, adapt_align->hprs ); } /* Set for a semi-global that pays a penalty for unaligning the beginning of the adapter, but not for the end of the adapter. This is because if the sequence read (align->seq1) ends, then we won't see any more of the adapter. 
When we search for the best alignment, we'll only look in the last column, requiring that all of align->seq1 is accounted for */ adapt_align->sg5 = 1; adapt_align->sg3 = 0; } fw_align->seq1 = maln->ref->seq; rc_align->seq1 = maln->ref->rcseq; if ( circular ) { fw_align->len1 = maln->ref->wrap_seq_len; rc_align->len1 = maln->ref->wrap_seq_len; } else { fw_align->len1 = maln->ref->seq_len; rc_align->len1 = maln->ref->seq_len; } /* Now the reference sequence and its reverse complement are prepared, put the s1c lookup codes in */ pop_s1c_in_a( fw_align ); pop_s1c_in_a( rc_align ); if ( hp_special ) { pop_hpl_and_hps( fw_align->seq1, fw_align->len1, fw_align->hpcl, fw_align->hpcs ); pop_hpl_and_hps( rc_align->seq1, rc_align->len1, rc_align->hpcl, rc_align->hpcs ); } /* One by one, go through the input file of fragments to be aligned. Align them to the reference. For each fragment generating an alignment score better than the cutoff, merge it into the maln alignment. Keep track of those that don't, too. */ FF = fileOpen( frag_fn, "r" ); seq_code = find_input_type( FF ); //LOG = fileOpen( log_fn, "w" ); front_pwaln = (PWAlnFragP)save_malloc( sizeof(PWAlnFrag)); back_pwaln = (PWAlnFragP)save_malloc( sizeof(PWAlnFrag)); /* Give some space to remember the IDs as we see them */ test_id = (char*)save_malloc(MAX_ID_LEN * sizeof(char)); /* Announce we're strarting alignment of fragments */ fprintf( stderr, "Starting to align sequences to the reference...\n" ); while( read_next_seq( FF, frag_seq, seq_code ) ) { seen_seqs++; strcpy( test_id, frag_seq->id ); if ( DEBUG ) { fprintf( stderr, "%s\n", frag_seq->id ); } if ( !ids_rest || ( bsearch( &test_id, good_ids->ids, good_ids->num_ids, sizeof(char*), idCmp ) != NULL ) ) { if ( do_adapter_trimming ) { /* Trim sequence (set frag_seg->trimmed and frag_seg->trim_point field) */ trim_frag( frag_seq, adapter, adapt_align ); } else { frag_seq->trimmed = 0; } /* Check if kmer filtering. 
If so, filter */ if ( new_kmer_filter( frag_seq, fkpa, rkpa, kmer_filt_len, fw_align, rc_align ) ) { /* Align this fragment to the reference and write the result into pwaln; use the ancsubmat, not the reverse complemented rcsancsubmat during this first iteration because all sequence is forward strand */ fw_align->submat = ancsubmat; rc_align->submat = ancsubmat; if ( sg_align( maln, frag_seq, fsdb, fw_align, rc_align, front_pwaln, back_pwaln ) == 0 ) { fprintf( stderr, "Problem handling %s\n", frag_seq->id ); } } } if ( seen_seqs % 1000 == 0 ) { fprintf( stderr, "." ); } if ( seen_seqs % 80000 == 0 ) { fprintf( stderr, "\n" ); } } /* Now, fsdb is complete and points to all the things in maln. So we can fill in the AlnSeqP->smp array for everything in the maln->AlnSeqArray to know which matrices to use for *CALLING* a consensus; Conveniently, there are pointers to all of these in the fss->fss[X]->front|back_asp */ pop_smp_from_FSDB( fsdb, PSSM_DEPTH ); //fprintf( LOG, "__Finished with initial alignments__" ); //fflush( LOG ); fprintf( stderr, "\n" ); iter_num = 1; /* Now, we need a new MapAlignment, culled_maln, that is big enough to hold all the unique guys from maln */ culled_maln = init_culled_map_alignment( maln ); /* Filtering repeats announcement */ fprintf( stderr, "Repeat and score filtering\n" ); /* If user wants to filter against repeats by alignment score, do it */ if ( repeat_filt ) { /* Sort fsdb by fsdb->as */ sort_fsdb( fsdb ); /* Now, everything is sorted in fsdb, so I can easily see which guys are unique by as, ae, and rc fields */ set_uniq_in_fsdb( fsdb, just_outer_coords ); } /* If user wants to filter against repeats by q-score sum, do it */ if ( repeat_qual_filt ) { /* Sort fsdb by fsdb->as */ sort_fsdb_qscore( fsdb ); /* Now, everything is sorted in fsdb, so I can easily see which guys are unique by as, ae, and rc fields */ set_uniq_in_fsdb( fsdb, just_outer_coords ); } /* Now, we know which sequences are unique, so make a culled_maln with just 
the unique guys */ cull_maln_from_fsdb( culled_maln, fsdb, Hard_cut, SCORE_CUT_SET, slope, intercept ); fclose(FF); /* Tell the culled_maln which matrices to use for assembly */ culled_maln->fpsm = ancsubmat; culled_maln->rpsm = rcancsubmat; sort_aln_frags( culled_maln ); //invalidates fsdb->front|back_asp fields! fw_align->submat = ancsubmat; fw_align->sg5 = 1; fw_align->sg3 = 1; last_assembly_cons = (char*)save_malloc((maln->ref->seq_len +1) * sizeof(char)); strncpy( last_assembly_cons, maln->ref->seq, maln->ref->seq_len ); last_assembly_cons[maln->ref->seq_len] = '\0'; /* Re-align everything with revcomped sequence and substitution matrices, but first unmask all alignment positions and collapse sequences if requested */ memset(fw_align->align_mask, 1, fw_align->len1); if ( collapse ) { collapse_FSDB( fsdb, Hard_cut, SCORE_CUT_SET, slope, intercept ); } reiterate_assembly( last_assembly_cons, iter_num, maln, fsdb, fw_align, front_pwaln, back_pwaln, ancsubmat, rcancsubmat ); pop_smp_from_FSDB( fsdb, PSSM_DEPTH ); fprintf( stderr, "Repeat and score filtering\n" ); if ( repeat_filt ) { sort_fsdb( fsdb ); set_uniq_in_fsdb( fsdb, just_outer_coords ); } if ( repeat_qual_filt ) { sort_fsdb_qscore( fsdb ); set_uniq_in_fsdb( fsdb, just_outer_coords ); } cull_maln_from_fsdb( culled_maln, fsdb, Hard_cut, SCORE_CUT_SET, slope, intercept ); /* Tell the culled_maln which matrices to use for assembly */ culled_maln->fpsm = ancsubmat; culled_maln->rpsm = rcancsubmat; //invalidates fsdb->front|back_asp fields! sort_aln_frags( culled_maln ); sprintf( maln_fn, "%s.%d", maln_root, iter_num ); if ( !iterate || !FINAL_ONLY ) { write_ma( maln_fn, culled_maln ); if ( make_fastq ) { write_fastq( fastq_out_fn, fsdb ); } } /* Are we iterating (re-aligning to the a new consensus? 
*/ if (iterate) { /* New assembly consensus announcement */ fprintf( stderr, "Generating new assembly consensus\n" ); assembly_cons = consensus_assembly_string( culled_maln ); while( ( strcmp( assembly_cons, last_assembly_cons ) != 0) && (iter_num < MAX_ITER) ) { /* Another round...*/ iter_num++; free( last_assembly_cons ); last_assembly_cons = assembly_cons; fprintf( stderr, "Starting assembly iteration %d\n", iter_num ); /* If the user wants collapsed sequences, now is the time */ if ( collapse ) { collapse_FSDB( fsdb, Hard_cut, SCORE_CUT_SET, slope, intercept ); } reiterate_assembly( assembly_cons, iter_num, maln, fsdb, fw_align, front_pwaln, back_pwaln, ancsubmat, rcancsubmat ); pop_smp_from_FSDB( fsdb, PSSM_DEPTH ); fprintf( stderr, "Repeat and score filtering\n" ); if ( repeat_filt ) { sort_fsdb( fsdb ); set_uniq_in_fsdb( fsdb, just_outer_coords ); } if ( repeat_qual_filt ) { sort_fsdb_qscore( fsdb ); set_uniq_in_fsdb( fsdb, just_outer_coords ); } cull_maln_from_fsdb( culled_maln, fsdb, Hard_cut, SCORE_CUT_SET, slope, intercept ); /* Tell the culled_maln which matrices to use for assembly */ culled_maln->fpsm = ancsubmat; culled_maln->rpsm = rcancsubmat; //invalidates fsdb->front|back_asp fields! sort_aln_frags( culled_maln ); sprintf( maln_fn, "%s.%d", maln_root, iter_num ); if ( !FINAL_ONLY ) { fprintf( stderr, "Writing maln file for iteration %d\n", iter_num ); write_ma( maln_fn, culled_maln ); } assembly_cons = consensus_assembly_string( culled_maln ); } /* Convergence? 
*/ if ( strcmp( assembly_cons, last_assembly_cons ) == 0 ) { fprintf( stderr, "Assembly convergence - writing final maln\n" ); write_ma( maln_fn, culled_maln ); } else { fprintf( stderr, "Assembly did not converge after % rounds, quitting\n" ); write_ma( maln_fn, culled_maln ); } if ( make_fastq ) { write_fastq( fastq_out_fn, fsdb ); } } /* No iteration, but we must still re-align everything with revcomped sequence and substitution matrices to keep scores comparable to what they would have been had we iterated */ /* Announce we're finished */ curr_time = time(NULL); // c_time = asctime(localtime(&curr_time)); fprintf( stderr, "Assembly finished at %s\n", asctime(localtime(&curr_time)) ); exit( 0 ); }
/* reiterate_assembly Args: (1) a pointer to a sequence to be used as the new reference (2) a MapAlignmentP big enough to store all the alignments (3) a FSDB with sequences to be realigned (4) a AlignmentP big enough for the alignments (5) a front PWAlnFragP for storing front alignments (6) a back PWAlnFragP for storing back alignments (7) a PSSMP with the forward substitution matrices (8) a PSSMP with the revcom substitution matrices Aligns all the FragSeqs from fsdb to the new reference, using the as and ae fields to narrow down where the alignment happens Resets the maln and writes all the results there Returns void */ void reiterate_assembly( char* new_ref_seq, int iter_num, MapAlignmentP maln, FSDB fsdb, AlignmentP a, PWAlnFragP front_pwaln, PWAlnFragP back_pwaln, PSSMP ancsubmat, PSSMP rcancsubmat ) { int i, j, ref_len, ref_start, ref_end, ref_frag_len, max_score, rc_score, aln_seq_len; FragSeqP fs; char iter_ref_id[MAX_ID_LEN + 1]; char tmp_rc[INIT_ALN_SEQ_LEN + 1]; char iter_ref_desc[] = "iteration assembly"; /* Set up maln->ref Keep his seq separate from the external assembly because that is malloced and freed elsewhere */ sprintf( iter_ref_id, "ConsAssem.%d", iter_num ); free( maln->ref->seq ); if ( maln->ref->rcseq != NULL ) { free( maln->ref->rcseq ); } free( maln->ref->gaps ); ref_len = strlen( new_ref_seq ); maln->ref->seq = (char*)save_malloc((ref_len + 1)* sizeof(char)); strcpy( maln->ref->seq, new_ref_seq ); maln->ref->rcseq = NULL; // never again! /* Keep the ID and description the same if this is the 1st iteration. 
Otherwise, set it to the generic ones */ if ( iter_num > 1 ) { strcpy( maln->ref->id, iter_ref_id ); strcpy( maln->ref->desc, iter_ref_desc ); } maln->ref->seq_len = ref_len; maln->ref->size = (ref_len+1); if ( maln->ref->circular ) { add_ref_wrap( maln->ref ); } else { maln->ref->wrap_seq_len = maln->ref->seq_len; } maln->ref->gaps = (int*)save_malloc((maln->ref->wrap_seq_len+1) * sizeof(int)); for( i = 0; i <= maln->ref->wrap_seq_len; i++ ) { maln->ref->gaps[i] = 0; } /* Reset its AlnSeqArray ->ins to all point to null */ for ( i = 0; i < maln->num_aln_seqs; i++ ) { aln_seq_len = strlen(maln->AlnSeqArray[i]->seq); for ( j = 0; j < aln_seq_len; j++ ) { /* We couldn't have malloced any sequence for inserts past our length; anything non-NULL out there is cruft */ if ( maln->AlnSeqArray[i]->ins[j] != NULL ) { free( maln->AlnSeqArray[i]->ins[j] ); maln->AlnSeqArray[i]->ins[j] = NULL; } } } /* Now, remake the hpcl and hprl arrays if hp_special */ if ( a->hp ) { free( a->hpcl ); free( a->hpcs ); a->hpcl = (int*)save_malloc(maln->ref->wrap_seq_len*sizeof(int)); a->hpcs = (int*)save_malloc(maln->ref->wrap_seq_len*sizeof(int)); pop_hpl_and_hps( maln->ref->seq, maln->ref->wrap_seq_len, a->hpcl, a->hpcs ); } /* Reset the number of aligned sequences in the maln */ maln->num_aln_seqs = 0; /* OK, ref is set up. Let's go through all the sequences in fsdb and re-align them to the new reference. 
If it's a revcom alignment, just use the rcancsubmat */ for( i = 0; i < fsdb->num_fss; i++ ) { fs = fsdb->fss[i]; /* Special case of distant reference and !fs->strand_known => try to realign both strands against the entire reference to learn the strand and alignment region */ if ( maln->distant_ref && (fs->strand_known == 0 ) && (iter_num > 1) ) { ref_start = 0; ref_end = maln->ref->wrap_seq_len; ref_frag_len = ref_end - ref_start; a->seq1 = &maln->ref->seq[0]; a->len1 = ref_frag_len; pop_s1c_in_a( a ); a->seq2 = fs->seq; a->len2 = strlen( a->seq2 ); pop_s2c_in_a( a ); if ( a->hp ) { pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs ); pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs ); } /* Align it! */ dyn_prog( a ); /* Find the best forward score */ max_score = max_sg_score( a ); if ( max_score > FIRST_ROUND_SCORE_CUTOFF ) { fs->strand_known = 1; fs->rc = 0; find_align_begin( a ); fs->as = a->abc; fs->ae = a->aec; fs->score = max_score; } /* Now, try reverse complement */ aln_seq_len = strlen( fs->seq ); a->submat = rcancsubmat; for ( j = 0; j < aln_seq_len; j++ ) { tmp_rc[j] = revcom_char(fs->seq[aln_seq_len-(j+1)]); } tmp_rc[aln_seq_len] = '\0'; a->seq2 = tmp_rc; pop_s2c_in_a( a ); if ( a->hp ) { pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs ); pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs ); } dyn_prog( a ); max_score = max_sg_score( a ); if ( (max_score > FIRST_ROUND_SCORE_CUTOFF) && (max_score > fs->score) ) { fs->strand_known = 1; fs->rc = 1; find_align_begin( a ); fs->as = a->abc; fs->ae = a->aec; fs->score = max_score; strcpy( fs->seq, tmp_rc ); } } /* Do we know the strand (either because we've always known it or we just learned it, doesn't matter) */ if ( fs->strand_known ) { if ( fs->rc ) { a->submat = rcancsubmat; } else { a->submat = ancsubmat; } a->seq2 = fs->seq; a->len2 = strlen( a->seq2 ); pop_s2c_in_a( a ); /* Set up the alignment limits on the reference */ if ( ((fs->as - REALIGN_BUFFER) < 0 ) ) { ref_start = 0; } else { 
ref_start = (fs->as - REALIGN_BUFFER); } if ( (fs->ae + REALIGN_BUFFER + 1) > maln->ref->wrap_seq_len ) { ref_end = maln->ref->wrap_seq_len; } else { ref_end = fs->ae + REALIGN_BUFFER; } /* Check to make sure the regions encompassed by ref_start to ref_end is reasonable given how long this fragment is. If not, just realign this whole mofo again because the reference has probably changed a lot between iterations */ if ( (ref_start + a->len2) > ref_end ) { ref_start = 0; ref_end = maln->ref->wrap_seq_len; } ref_frag_len = ref_end - ref_start; a->seq1 = &maln->ref->seq[ref_start]; a->len1 = ref_frag_len; pop_s1c_in_a( a ); /* If we want the homopolymer discount, the necessary arrays of hp starts and lengths must be set up anew */ if ( a->hp ) { pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs ); pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs ); } /* Align it! */ dyn_prog( a ); /* Find the best score */ max_score = max_sg_score( a ); find_align_begin( a ); /* First, put all alignment in front_pwaln */ populate_pwaln_to_begin( a, front_pwaln ); /* Load up front_pwaln */ strcpy( front_pwaln->ref_id, maln->ref->id ); strcpy( front_pwaln->ref_desc, maln->ref->desc ); strcpy( front_pwaln->frag_id, fs->id ); strcpy( front_pwaln->frag_desc, fs->desc ); front_pwaln->trimmed = fs->trimmed; front_pwaln->revcom = fs->rc; front_pwaln->num_inputs = fs->num_inputs; front_pwaln->segment = 'a'; front_pwaln->score = a->best_score; front_pwaln->start = a->abc + ref_start; front_pwaln->end = a->aec + ref_start; /* Update stats for this FragSeq */ fs->as = a->abc + ref_start; fs->ae = a->aec + ref_start; fs->unique_best = 1; fs->score = a->best_score; if ( front_pwaln->end > maln->ref->seq_len ) { /* This alignment wraps around - adjust the end to demonstrate this for split_maln check */ front_pwaln->end = front_pwaln->end - maln->ref->seq_len; } if ( front_pwaln->start > front_pwaln->end ) { /* Move wrapped bit to back_pwaln */ split_pwaln( front_pwaln, back_pwaln, 
maln->ref->seq_len ); merge_pwaln_into_maln( front_pwaln, maln ); fs->front_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1]; merge_pwaln_into_maln( back_pwaln, maln ); fs->back_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1]; } else { merge_pwaln_into_maln( front_pwaln, maln ); fs->front_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1]; } } } return; }