Exemplo n.º 1
0
/* grow_ids_list
   Args: (1) IDsListP (ids) whose capacity is to be doubled
   Returns: nothing
   Builds a pointer array twice as long as the current one. The
   first half re-uses the existing ID pointers (the strings they
   point at are left in place and are NOT freed); the second half
   points into one freshly allocated block with MAX_ID_LEN chars
   per slot. Only the old pointer array is freed.
*/
void grow_ids_list(IDsListP ids) {
	int doubled = (ids->size) * 2;
	char** slots = (char**)save_malloc(doubled * sizeof(char*));
	char* fresh_block = (char*)save_malloc(ids->size * MAX_ID_LEN * sizeof(char));
	int slot = 0;
	int fresh = 0;

	/* Lower half: carry the existing ID pointers over unchanged */
	while (slot < ids->size) {
		slots[slot] = ids->ids[slot];
		slot++;
	}

	/* Upper half: hand out consecutive MAX_ID_LEN-sized pieces of
	   the new block */
	while (slot < doubled) {
		slots[slot] = &fresh_block[(fresh * MAX_ID_LEN)];
		fresh++;
		slot++;
	}

	/* The old pointer array is no longer needed; the strings it
	   referenced are now reachable through slots[] */
	free(ids->ids);

	ids->ids = slots;
	ids->size = doubled;
}
Exemplo n.º 2
0
/* init_FSDB
   Arguments: void
   Returns: FSDB (pointer to struct fragseqdb) / NULL if
    not enough memory
   Used for initializing a new database of FragSeqs. Allocates
   enough memory for INIT_NUM_ALN_SEQS of these in one contiguous
   chunk and sets fsdb->fss[i] to point at the i-th element.
   On return fsdb->size is INIT_NUM_ALN_SEQS and fsdb->num_fss is 0.
   If any allocation fails, everything allocated so far is released
   before NULL is returned, so a failed call does not leak.
*/
FSDB init_FSDB ( void ) {
  int i;
  FSDB fsdb;
  FragSeqP first_seq;

  /* First, allocate the database header */
  fsdb = (FSDB)save_malloc(sizeof(FragSeqDB));
  if ( fsdb == NULL ) {
    return NULL;
  }

  /* One contiguous chunk holding all the initial FragSeqs */
  first_seq = (FragSeqP)save_malloc(INIT_NUM_ALN_SEQS *
			       sizeof(FragSeq));
  if ( first_seq == NULL ) {
    free( fsdb );
    return NULL;
  }

  /* The array of pointers into that chunk */
  fsdb->fss = (FragSeqP*)save_malloc(INIT_NUM_ALN_SEQS *
				sizeof( FragSeqP ));
  if ( fsdb->fss == NULL ) {
    free( first_seq );
    free( fsdb );
    return NULL;
  }

  for ( i = 0; i < INIT_NUM_ALN_SEQS; i++ ) {
    fsdb->fss[i] = &first_seq[i];
  }

  fsdb->size = INIT_NUM_ALN_SEQS;
  fsdb->num_fss = 0;

  return fsdb;
}
Exemplo n.º 3
0
/* Create a new IntSet.
   maxvalue:    stored in the set's maxvalue field
   nofelements: number of slots allocated for the elements array,
                also stored as the set's count; must not exceed
                maxvalue (enforced with assert)
   Returns the new set with its current field set to 0. The
   elements array itself is not initialised here.
   NOTE(review): save_malloc is used in a two-argument
   (target, size) form here, so it is presumably a macro that
   assigns to its first argument -- confirm against its definition. */
IntSet *intset_new(unsigned long maxvalue, unsigned long nofelements)
{
  IntSet *fresh = NULL;

  assert(nofelements <= maxvalue);

  save_malloc(fresh, sizeof(*fresh));
  save_malloc(fresh->elements, sizeof(*fresh->elements) * nofelements);

  fresh->current = 0;
  fresh->count = nofelements;
  fresh->maxvalue = maxvalue;

  return fresh;
}
Exemplo n.º 4
0
/* Record an allocation in the tracking list that starts at *header.
 *
 * header: address of the list head; the new node is pushed at the front
 * ptr:    address of the tracked allocation; NULL is ignored
 * isize:  size stored with the node
 * iline:  source line stored with the node
 * pcfile: source file name; a copy made with save_strdup is stored
 *
 * The list is walked and modified while holding g_ptmmem. If ptr is
 * already present, or the node cannot be allocated, a message is
 * written to stderr, the mutex is released and the process exits
 * with status -1.
 *
 * Returns 0. The function was previously declared with an implicit
 * int return type and a bare `return;`, which is not valid since C99;
 * the type is now spelled out (still int, so any existing declaration
 * stays compatible) and every path returns a value.
 *
 * NOTE(review): the result of save_strdup is stored unchecked.
 */
static int add_mem_list(my_mem_list_struct_t **header, char *ptr, int isize, int iline, char *pcfile)
{
	my_mem_list_struct_t *p;
	if (ptr == NULL) return 0;

	my_pthread_mutex_lock(&g_ptmmem);
	/* Registering the same address twice is treated as fatal */
	for (p=*header; p!=NULL; p=p->pnext)
	{
		if (p->ptr == ptr)
		{
			fprintf(stderr, "add_mem_list error in %s(%s %d).!", __FILE__, pcfile, iline);
			my_pthread_mutex_unlock(&g_ptmmem);
			exit(-1);
		}
	}

	p = (my_mem_list_struct_t *)save_malloc(sizeof(my_mem_list_struct_t));
	if (p == NULL)
	{
		fprintf(stderr, "not enough memory in %s.\n", __FILE__);
		my_pthread_mutex_unlock(&g_ptmmem);
		exit(-1);
	}

	/* Fill in the node and push it onto the front of the list */
	p->ptr = ptr;
	p->isize = isize;
	p->iline = iline;
	p->pcfile = save_strdup(pcfile);
	p->pnext = *header;
	*header = p;
	my_pthread_mutex_unlock(&g_ptmmem);
	return 0;
}
Exemplo n.º 5
0
/* Record an open FILE* in the tracking list that starts at *header.
 *
 * header: address of the list head; the new node is pushed at the front
 * fp:     stream to track; NULL is ignored
 * iline:  source line stored with the node
 * pcfile: source file name; a copy made with save_strdup is stored
 *
 * The list is walked and modified while holding g_ptmfp. The owner id
 * stored with each node is currently always 0 (the pthread_self() call
 * is commented out), so the duplicate check effectively compares fp
 * only. If the stream is already present, or the node cannot be
 * allocated, a message is written to stderr, the mutex is released and
 * the process exits with status -1.
 *
 * Returns 0. The function was previously declared with an implicit
 * int return type and a bare `return;`, which is not valid since C99;
 * the type is now spelled out (still int, so any existing declaration
 * stays compatible) and every path returns a value.
 *
 * NOTE(review): the result of save_strdup is stored unchecked.
 */
static int add_fp_list(my_fp_list_struct_t **header, FILE *fp, int iline, char *pcfile)
{
	pthread_t pid;
	my_fp_list_struct_t *p;
	if (fp == NULL) return 0;

	pid=0;//pid = pthread_self();
	my_pthread_mutex_lock(&g_ptmfp);
	/* Registering the same stream twice is treated as fatal */
	for (p=*header; p!=NULL; p=p->pnext)
	{
		if (p->fp == fp && p->pid == pid)
		{
			fprintf(stderr, "add_fp_list error in %s(%s %d).!", __FILE__, pcfile, iline);
			my_pthread_mutex_unlock(&g_ptmfp);
			exit(-1);
		}
	}

	p = (my_fp_list_struct_t *)save_malloc(sizeof(my_fp_list_struct_t));
	if (p == NULL)
	{
		fprintf(stderr, "not enough memory in %s.\n", __FILE__);
		my_pthread_mutex_unlock(&g_ptmfp);
		exit(-1);
	}

	/* Fill in the node and push it onto the front of the list */
	p->fp = fp;
	p->iline = iline;
	p->pcfile = save_strdup(pcfile);
	p->pnext = *header;
	p->pid = pid;
	*header = p;
	my_pthread_mutex_unlock(&g_ptmfp);
	return 0;
}
Exemplo n.º 6
0
/* grow_FSDB
   Args: (1) FSDB (fsdb) to be made twice as big
   Returns: 1 if success; 0 if failure (not enough memory)
   Grows an FSDB by allocating another chunk of memory for
   the FragSeqs as big as the one it already has. Note, it
   *DOES NOT* throw away the one it already has. Then, the
   fsdb->fss array is replaced by one twice as big. The
   pointers to all the existing FragSeqs are copied over
   and the new ones are set up. The size is reset, too.
   The old fsdb->fss array is freed.
   On failure fsdb is left exactly as it was and nothing
   allocated by this call is leaked.
*/
int grow_FSDB( FSDB fsdb ) {
  int i, j, new_size;
  FragSeqP first_seq;
  FragSeqP* new_fss;

  new_size = fsdb->size * 2;

  /* DEBUG INFO */
  if ( DEBUG ) {
    fprintf( stderr, "Growing fsdb from %d to %d\n",
	     (int)fsdb->size, new_size );
  }

  /* Allocate another chunk of memory as big as the
     one it has now, doubling its size */
  first_seq = (FragSeqP)save_malloc(fsdb->size *
			       sizeof(FragSeq));
  if ( first_seq == NULL ) {
    return 0;
  }

  /* Now, allocate the *new* array of pointers for fsdb->fss
     But, assign this to new_fss for now because we need to
     keep fsdb->fss so we can copy over the pointers it already
     has!
  */
  new_fss = (FragSeqP*)save_malloc(new_size * sizeof(FragSeqP));
  if ( new_fss == NULL ) {
    /* Nothing references the new chunk yet, so give it back */
    free( first_seq );
    return 0;
  }

  /* Point the pointers to the pointees: old ones first ... */
  for( i = 0; i < fsdb->size; i++ ) {
    new_fss[i] = fsdb->fss[i];
  }
  /* ... then the slots of the freshly allocated chunk */
  j = 0;
  for( i = fsdb->size; i < new_size; i++ ) {
    new_fss[i] = &first_seq[j++];
  }

  /* Now, free the old fsdb->fss and slot in the new one */
  free( fsdb->fss );
  fsdb->fss  = new_fss;
  fsdb->size = new_size;
  return 1;
}
Exemplo n.º 7
0
/* Allocate nbytes with save_malloc and register the result in the
 * list headed by g_pmemheader, tagged with the caller's source line
 * (iline) and file name (pcfile). Returns whatever save_malloc
 * returned.
 * NOTE(review): nbytes is a size_t here but is handed on to
 * add_mem_list as its size argument -- check for narrowing there. */
void *my_malloc(size_t nbytes, int iline, char *pcfile)
{
	void *block = save_malloc(nbytes);

	add_mem_list(&g_pmemheader, block, nbytes, iline, pcfile);

	return block;
}
Exemplo n.º 8
0
/* Double the storage behind a sequence (an array of char).
 seq:  the current buffer; it is freed before returning
 size: the current buffer size in chars
 Returns a new buffer of 2 * size chars whose first size chars
 are a copy of seq. The remaining chars are left uninitialised.
 */
char* grow_seq(char* seq, int size) {
  char* bigger = (char*)save_malloc( 2 * size );
  int pos = 0;

  /* Carry the old contents across, one char at a time */
  while (pos < size) {
    bigger[pos] = seq[pos];
    pos++;
  }

  free(seq);
  return bigger;
}
Exemplo n.º 9
0
/* init_ids_list
   Arguments: void
   Returns: a newly allocated IDsList with room for INIT_NUM_IDS
   IDs of MAX_ID_LEN chars each.
   The ID strings live in one contiguous block; ids->ids[i] points
   at the i-th MAX_ID_LEN-sized slot of that block. The list starts
   empty (num_ids = 0), unsorted (sorted = 0) and with its capacity
   recorded in ids->size. The capacity was previously left unset,
   yet grow_ids_list reads ids->size to decide how far to grow.
*/
IDsListP init_ids_list(void) {
	IDsListP ids;
	char** ids_array;
	char* first_id;
	int i;

	// allocate the IDsList
	ids = (IDsListP)save_malloc(sizeof(IDsList));

	// pointer array plus one block that holds all the ID strings
	ids_array = (char**)save_malloc(INIT_NUM_IDS * sizeof( char* ));
	first_id = (char*)save_malloc(INIT_NUM_IDS * MAX_ID_LEN * sizeof(char));

	for (i = 0; i < INIT_NUM_IDS; i++) {
		ids_array[i] = &first_id[i*MAX_ID_LEN];
	}

	ids->num_ids = 0;
	ids->sorted = 0;
	ids->size = INIT_NUM_IDS; // capacity, consumed by grow_ids_list
	ids->ids = ids_array;
	return ids;
}
Exemplo n.º 10
0
/*
 permutateAlphabet:
 print every index combination over a given alphabet.

 string: the alphabet that is indexed into
 strsize: the size of the alphabet
 k: the number of positions in the index array

 An index array of k entries starts out all zero. It is printed
 with printArray and advanced with increment, over and over, until
 maximumReached reports that the end has been reached; the array
 for which maximumReached is true is not printed.
 NOTE(review): maximumReached is given strsize but not k -- confirm
 that it can tell where the array ends.
 */
void permutateAlphabet(const char* string, const long strsize, const long k) {
  unsigned long* counters;
  unsigned long slot = 0;

  save_malloc(counters, sizeof(unsigned long) * k);

  /* start from the all-zero index array */
  while (slot < k) {
    counters[slot] = 0;
    ++slot;
  }

  for (; !maximumReached(counters, strsize); increment(counters, strsize, k)) {
    printArray(string, counters, k);
  }

  free(counters);
}
Exemplo n.º 11
0
/* Takes a pointer to a populated PWAlnFrag (pwaln) and
 a pointer to a populated MapAlignment (maln)
 Does:
 1. Adds this aligned sequence, without gaps, to maln->AlnSeqArray,
 growing this array if necessary
 2. For every position of the fragment, stores in asp->ins[] the
 bases inserted relative to the reference just before that
 position (NULL if there are none)
 3. Lengthens maln->ref->gaps[] wherever this fragment needs a
 longer gap in the reference than was known before
 Returns: 1 (TRUE) if success
 0 (FALSE) if failure (the array of aligned sequences could not grow)

 An insert longer than MAX_INS_LEN - 1 bases is truncated when it is
 stored, so that its buffer is never overrun; the reference gap is
 still recorded at full length. An insert that runs up to the very
 end of the alignment has no following position to be attached to,
 so its buffer is released instead of being leaked.

 NOTE(review): this_ref_gaps[] has a fixed size of
 (2*INIT_ALN_SEQ_LEN) + 1 and is indexed by the fragment position
 without a bounds check; the second loop reads it up to
 asp->end - asp->start, which assumes that span is no longer than
 the number of non-gap columns seen in the first loop -- confirm
 both with the callers.
 */
int merge_pwaln_into_maln(PWAlnFragP pwaln, MapAlignmentP maln) {
  int i, j, aln_len, ref_frag_len, ref_pos, gap_compare, mind_the_gap,
    seq_pos;
  char c, f;
  char* ins_seq = NULL;
  AlnSeqP asp;
  int this_ref_gaps[(2*INIT_ALN_SEQ_LEN) + 1];
  
  // Grow array of aligned sequences if necessary
  if (maln->num_aln_seqs >= maln->size) {
    if ( !(grow_alns_map_alignment(maln))) {
      return 0;
    }
  }
  
  // Get a pointer to this next AlnSeq
  asp = maln->AlnSeqArray[maln->num_aln_seqs];
  
  // Copy over all the details thus far
  strcpy(asp->id, pwaln->frag_id);
  strcpy(asp->desc, pwaln->frag_desc);
  asp->score      = pwaln->score;
  asp->start      = pwaln->start;
  asp->end        = pwaln->end;
  asp->revcom     = pwaln->revcom;
  asp->trimmed    = pwaln->trimmed;
  asp->segment    = pwaln->segment;
  asp->num_inputs = pwaln->num_inputs;
  aln_len = strlen(pwaln->frag_seq);

  /* Walk the pairwise alignment column by column. Columns where
     the reference has '-' are inserts: their fragment bases are
     collected in ins_seq. All other columns are copied to asp->seq.
  */
  mind_the_gap = 0;
  j = 0;
  seq_pos = 0;
  this_ref_gaps[seq_pos] = 0;
  for (i = 0; i < aln_len; i++) {
    c = pwaln->ref_seq[i];
    f = pwaln->frag_seq[i];
    if (c == '-') {
      this_ref_gaps[seq_pos]++;
      if (!mind_the_gap) {
	// Starting a new gap
	ins_seq = (char*)save_malloc(MAX_INS_LEN * sizeof(char));
	j = 0;
      }
      // Store the base only while there is still room for it
      // plus the terminating \0
      if (j + 1 < MAX_INS_LEN) {
	ins_seq[j++] = f;
      }
      mind_the_gap = 1;
    } 
    else {
      // Not a gap
      if (mind_the_gap) {
	// Just finished a gap, add \0 to inserted sequence
	ins_seq[j] = '\0';
	asp->ins[seq_pos] = ins_seq;
	ins_seq = NULL; // ownership has moved to asp->ins[]
      } 
      else { // Not a gap here
	asp->ins[seq_pos] = NULL;
      }
      asp->seq[seq_pos++] = f;
      this_ref_gaps[seq_pos] = 0;
      mind_the_gap = 0;
    }
  }
  
  /* The alignment ended inside an insert: there is no following
     base to hang it on, so drop the buffer rather than leak it */
  if (mind_the_gap) {
    free(ins_seq);
    ins_seq = NULL;
  }
  
  /* Add string terminator, just in case */
  asp->seq[seq_pos] = '\0';
  
  // Now, go through these ref seq gaps and see if they were already
  // known before
  ref_frag_len = asp->end - asp->start + 1;
  for (i = 0; i < ref_frag_len; i++) {
    ref_pos = asp->start + i;
    gap_compare = this_ref_gaps[i] - maln->ref->gaps[ref_pos];
    
    if (gap_compare > 0) {
      /* Longer gap in this fragment than known before so we must
	 make maln->ref->gaps[ref_pos] longer to accommodate it
      */
      maln->ref->gaps[ref_pos] += gap_compare;
    }
  }
  maln->num_aln_seqs++;
  return 1;
}
Exemplo n.º 12
0
/* Print the assembly as a tab-separated table, one row per alignment
 column, to stdout.
 consensus: consensus string; its length sets the number of columns
 aln_ref:   reference string aligned to the consensus
 cov:       coverage for each column
 ref_poss:  0-based reference coordinate for each column
 maln:      the alignment whose fragments supply the start/end counts
 Columns where both consensus and reference are '-' are skipped, and
 a ' ' in the consensus is printed as 'X'.
 For front ('f') fragments only the start is counted, for back ('b')
 fragments only the end, for all others both.
 The four counter arrays are now released before returning; they
 used to be leaked on every call.
 NOTE(review): the counter arrays hold strlen(consensus) entries but
 are indexed by as->start, as->end and ref_poss[i]; this assumes
 every one of those is smaller than that length -- confirm.
 */
void col_print_cons(char* consensus, char* aln_ref, int* cov, int* ref_poss,
		MapAlignmentP maln) {
	int len, i;
	char c;
	int* starts_f;
	int* starts_r;
	int* ends_f;
	int* ends_r;
	int* starts; /* start counters for the current fragment's strand */
	int* ends;   /* end counters for the current fragment's strand */
	AlnSeqP as;

	len = strlen(consensus);

	starts_f = (int* )save_malloc(len * sizeof(int));
	starts_r = (int* )save_malloc(len * sizeof(int));
	ends_f = (int* )save_malloc(len * sizeof(int));
	ends_r = (int* )save_malloc(len * sizeof(int));

	/* Initialize everything to zero */
	for (i = 0; i < len; i++) {
		starts_f[i] = 0;
		starts_r[i] = 0;
		ends_f[i] = 0;
		ends_r[i] = 0;
	}

	/* Now, go through all the aligned fragments and update
	 the starts and ends arrays based on where each fragment...
	 starts and ends! */
	for (i = 0; i < maln->num_aln_seqs; i++) {
		as = maln->AlnSeqArray[i];

		/* Pick the counters that belong to this fragment's strand */
		if (as->revcom) {
			starts = starts_r;
			ends = ends_r;
		} else {
			starts = starts_f;
			ends = ends_f;
		}

		switch (as->segment) {
		case 'f':
			/* only the start is correct for front fragments */
			starts[as->start]++;
			break;
		case 'b':
			/* only the end is correct for back fragments */
			ends[as->end]++;
			break;
		default:
			starts[as->start]++;
			ends[as->end]++;
			break;
		}
	}

	printf("# Columns:\n");
	printf("# 1. Assembly consensus base\n");
	printf("# 2. Reference %s base\n", maln->ref->id);
	printf("# 3. Coverage (number of reads overlapping this position)\n");
	printf("# 4. Coordinate on reference sequence (1-based)\n");
	printf("# 5. Number of fragments on forward strand that start here\n");
	printf("# 6. Number of fragments on reverse strand that start here\n");
	printf("# 7. Number of fragments on forward strand that end here\n");
	printf("# 8. Number of fragments on reverse strand that end here\n");
	for (i = 0; i < len; i++) {
		if ( !((consensus[i] == '-') && (aln_ref[i] == '-') )) {
			if (consensus[i] == ' ') {
				c = 'X';
			} else {
				c = consensus[i];
			}
			printf("%c\t%c\t%d\t%d\t%d\t%d\t%d\t%d\n", c, aln_ref[i], cov[i],
					(ref_poss[i]+1), starts_f[ref_poss[i]],
					starts_r[ref_poss[i]], ends_f[ref_poss[i]],
					ends_r[ref_poss[i]]);
		}
	}

	free(starts_f);
	free(starts_r);
	free(ends_f);
	free(ends_r);
}
Exemplo n.º 13
0
/* For a given region, defined by reg_start and reg_end, show
 the reference sequence, the consensus sequence, 
 and the sequence of all the fragments that overlap this
 region at all.
 maln:       the alignment to print from
 reg_start,
 reg_end:    region bounds; they are used as 1-based, inclusive
             coordinates (the loops run from reg_start-1 up to but
             not including reg_end) and are clamped to
             [1, maln->ref->seq_len]
 out_format: 61 prints every line through fasta_aln_print; 6
             prefixes each read line with its decorated ID; any
             other value prints reads without an ID
 in_color:   non-zero prints sequences through color_print instead
             of plain printf (ignored when out_format is 61)
 All buffers allocated here are freed before returning.
 */
void print_region( MapAlignmentP maln, int reg_start, int reg_end,
		   int out_format, int in_color ) {
  int i, ref_pos, ref_gaps, j, cons_pos, ins_len;
  int num_gaps = 0;
  int ins_seq_len;
  int read_out_pos;
  char* consensus;
  char* aln_ref;
  char* read_reg;
  char* ins_cons;
  char* read_str;
  char* read_id;
  char* ins_seq;
  int* ins_cov;
  BaseCountsP bcs;
  AlnSeqP aln_seq;
  PSSMP psm;
  
  /* Make sure region doesn't go off edge */
  if (reg_start < 1) {
    reg_start = 1;
  }
  if (reg_end > maln->ref->seq_len) {
    reg_end = maln->ref->seq_len;
  }
  
  bcs = (BaseCountsP)save_malloc(sizeof(BaseCounts));
  reset_base_counts(bcs);
  
  /* Find how many gaps are in this region.
     Note that this sum also includes gaps[reg_end], one entry more
     than the loops below visit, so the buffers are sized generously */
  for (i = reg_start-1; i <= reg_end; i++) {
    num_gaps += maln->ref->gaps[i];
  }
  
  /* Make char arrays long enough for the sequence plus
     gaps for the reference, the consensus, and a single 
     read. These will be populated and output by the rest
     of this function.
     NOTE(review): read_reg is allocated here and freed at the end
     but is never otherwise used in this function.
  */
  consensus = (char*)save_malloc((num_gaps + (reg_end-reg_start+1) + 10)
				 * sizeof(char));
  aln_ref = (char*)save_malloc((num_gaps + (reg_end-reg_start+1) + 10)
			       * sizeof(char));
  read_reg = (char*)save_malloc((num_gaps + (reg_end-reg_start+1) + 10)
				* sizeof(char));
	
  /* Make char and int array for insert consensus and
     insert coverage to be used whenever needed */
  ins_cons = (char*)save_malloc(MAX_INS_LEN * sizeof(char));
  ins_cov = (int* )save_malloc(MAX_INS_LEN * sizeof(int));
  
  /* First pass: build the gapped reference line (aln_ref) and the
     consensus line (consensus) in parallel; cons_pos is the shared
     write position in both */
  cons_pos = 0;
  for (ref_pos = reg_start - 1; ref_pos < reg_end; ref_pos++) {
    ref_gaps = maln->ref->gaps[ref_pos];
    /* Add these gaps to the reference aligned string and the inserted
       sequence to the consensus[] */
    if (ref_gaps > 0) {
      find_ins_cons(maln, ref_pos, ins_cons, ins_cov, out_format);
      for (j = 0; j < ref_gaps; j++) {
	aln_ref[cons_pos] = '-';
	consensus[cons_pos] = ins_cons[j];
	cons_pos++;
      }
    }
    /* Re-zero all the base counts */
    reset_base_counts(bcs);
    
    /* Find all the aligned fragments that include this
       position and make a consensus from it */
    for (j = 0; j < maln->num_aln_seqs; j++) {
      aln_seq = maln->AlnSeqArray[j];
      /* Does this aligned fragment cover this position? */
      if ( (aln_seq->start <= ref_pos) && // checked
	   (aln_seq->end >= ref_pos)) {
	/* Reverse-complemented fragments are scored with maln->rpsm,
	   all others with maln->fpsm */
	if (aln_seq->revcom) {
	  psm = maln->rpsm;
	} else {
	  psm = maln->fpsm;
	}
	add_base(aln_seq->seq[ref_pos - aln_seq->start], bcs, psm,
		 aln_seq->smp[ref_pos - aln_seq->start]);
      }
    }
    
    consensus[cons_pos] = find_consensus(bcs, maln->cons_code);
    aln_ref[cons_pos] = maln->ref->seq[ref_pos];
    cons_pos++;
  }
  
  consensus[cons_pos] = '\0';
  aln_ref[cons_pos] = '\0';
  
  /* Now print the reference and the consensus */
  if (out_format == 61) {
    fasta_aln_print(aln_ref, maln->ref->id);
    fasta_aln_print(consensus, "Consensus");
  } else {
    if (in_color) {
      printf("%-20.20s ", maln->ref->id);
      color_print(aln_ref);
      printf("%-20.20s ", "Consensus");
      color_print(consensus);
    } else
      printf("%-20.20s %s\n%-20s %s\n", maln->ref->id, aln_ref,
	     "Consensus", consensus);
  }
  
  /* 
     Alloc memory for the string to hold each read (plus .'s outside)
     and alloc memory for the special id which is the regular ID
     plus the code for whether it's truncated, reverse complemented,
     and the number of input sequences
  */
  read_str = (char*)save_malloc(strlen(aln_ref) * sizeof(char) + 1);
  read_id  = (char*)save_malloc((MAX_ID_LEN + 4) * sizeof(char) + 1);
  /* Find every sequence that overlaps this region and print
     the overlapping segment */
  for (j = 0; j < maln->num_aln_seqs; j++) {
    aln_seq = maln->AlnSeqArray[j];
    if (alnseq_ol_reg(aln_seq, (reg_start-1), (reg_end-1)) ) {
      read_out_pos = 0;
      /* Build the 4-character ID prefix: 't' or '_' for trimmed,
	 'r' or '_' for reverse complement, then the number of
	 inputs as at least two digits */
      if (aln_seq->trimmed) {
	read_id[0] = 't';
      } else {
	read_id[0] = '_';
      }
      
      if (aln_seq->revcom) {
	read_id[1] = 'r';
      } else {
	read_id[1] = '_';
      }
      sprintf( &read_id[2], "%0.2d", aln_seq->num_inputs );
      /* Cut the prefix at exactly four characters, whatever
	 sprintf wrote, before appending the real ID */
      read_id[4] = '\0';

      strcat(read_id, aln_seq->id);
      if (out_format == 6) {
	printf("%-20.20s ", read_id);
      }
      /* Second pass over the region: lay this read out in the same
	 column layout as aln_ref, writing into read_str */
      for (ref_pos = reg_start - 1; ref_pos < reg_end; ref_pos++) {
	ref_gaps = maln->ref->gaps[ref_pos];
	/* Check to make sure that this fragment has started and
	   not ended by this ref_pos */
	if ( (aln_seq->start <= ref_pos) && // checked
	     (aln_seq->end >= ref_pos)) {
	  if (ref_gaps > 0) {
	    if (aln_seq->ins[ref_pos - aln_seq->start] == NULL) {
	      ins_len = 0;
	    } else {
	      ins_len
		= strlen(aln_seq->ins[ref_pos - aln_seq->start]);
	    }
	    if (aln_seq->start == ref_pos) {
	      // Exactly at the beginning of this frag
	      for (i = 0; i < ref_gaps; i++) {
		read_str[read_out_pos++] = '.';
		//		printf( "." );
	      }
	    } else {
	      // Just a normal, interior gapped position
	      if (ins_len > 0) {
		ins_seq
		  = aln_seq->ins[ref_pos - aln_seq->start];
		ins_seq_len = strlen(ins_seq);
		for (i = 0; i < ins_seq_len; i++) {
		  read_str[read_out_pos++] = ins_seq[i];
		}
		//		printf( "%s", aln_seq->ins[ref_pos - aln_seq->start] );
	      }
	      /* Pad a shorter (or absent) insert with '-' up to the
		 width of the reference gap */
	      for (i = 0; i < (ref_gaps - ins_len); i++) {
		read_str[read_out_pos++] = '-';
		//		printf( "-" );
	      }
	    }
	  }
	  read_str[read_out_pos++]
	    = aln_seq->seq[ref_pos - aln_seq->start];
	  //printf( "%c", aln_seq->seq[ref_pos - aln_seq->start] );
	} else {
	  // This fragment doesn't actually cover this base
	  for (i = 0; i < ref_gaps; i++) {
	    // print this . for all ref gaps
	    read_str[read_out_pos++] = '.';
	    // printf( "." );
	  }
	  read_str[read_out_pos++] = '.';
	  //printf( "." );
	}
      }
      read_str[read_out_pos] = '\0';
      if (out_format == 61) {
	fasta_aln_print(read_str, read_id);
      } else {
	
	if (in_color) {
	  color_print(read_str);
	} else
	  printf("%s\n", read_str);
      }
    }
  }
  free(bcs);
  free(consensus);
  free(aln_ref);
  free(read_reg);
  free(ins_cons);
  free(ins_cov);
  free(read_str);
  free(read_id);
}
Exemplo n.º 14
0
/* Takes a MapAlignmentP and a position (pos) where some of
 the aligned fragments have an insert relative to the
 reference. That is, maln->ref->gaps[pos] > 0.
 Fills ins_cons[] with the consensus base and cons_cov[] with
 the coverage for each of the maln->ref->gaps[pos] insert
 columns. Both arrays must be sized by the caller.
 A fragment contributes only if it starts before pos and ends
 at or after it; where its own insert is shorter than the gap
 (or missing altogether) it contributes '-'.
 If out_format is 4, show_single_pos is called for every insert
 column whose consensus is not '-'; if it is 41, it is called
 for every insert column.
 */
void find_ins_cons(MapAlignmentP maln, int pos, char* ins_cons, int* cons_cov,
		int out_format) {
	int frag, col, gap_len, frag_ins_len;
	char base;
	char* frag_ins;
	AlnSeqP frag_aln;
	BaseCountsP* col_counts;
	BaseCountsP counts_block;
	PSSMP matrix;

	gap_len = maln->ref->gaps[pos];

	/* One BaseCounts per insert column, held in a single block and
	 reached through an array of pointers */
	col_counts = (BaseCountsP*)save_malloc(gap_len * sizeof(BaseCountsP));
	counts_block = (BaseCountsP)save_malloc(gap_len * sizeof(BaseCounts));

	for (col = 0; col < gap_len; col++) {
		col_counts[col] = &counts_block[col];
		reset_base_counts(col_counts[col]);
	}

	for (frag = 0; frag < maln->num_aln_seqs; frag++) {
		frag_aln = maln->AlnSeqArray[frag];

		/* Skip fragments that do not span the gap. One that starts
		 exactly at pos does not count, because the gap sits, by
		 convention, just upstream of this position */
		if ( (frag_aln->start >= pos) || (frag_aln->end < pos) ) {
			continue;
		}

		matrix = frag_aln->revcom ? maln->rpsm : maln->fpsm;

		/* How much actual inserted sequence does it have here? */
		frag_ins = frag_aln->ins[pos - frag_aln->start];
		if (frag_ins == NULL) {
			frag_ins_len = 0;
		} else {
			frag_ins_len = strlen(frag_ins);
		}

		for (col = 0; col < gap_len; col++) {
			if (col < frag_ins_len) {
				base = frag_ins[col];
			} else {
				base = '-';
			}
			add_base(base, col_counts[col], matrix,
					frag_aln->smp[pos - frag_aln->start]);
		}
	}

	for (col = 0; col < gap_len; col++) {
		ins_cons[col] = find_consensus(col_counts[col], maln->cons_code);
		cons_cov[col] = col_counts[col]->cov;
		if ( ((out_format == 4) && (ins_cons[col] != '-'))
				|| (out_format == 41) ) {
			show_single_pos(pos, '-', ins_cons[col], col_counts[col]);
		}
	}

	free(counts_block);
	free(col_counts);
}
Exemplo n.º 15
0
int main( int argc, char* argv[] ) {

  char mat_fn[MAX_FN_LEN+1];
  char maln_fn[MAX_FN_LEN+1];
  char fastq_out_fn[MAX_FN_LEN+1];
  char maln_root[MAX_FN_LEN+1];
  char ref_fn[MAX_FN_LEN+1];
  char frag_fn[MAX_FN_LEN+1];
  char adapter_code[2]; // place to keep the argument for -a (which adapter to trim)
  char* c_time; // place to keep asctime string
  char* test_id;

  int ich;
  int any_arg = 0;
  int Hard_cut = 0; // If 0 => use dynamic score cutoff, if > 0 use this instead
  int circular = 0; // Boolean, TRUE if reference sequence is circular
  int make_fastq = 0; // Boolean, TRUE if we should also output fastq database of seqs in assembly
  int seq_code = 0; // code to indicate sequence input format; 0 => fasta; 1 => fastq
  int do_adapter_trimming = 0; // Boolean, TRUE if we should try to trim
                               // adapter from input sequences
  int iterate = 0; //Boolean, TRUE means interate the assembly until convergence
  // on an assembled sequence
  int FINAL_ONLY = 0; //Boolean, TRUE means only write out the final assembly maln file
                      //         FALSE (default) means write out each one
  int ids_rest = 0; // Boolean, TRUE means restrict analysis to IDs in input file
  int repeat_filt = 0; //Boolean, TRUE means remove sequences that are repeats, 
                       // keeping best-scoring representative
  int repeat_qual_filt = 0; //Boolean, TRUE means remove sequences that are repeats,
                            // keeping best quality score sum representative
  int just_outer_coords = 1; // Boolean, TRUE means just use strand, start, and end to
                             // determine if sequences are redundant
  int SCORE_CUT_SET = 0; //Boolean, TRUE means user has set a length/score cutoff line
  int seen_seqs = 0;
  int hp_special = 0; // Boolean, TRUE means user wants hp gap special discount
  int distant_ref = 0; // Boolean, TRUE means the initial reference sequence is
                       // known to be distantly related so keep trying to align all
                       // sequences each round
  int kmer_filt_len = -1; // length of kmer filtering, if user wants it; otherwise
                          // special value of -1 indicates this is unset
  int soft_mask = 0; //Boolean; TRUE => do not use kmers that are all lower-case
                     //        FALSE => DO use all kmers, regardless of case
  int iter_num; // Number of iterations of assembly done
  int collapse = 0; // Boolean; TRUE => collapse input sequences in FSDB to improve
                    //                  sequence quality
                    //          FALSE => (default) keep all sequences
  double slope     = DEF_S; // Set these to default unless, until user changes
  double intercept = DEF_N; // them 
  MapAlignmentP maln, // Contains all fragments initially better
                      // than FIRST_ROUND_SCORE_CUTOFF
    culled_maln;      // Contains all fragments with scores
                      // better than SCORE_CUTOFF
  AlignmentP fw_align, rc_align, adapt_align;
  
  PSSMP ancsubmat   = init_flatsubmat();
  PSSMP rcancsubmat = revcom_submat(ancsubmat);
  const PSSMP flatsubmat  = init_flatsubmat();

  KPL* fkpa; // Place to keep forward kmer array if user requested kmer 
  KPL* rkpa; // Place to keep reverse kmer array if user requested kmer 
  IDsListP good_ids;
  FragSeqP frag_seq;
  PWAlnFragP front_pwaln, back_pwaln;
  FSDB fsdb; // Database to hold sequences to iterate over
  FILE* FF;
  time_t curr_time;


  char maln_root_def[] = "assembly.maln.iter";
  extern int optind;
  extern char* optarg;
  char neand_adapt[] = "GTCAGACACGCAACAGGGGATAGGCAAGGCACACAGGGGATAGG";
  char stand_adapt[] = "CTGAGACACGCAACAGGGGATAGGCAAGGCACACAGGGGATAGG";
  char user_def_adapt[128];
  char* adapter; // set to either neand_adapt or stand_adapt based on user preference
  adapter = neand_adapt; // Default is Neandertal
  char* assembly_cons;
  char* last_assembly_cons;
  int cc = 1; // consensus code for calling consensus base
  int i;

  /* Set the default output filename until the user overrides it */
  strcpy( maln_root, maln_root_def );


  /* Process command line arguments */
  while( (ich=getopt( argc, argv, "s:r:f:m:a:p:H:I:S:N:k:q:FTciuhDMUAC" )) != -1 ) {
    switch(ich) {
    case 'c' :
      circular = 1;
      break;
    case 'q' :
      make_fastq = 1;
      strcpy( fastq_out_fn, optarg );
    case 'C' :
      collapse = 1;
      break;
    case 'i' :
      iterate = 1;
      break;
    case 'h' :
      hp_special = 1;
      break;
    case 'u' :
      repeat_filt = 1;
      break;
    case 'A' :
      just_outer_coords = 0;
      break;
    case 'U' :
      repeat_qual_filt = 1;
      break;
    case 'D' :
      distant_ref = 1;
      break;
    case 'p' :
      cc = atoi( optarg );
      any_arg = 1;
      break;
    case 'I' :
      good_ids = parse_ids( optarg );
      ids_rest = 1;
      break;
    case 'H' :
      Hard_cut = atoi( optarg );
      if ( Hard_cut <= 0 ) {
	fprintf( stderr, "Hard cutoff (-H) must be positive\n" );
	help();
	exit( 0 );
      }
      any_arg = 1;
      break;
    case 'M' :
      soft_mask = 1;
      break;
    case 's' :
      strcpy( mat_fn, optarg );
      free( ancsubmat ); // trash the flat submat we initialized with
      ancsubmat   = read_pssm( mat_fn );
      free( rcancsubmat ); // trash the init rcsubmat, too
      rcancsubmat = revcom_submat( ancsubmat );
      any_arg = 1;
      break;
    case 'r' :
      strcpy( ref_fn, optarg );
      any_arg = 1;
      break;
    case 'k' :
      kmer_filt_len = atoi( optarg );
      any_arg = 1;
      break;
    case 'f' :
      strcpy( frag_fn, optarg );
      any_arg = 1;
      break;
    case 'm' :
      strcpy( maln_root, optarg );
      any_arg = 1;
      break;
    case 'T' :
      do_adapter_trimming = 1;
      break;
    case 'a' :
      if ( strlen( optarg ) > 127 ) {
	  fprintf( stderr, "That adapter is too big!\nMIA will use the standard adapter.\n" );
	  adapter = stand_adapt;
      }
      else {
	strcpy( user_def_adapt, optarg );
	  if ( strlen( user_def_adapt ) > 1 ) {
	    adapter = user_def_adapt;
	  }
	  else {
	    if ( !( (user_def_adapt[0] == 'n') ||
		    (user_def_adapt[0] == 'N') ) ) {
	      adapter = stand_adapt;
	    }
	    else {
	      adapter = neand_adapt;
	    }
	  }
      }
      break;
    case 'S' :
      slope = atof( optarg );
      SCORE_CUT_SET = 1;
      break;
    case 'N' :
      intercept = atof( optarg );
      SCORE_CUT_SET = 1;
      break;
    case 'F' :
      FINAL_ONLY = 1;
      break;
    default :
      help();
      exit( 0 );
    }
  }

  if ( !any_arg ) {
    help();
    exit( 0 );
  }

  if ( optind != argc ) {
    fprintf( stderr, "There seems to be some extra cruff on the command line that mia does not understand.\n" );
  }

  /* Start the clock... */
  curr_time = time(NULL);
  //  c_time = (char*)save_malloc(64*sizeof(char));
  //  c_time = asctime(localtime(&curr_time));

  /* Announce that we're starting */
  fprintf( stderr, 
	   "Starting assembly of %s\nusing %s\nas reference at %s\n", 
	   frag_fn, ref_fn, 
	   asctime(localtime(&curr_time)) );


  /* Set up the maln structure */
  maln = (MapAlignmentP)init_map_alignment();
  maln->cons_code = cc; 
  if ( maln == NULL ) {
    fprintf( stderr, "Not enough memories for this\n" );
    exit( 1 );
  }

  /* Set the distant_ref flag */
  maln->distant_ref = distant_ref;

  /* Set up the FSDB for keeping good-scoring sequence in memory */
  fsdb = init_FSDB();
  if ( fsdb == NULL ) {
    fprintf( stderr, "Not enough memories for holding sequences\n" );
    exit( 1 );
  }

  /* Read in the reference sequence and make reverse complement, too*/
  if ( read_fasta_ref( maln->ref, ref_fn ) != 1 ) {
    fprintf( stderr, "Problem reading reference sequence file %s\n", ref_fn );
    exit( 1 );
  }

  /* Add wrap-around sequence (rc, too) and set maln->ref->circular
     if it's circular */
  if ( circular ) {
    add_ref_wrap( maln->ref );
  }
  else {
    maln->ref->wrap_seq_len = maln->ref->seq_len;
  }
  /* Add space for the gaps array */
  maln->ref->gaps = (int*)save_malloc((maln->ref->wrap_seq_len+1) *
				      sizeof(int));
  for( i = 0; i <= maln->ref->wrap_seq_len; i++ ) {
    maln->ref->gaps[i] = 0;
  }

  /* Set up fkpa and rkpa for list of kmers in the reference (forward and
     revcom strand) if user wants kmer filtering */
  if ( kmer_filt_len > 0 ) {
    fprintf( stderr, "Making kmer list for k-mer filtering...\n" );
    fkpa = init_kpa(kmer_filt_len);
    rkpa = init_kpa(kmer_filt_len);
    /* 
    kmer_list = (KmersP)pop_kmers( maln->ref, kmer_filt_len );
    */
    populate_kpa( fkpa, maln->ref->seq, 
		  maln->ref->wrap_seq_len, kmer_filt_len, 
		  soft_mask );
    populate_kpa( rkpa, maln->ref->rcseq, 
		  maln->ref->wrap_seq_len, kmer_filt_len,
		  soft_mask );
  }

  /* Now kmer arrays have been made if requested. We can upper case
     the reference sequences. */
  make_ref_upper( maln->ref );

  /* Set up FragSeqP to point to a FragSeq */
  frag_seq = (FragSeqP)save_malloc(sizeof(FragSeq));

  /* Set up the alignment structures for forward and reverse
     complement alignments */
  fw_align = (AlignmentP)init_alignment( INIT_ALN_SEQ_LEN,
					 (maln->ref->wrap_seq_len + 
					  (2*INIT_ALN_SEQ_LEN)),
					 0, hp_special );
  rc_align = (AlignmentP)init_alignment( INIT_ALN_SEQ_LEN,
					 (maln->ref->wrap_seq_len + 
					  (2*INIT_ALN_SEQ_LEN)),
					 1, hp_special );

  /* Set up the alignment structure for adapter trimming, if user
     wants that */
  if ( do_adapter_trimming ) {
    adapt_align = (AlignmentP)init_alignment( INIT_ALN_SEQ_LEN,
					      INIT_ALN_SEQ_LEN,
					      0, hp_special );
    /* Setup the flatsubmat */
    //flatsubmat = init_flatsubmat();
    adapt_align->submat = flatsubmat;

    adapt_align->seq2   = adapter;
    adapt_align->len2   = strlen( adapt_align->seq2 );
    pop_s2c_in_a( adapt_align );
    if ( hp_special ) {
      pop_hpl_and_hps( adapt_align->seq2, adapt_align->len2,
		       adapt_align->hprl, adapt_align->hprs );
    }
    /* Set for a semi-global that pays a penalty for unaligning the
       beginning of the adapter, but not for the end of the adapter.
       This is because if the sequence read (align->seq1) ends, then
       we won't see any more of the adapter. When we search for the
       best alignment, we'll only look in the last column, requiring that
       all of align->seq1 is accounted for */
    adapt_align->sg5    = 1;
    adapt_align->sg3    = 0;
  }

  fw_align->seq1 = maln->ref->seq;
  rc_align->seq1 = maln->ref->rcseq;
  if ( circular ) {
    fw_align->len1 = maln->ref->wrap_seq_len;
    rc_align->len1 = maln->ref->wrap_seq_len;
  }
  else {
    fw_align->len1 = maln->ref->seq_len;
    rc_align->len1 = maln->ref->seq_len;
  }

  /* Now the reference sequence and its reverse complement are
     prepared, put the s1c lookup codes in */
  pop_s1c_in_a( fw_align );
  pop_s1c_in_a( rc_align );

  if ( hp_special ) {
    pop_hpl_and_hps( fw_align->seq1, fw_align->len1,
		     fw_align->hpcl, fw_align->hpcs );
    pop_hpl_and_hps( rc_align->seq1, rc_align->len1,
		     rc_align->hpcl, rc_align->hpcs );
  }

  /* One by one, go through the input file of fragments to be aligned.
     Align them to the reference. For each fragment generating an
     alignment score better than the cutoff, merge it into the maln
     alignment. Keep track of those that don't, too. */
  FF = fileOpen( frag_fn, "r" );
  seq_code = find_input_type( FF );

  //LOG = fileOpen( log_fn, "w" );
  front_pwaln = (PWAlnFragP)save_malloc( sizeof(PWAlnFrag));
  back_pwaln  = (PWAlnFragP)save_malloc( sizeof(PWAlnFrag));

  /* Give some space to remember the IDs as we see them */
  test_id = (char*)save_malloc(MAX_ID_LEN * sizeof(char));

  /* Announce we're strarting alignment of fragments */
  fprintf( stderr, "Starting to align sequences to the reference...\n" );

  while( read_next_seq( FF, frag_seq, seq_code ) ) {
    seen_seqs++;
    strcpy( test_id, frag_seq->id );
    if ( DEBUG ) {
      fprintf( stderr, "%s\n", frag_seq->id );
    }
    if ( !ids_rest ||
	 ( bsearch( &test_id, good_ids->ids, 
		    good_ids->num_ids,
		    sizeof(char*), idCmp ) 
	   != NULL ) ) {

      if ( do_adapter_trimming ) {
	/* Trim sequence (set frag_seg->trimmed and 
	   frag_seg->trim_point field) */
	trim_frag( frag_seq, adapter, adapt_align );
      }
      else {
	frag_seq->trimmed = 0;
      }

      /* Check if kmer filtering. If so, filter */
      if ( new_kmer_filter( frag_seq, fkpa, rkpa, kmer_filt_len,
			    fw_align, rc_align ) ) {
	/* Align this fragment to the reference and write 
	   the result into pwaln; use the ancsubmat, not the reverse
	   complemented rcsancsubmat during this first iteration because
	   all sequence is forward strand
	*/
	fw_align->submat = ancsubmat;
	rc_align->submat = ancsubmat;
	
	if ( sg_align( maln, frag_seq, fsdb, 
		       fw_align, rc_align,
		       front_pwaln, 
		       back_pwaln ) == 0 ) {
	  fprintf( stderr, "Problem handling %s\n", frag_seq->id );
	}
      }  
    }
    if ( seen_seqs % 1000 == 0 ) {
      fprintf( stderr, "." );
    }
    if ( seen_seqs % 80000 == 0 ) {
      fprintf( stderr, "\n" );
    }
  }

  /* Now, fsdb is complete and points to all the things in maln.
     So we can fill in the AlnSeqP->smp array for everything in the 
     maln->AlnSeqArray to know which matrices to use for *CALLING* 
     a consensus; Conveniently, there are pointers to all of these
     in the fss->fss[X]->front|back_asp */
  pop_smp_from_FSDB( fsdb, PSSM_DEPTH );

  //fprintf( LOG, "__Finished with initial alignments__" );
  //fflush( LOG );
  fprintf( stderr, "\n" );
  iter_num = 1;

  /* Now, we need a new MapAlignment, culled_maln, that is big
     enough to hold all the unique guys from maln */
  culled_maln = init_culled_map_alignment( maln );

  /* Filtering repeats announcement */
  fprintf( stderr, "Repeat and score filtering\n" );

  /* If user wants to filter against repeats by alignment score, do it */
  if ( repeat_filt ) {  
    /* Sort fsdb by fsdb->as */
    sort_fsdb( fsdb );
    
    /* Now, everything is sorted in fsdb, so I can easily see
       which guys are unique by as, ae, and rc fields */
    set_uniq_in_fsdb( fsdb, just_outer_coords );
  }

  /* If user wants to filter against repeats by q-score sum, do it */
  if ( repeat_qual_filt ) {  
    /* Sort fsdb by fsdb->as */
    sort_fsdb_qscore( fsdb );
    
    /* Now, everything is sorted in fsdb, so I can easily see
       which guys are unique by as, ae, and rc fields */
    set_uniq_in_fsdb( fsdb, just_outer_coords );
  }

  /* Now, we know which sequences are unique, so make a
     culled_maln with just the unique guys */
  cull_maln_from_fsdb( culled_maln, fsdb, Hard_cut, 
		       SCORE_CUT_SET, slope, intercept );

  fclose(FF);

  /* Tell the culled_maln which matrices to use for assembly */
  culled_maln->fpsm = ancsubmat;
  culled_maln->rpsm = rcancsubmat;

  sort_aln_frags( culled_maln ); //invalidates fsdb->front|back_asp fields!

  fw_align->submat = ancsubmat;
  fw_align->sg5 = 1;
  fw_align->sg3 = 1;

  last_assembly_cons = (char*)save_malloc((maln->ref->seq_len +1) * 
				     sizeof(char));
  strncpy( last_assembly_cons, maln->ref->seq, 
	   maln->ref->seq_len );
  last_assembly_cons[maln->ref->seq_len] = '\0';

  /* Re-align everything with revcomped
     sequence and substitution matrices, but first
     unmask all alignment positions and collapse sequences
     if requested
  */
  memset(fw_align->align_mask, 1, fw_align->len1);
  if ( collapse ) {
    collapse_FSDB( fsdb, Hard_cut, SCORE_CUT_SET, 
		   slope, intercept );
  }
  reiterate_assembly( last_assembly_cons, iter_num, maln, fsdb,
		      fw_align, front_pwaln, back_pwaln, 
		      ancsubmat, rcancsubmat );
  pop_smp_from_FSDB( fsdb, PSSM_DEPTH );
  fprintf( stderr, "Repeat and score filtering\n" );
  if ( repeat_filt ) {
    sort_fsdb( fsdb );
    set_uniq_in_fsdb( fsdb, just_outer_coords );
  }
  if ( repeat_qual_filt ) {  
    sort_fsdb_qscore( fsdb );
    set_uniq_in_fsdb( fsdb, just_outer_coords );
  }
  cull_maln_from_fsdb( culled_maln, fsdb, Hard_cut,
		       SCORE_CUT_SET, slope, intercept );
  
  
  /* Tell the culled_maln which matrices to use for assembly */
  culled_maln->fpsm = ancsubmat;
  culled_maln->rpsm = rcancsubmat;
  
  //invalidates fsdb->front|back_asp fields!
  sort_aln_frags( culled_maln );
  sprintf( maln_fn, "%s.%d", maln_root, iter_num );
  if ( !iterate || !FINAL_ONLY ) {
    write_ma( maln_fn, culled_maln );
    if ( make_fastq ) {
      write_fastq( fastq_out_fn, fsdb );
    }
  }

  /* Are we iterating (re-aligning to the a new consensus? */
  if (iterate) {
    /* New assembly consensus announcement */
    fprintf( stderr, "Generating new assembly consensus\n" );
    assembly_cons = consensus_assembly_string( culled_maln );

    while( ( strcmp( assembly_cons, last_assembly_cons ) != 0) &&
	   (iter_num < MAX_ITER) ) {
      /* Another round...*/
      iter_num++;
      free( last_assembly_cons );
      last_assembly_cons = assembly_cons;

      fprintf( stderr, "Starting assembly iteration %d\n", 
	       iter_num );

      /* If the user wants collapsed sequences, now is the time */
      if ( collapse ) {
	collapse_FSDB( fsdb, Hard_cut, SCORE_CUT_SET, 
		       slope, intercept );
      }

      reiterate_assembly( assembly_cons, iter_num, maln, fsdb, 
			  fw_align, front_pwaln, back_pwaln,
			  ancsubmat, rcancsubmat );

      pop_smp_from_FSDB( fsdb, PSSM_DEPTH );

      fprintf( stderr, "Repeat and score filtering\n" );
      if ( repeat_filt ) {
	sort_fsdb( fsdb );
	set_uniq_in_fsdb( fsdb, just_outer_coords );
      }
      if ( repeat_qual_filt ) {
	sort_fsdb_qscore( fsdb );
	set_uniq_in_fsdb( fsdb, just_outer_coords );
      }
      cull_maln_from_fsdb( culled_maln, fsdb, Hard_cut,
			   SCORE_CUT_SET, slope, intercept );

      
      /* Tell the culled_maln which matrices to use for assembly */
      culled_maln->fpsm = ancsubmat;
      culled_maln->rpsm = rcancsubmat;

      //invalidates fsdb->front|back_asp fields!
      sort_aln_frags( culled_maln );

      sprintf( maln_fn, "%s.%d", maln_root, iter_num );
      if ( !FINAL_ONLY ) {
	fprintf( stderr, "Writing maln file for iteration %d\n", 
		 iter_num );
	write_ma( maln_fn, culled_maln );
      }
      assembly_cons = consensus_assembly_string( culled_maln );
    }
  
    /* Convergence? */
    if ( strcmp( assembly_cons, last_assembly_cons ) == 0 ) {
      fprintf( stderr, "Assembly convergence - writing final maln\n" );
      write_ma( maln_fn, culled_maln );
    }
    else {
      fprintf( stderr, "Assembly did not converge after % rounds, quitting\n" );
      write_ma( maln_fn, culled_maln );
    }
    if ( make_fastq ) {
      write_fastq( fastq_out_fn, fsdb );
    }
  }

  /* No iteration, but we must still re-align everything with revcomped
     sequence and substitution matrices to keep scores comparable to what
     they would have been had we iterated */

  /* Announce we're finished */
  curr_time = time(NULL);
  //  c_time    = asctime(localtime(&curr_time));
  fprintf( stderr, "Assembly finished at %s\n",
	   asctime(localtime(&curr_time)) );

  exit( 0 );
}
Exemplo n.º 16
0
/* reiterate_assembly
   Args: (1) new_ref_seq - NUL-terminated sequence to be used as the new
             reference; it is copied, the caller keeps ownership
         (2) iter_num - number of this iteration; used to build the
             "ConsAssem.<iter_num>" reference id, and values > 1 enable
             the id/description rewrite and the strand discovery below
         (3) maln - a MapAlignmentP big enough to store all the alignments
         (4) fsdb - a FSDB with sequences to be realigned
         (5) a - an AlignmentP big enough for the alignments
         (6) front_pwaln - a PWAlnFragP for storing front alignments
         (7) back_pwaln - a PWAlnFragP for storing back alignments
         (8) ancsubmat - a PSSMP with the forward substitution matrices
         (9) rcancsubmat - a PSSMP with the revcom substitution matrices
   Aligns all the FragSeqs from fsdb to the new reference, using the
   as and ae fields to narrow down where the alignment happens
   Resets the maln and writes all the results there
   Side effects visible here: maln->ref->seq, ->rcseq and ->gaps are
   freed and replaced (rcseq is left NULL), maln->num_aln_seqs is reset
   to 0 before merging, and the as/ae/score/unique_best/front_asp/
   back_asp fields of every realigned FragSeq are overwritten
   Returns void
*/
void reiterate_assembly( char* new_ref_seq, int iter_num,
			 MapAlignmentP maln,
			 FSDB fsdb, AlignmentP a, 
			 PWAlnFragP front_pwaln,
			 PWAlnFragP back_pwaln, 
			 PSSMP ancsubmat,
			 PSSMP rcancsubmat ) {
  int i, j,
    ref_len,
    ref_start, 
    ref_end,
    ref_frag_len, 
    max_score,
    aln_seq_len;
  FragSeqP fs;
  char iter_ref_id[MAX_ID_LEN + 1];
  char tmp_rc[INIT_ALN_SEQ_LEN + 1];
  char iter_ref_desc[] = "iteration assembly";

  /* Set up maln->ref
     Keep his seq separate from the external assembly because that
     is malloced and freed elsewhere
  */
  /* Bounded write: the id can never overrun iter_ref_id */
  snprintf( iter_ref_id, sizeof(iter_ref_id), "ConsAssem.%d", iter_num );
  free( maln->ref->seq );
  /* free(NULL) is a no-op, so no guard is needed for rcseq */
  free( maln->ref->rcseq );
  free( maln->ref->gaps );

  ref_len = strlen( new_ref_seq );
  maln->ref->seq = (char*)save_malloc((ref_len + 1)* sizeof(char));
  strcpy( maln->ref->seq, new_ref_seq );
  maln->ref->rcseq = NULL; // never again!
  /* Keep the ID and description the same if this is the 1st
     iteration. Otherwise, set it to the generic ones */
  if ( iter_num > 1 ) {
    strcpy( maln->ref->id, iter_ref_id );
    strcpy( maln->ref->desc, iter_ref_desc );
  }

  maln->ref->seq_len = ref_len;
  maln->ref->size = (ref_len+1);

  if ( maln->ref->circular ) {
    add_ref_wrap( maln->ref );
  }
  else {
    maln->ref->wrap_seq_len = maln->ref->seq_len;
  }

  /* Fresh, all-zero gaps array with one slot per wrapped reference
     position plus one */
  maln->ref->gaps = 
    (int*)save_malloc((maln->ref->wrap_seq_len+1) * sizeof(int));
  for( i = 0; i <= maln->ref->wrap_seq_len; i++ ) {
    maln->ref->gaps[i] = 0;
  }

  /* Reset its AlnSeqArray ->ins to all point to null */
  for ( i = 0; i < maln->num_aln_seqs; i++ ) {
    aln_seq_len = strlen(maln->AlnSeqArray[i]->seq);
    for ( j = 0; j < aln_seq_len; j++ ) {
      /* We couldn't have malloced any sequence for
	 inserts past our length; anything non-NULL
	 out there is cruft */
      if ( maln->AlnSeqArray[i]->ins[j] != NULL ) {
	free( maln->AlnSeqArray[i]->ins[j] );
	maln->AlnSeqArray[i]->ins[j] = NULL;
      }
    }
  }

  /* Now, remake the hpcl and hpcs arrays if hp_special, sized for
     the new (wrapped) reference */
  if ( a->hp ) {
    free( a->hpcl );
    free( a->hpcs );
    a->hpcl = (int*)save_malloc(maln->ref->wrap_seq_len*sizeof(int));
    a->hpcs = (int*)save_malloc(maln->ref->wrap_seq_len*sizeof(int));
    pop_hpl_and_hps( maln->ref->seq, 
		     maln->ref->wrap_seq_len,
		     a->hpcl, a->hpcs );     
  }

  /* Reset the number of aligned sequences in the maln */
  maln->num_aln_seqs = 0;

  /* OK, ref is set up. Let's go through all the sequences in fsdb
     and re-align them to the new reference. 
     If it's a revcom alignment,
     just use the rcancsubmat */
  for( i = 0; i < fsdb->num_fss; i++ ) {
    fs = fsdb->fss[i];

    /* Special case of distant reference and 
       !fs->strand_known => try to realign both strands
       against the entire reference to learn the 
       strand and alignment region
    */
    if ( maln->distant_ref &&
	 (fs->strand_known == 0 ) &&
	 (iter_num > 1) ) {
      ref_start = 0;
      ref_end = maln->ref->wrap_seq_len;
      ref_frag_len = ref_end - ref_start;
      a->seq1 = &maln->ref->seq[0];
      a->len1 = ref_frag_len;
      pop_s1c_in_a( a );
      a->seq2 = fs->seq;
      a->len2 = strlen( a->seq2 );
      pop_s2c_in_a( a );
      if ( a->hp ) {
	pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs );
	pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs );
      }
      /* The forward trial must score with the forward matrices.
	 Without this, a->submat is whatever the previous fragment
	 left behind (rcancsubmat after any reverse-complement trial),
	 making the result depend on fragment order */
      a->submat = ancsubmat;
      /* Align it! */
      dyn_prog( a );
      /* Find the best forward score */
      max_score = max_sg_score( a );
      if ( max_score > FIRST_ROUND_SCORE_CUTOFF ) {
	fs->strand_known = 1;
	fs->rc = 0;
	find_align_begin( a );
	fs->as = a->abc;
	fs->ae = a->aec;
	fs->score = max_score;
      }

      /* Now, try reverse complement */
      /* NOTE(review): assumes strlen(fs->seq) <= INIT_ALN_SEQ_LEN so
	 that tmp_rc cannot overflow - confirm against FragSeq */
      aln_seq_len = strlen( fs->seq );
      a->submat = rcancsubmat;
      for ( j = 0; j < aln_seq_len; j++ ) {
	tmp_rc[j] = revcom_char(fs->seq[aln_seq_len-(j+1)]);
      }
      tmp_rc[aln_seq_len] = '\0';
      a->seq2 = tmp_rc;
      /* a->len2 is unchanged: the reverse complement has the same
	 length as the forward sequence */
      pop_s2c_in_a( a );
      if ( a->hp ) {
	pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs );
	pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs );
      }
      dyn_prog( a );
      max_score = max_sg_score( a );
      /* NOTE(review): if the forward trial missed the cutoff,
	 fs->score here still holds the value from an earlier round,
	 not a forward score from this round - confirm this is the
	 intended comparison */
      if ( (max_score > FIRST_ROUND_SCORE_CUTOFF) &&
	   (max_score > fs->score) ) {
	fs->strand_known = 1;
	fs->rc = 1;
	find_align_begin( a );
	fs->as = a->abc;
	fs->ae = a->aec;
	fs->score = max_score;
	/* From now on the fragment is stored reverse-complemented */
	strcpy( fs->seq, tmp_rc );
      }
    }

    /* Do we know the strand (either because we've always
       known it or we just learned it, doesn't matter) */
    if ( fs->strand_known ) {
      if ( fs->rc ) {
	a->submat = rcancsubmat;
      }
      else {
	a->submat = ancsubmat;
      }

      a->seq2 = fs->seq;
      a->len2 = strlen( a->seq2 );
      pop_s2c_in_a( a );

      /* Set up the alignment limits on the reference: the previous
	 alignment region widened by REALIGN_BUFFER on each side and
	 clamped to [0, wrap_seq_len] */
      if ( ((fs->as - REALIGN_BUFFER) < 0 ) ) {
	ref_start = 0;
      }
      else {
	ref_start = (fs->as - REALIGN_BUFFER);
      }
      if ( (fs->ae + REALIGN_BUFFER + 1) > 
	   maln->ref->wrap_seq_len ) {
	ref_end = maln->ref->wrap_seq_len;
      }
      else {
	ref_end = fs->ae + REALIGN_BUFFER;
      }

      /* Check to make sure the regions encompassed by ref_start to
	 ref_end is reasonable given how long this fragment is. If
	 not, just realign this whole mofo again because the reference
	 has probably changed a lot between iterations */
      if ( (ref_start + a->len2) > ref_end ) {
	ref_start = 0;
	ref_end = maln->ref->wrap_seq_len;
      }
    
      ref_frag_len = ref_end - ref_start;
      a->seq1 = &maln->ref->seq[ref_start];
      a->len1 = ref_frag_len;
      pop_s1c_in_a( a );
      
      /* If we want the homopolymer discount, the necessary arrays of
	 hp starts and lengths must be set up anew */
      if ( a->hp ) {
	pop_hpl_and_hps( a->seq2, a->len2, a->hprl, a->hprs );
	pop_hpl_and_hps( a->seq1, a->len1, a->hpcl, a->hpcs );
      }

      /* Align it! */
      dyn_prog( a );
    
      /* Find the best score; the return value is not used below
	 (a->best_score is read instead), but the call is kept as in
	 the original flow */
      max_score = max_sg_score( a );

      find_align_begin( a );

      /* First, put all alignment in front_pwaln */
      populate_pwaln_to_begin( a, front_pwaln );
      
      /* Load up front_pwaln */
      strcpy( front_pwaln->ref_id, maln->ref->id );
      strcpy( front_pwaln->ref_desc, maln->ref->desc );
      
      strcpy( front_pwaln->frag_id, fs->id );
      strcpy( front_pwaln->frag_desc, fs->desc );
      
      front_pwaln->trimmed = fs->trimmed;
      front_pwaln->revcom  = fs->rc;
      front_pwaln->num_inputs = fs->num_inputs;
      front_pwaln->segment = 'a';
      front_pwaln->score = a->best_score;
  
      /* a->abc / a->aec are relative to the reference window, so
	 shift them back by ref_start */
      front_pwaln->start = a->abc + ref_start;
      front_pwaln->end   = a->aec + ref_start;

      /* Update stats for this FragSeq */
      fs->as = a->abc + ref_start;
      fs->ae = a->aec + ref_start;
      fs->unique_best = 1;
      fs->score = a->best_score;

      if ( front_pwaln->end > maln->ref->seq_len ) {
	/* This alignment wraps around - adjust the end to
	   demonstrate this for split_maln check */
	front_pwaln->end = front_pwaln->end - maln->ref->seq_len;
      }

      if ( front_pwaln->start > front_pwaln->end ) {
	/* Move wrapped bit to back_pwaln */
	split_pwaln( front_pwaln, back_pwaln, maln->ref->seq_len );
	merge_pwaln_into_maln( front_pwaln, maln );
	fs->front_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1];
	merge_pwaln_into_maln( back_pwaln, maln );
	fs->back_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1];
      }
      else { 
	merge_pwaln_into_maln( front_pwaln, maln );
	fs->front_asp = maln->AlnSeqArray[maln->num_aln_seqs - 1];
      }
    }
  }
  return;
}