Example #1
0
/****************************************************************************
 * Extract a small alignment out of the middle of a larger alignment.
 *
 * Copies up to `width` columns, beginning at column `start`, out of every
 * sequence and out of the consensus string of `alignment`, and builds a new
 * alignment from them. The alignment name and description, and each
 * sequence's name, description and offset, are carried over unchanged.
 *
 * No range checking is done on `start` or `width` in this function.
 * NOTE(review): callers presumably guarantee that `start` lies inside the
 * alignment -- confirm.
 *
 * Returns the newly allocated alignment.
 ****************************************************************************/
ALIGNMENT_T* extract_subalignment
  (int start,
   int width,
   ALIGNMENT_T* alignment)
{
  int num_sequences = get_num_aligned_sequences(alignment);
  SEQ_T** sequences = get_alignment_sequences(alignment);
  SEQ_T** subsequences = (SEQ_T**)mm_malloc(num_sequences * sizeof(SEQ_T*));

  // Extract the specified columns into a new list of sequences.
  // A single scratch buffer is reused for every sequence.
  int i_seq = 0;
  char* subsequence = mm_malloc((width + 1) * sizeof(char));
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    SEQ_T* this_seq = sequences[i_seq];
    char* raw_seq = get_raw_sequence(this_seq);
    strncpy(subsequence, raw_seq + start, width);
    // strncpy does not terminate the copy when the source is long enough.
    subsequence[width] = '\0';
    subsequences[i_seq] = 
      allocate_seq(get_seq_name(this_seq),
		   get_seq_description(this_seq),
		   get_seq_offset(this_seq), 
		   subsequence);
  }

  // Extract the consensus string in the specified columns.
  char* consensus = get_consensus_string(alignment);
  char* subconsensus = mm_malloc(sizeof(char) * (width + 1));
  strncpy(subconsensus, consensus + start, width);
  subconsensus[width] = '\0';

  // Allocate the new alignment.
  ALIGNMENT_T* subalignment 
    = allocate_alignment(get_alignment_name(alignment),
			 get_alignment_description(alignment),
			 num_sequences,
			 subsequences,
			 subconsensus);

  // Free local dynamic memory. allocate_alignment builds its own sequence
  // objects and copies the consensus string, so every temporary made here,
  // including the consensus buffer, must be released.
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    free_seq(subsequences[i_seq]);
  }
  myfree(subsequences);
  myfree(subsequence);
  myfree(subconsensus);
  return(subalignment);
}
Example #2
0
File: ramen.c Project: CPFL/gmeme
/*
 * Read all sequences from args.sequence_filename and score each of them
 * against every motif in the global `motifs` collection.
 *
 * Fills in these globals:
 *   seq_ids     - the name of each sequence, in file order;
 *   seq_fscores - atof() of each sequence's description field;
 *   results     - results[i][j] is the score of motif i on sequence j,
 *                 divided by the motif's best attainable product of column
 *                 probabilities when args.linreg_normalise is TRUE.
 *
 * Calls ramen_terminate(1) if the sequence file cannot be opened or the
 * result table cannot be allocated.
 *
 * NOTE(review): seq_list and the sequences it holds are not released here;
 * confirm whether ownership is meant to pass elsewhere.
 */
void ramen_scan_sequences() {
		FILE* seq_file = NULL;
		MOTIF_T* motif = NULL;
		SEQ_T* sequence = NULL;
		SEQ_T** seq_list;
		int num_seqs;
		int i;
		int j;

		//Open the file.
		if (open_file(args.sequence_filename, "r", FALSE, "FASTA", "sequences", &seq_file) == 0) {
				fprintf(stderr, "Couldn't open the file %s.\n", args.sequence_filename);
				ramen_terminate(1);
		}

		//Start reading in the sequences
		read_many_fastas(ramen_alph, seq_file, MAX_SEQ_LENGTH, &num_seqs, &seq_list);

		//This function opened the stream and does not use it past this point.
		fclose(seq_file);
		seq_file = NULL;

		seq_ids = new_string_list();
		seq_fscores = allocate_array(num_seqs);

		//Allocate the required space for results.
		//malloc(0) may legitimately return NULL, so only a non-empty request
		//that comes back NULL is treated as a failure.
		results = malloc(sizeof(double*) * motifs.num);
		if (results == NULL && motifs.num > 0) {
				fprintf(stderr, "Couldn't allocate memory for the scan results.\n");
				ramen_terminate(1);
		}
		for (i=0;i<motifs.num;i++) {
				results[i] = malloc(sizeof(double)*num_seqs);
				if (results[i] == NULL && num_seqs > 0) {
						fprintf(stderr, "Couldn't allocate memory for the scan results.\n");
						ramen_terminate(1);
				}
		}

		for (j=0;j<num_seqs;j++) {

				fprintf(stderr, "\rScanning %i of %i sequences...", j+1, num_seqs);

				//copy the pointer into our current object for clarity
				sequence = seq_list[j];

				//Read the fluorescence data from the description field.
				add_string(get_seq_name(sequence),seq_ids);
				set_array_item(j,atof(get_seq_description(sequence)),seq_fscores);

				//Scan with each motif.
				for (i=0;i<motifs.num;i++) {
						//Motif i occupies two consecutive slots in motifs.motifs.
						//NOTE(review): the second slot is presumably the reverse complement - confirm.
						int motifindex = i*2;

						results[i][j] = ramen_sequence_scan(sequence, motif_at(motifs.motifs, motifindex), 
											      motif_at(motifs.motifs, motifindex+1),
											      NULL, NULL, //No need to pass PSSM.
										              AVG_ODDS, 0, TRUE, 0, motifs.bg_freqs);

						if (TRUE == args.linreg_normalise) {
								//Divide by the product, over all motif columns, of the
								//largest of the A/C/G/T frequencies in that column.
								const char* bases = "ACGT";
								int k;
								int b;
								double maxscore = 1;
								motif = motif_at(motifs.motifs,motifindex); 
								for (k=0;k<get_motif_length(motif);k++) {
										double maxprob = 0;
										for (b=0;b<4;b++) {
												double prob = get_matrix_cell(k, alph_index(ramen_alph, bases[b]), get_motif_freqs(motif));
												if (maxprob < prob) {
														maxprob = prob;
												}
										}
										maxscore *= maxprob;
								}
								results[i][j] /= maxscore;
						}
				}
		}

}
Example #3
0
/****************************************************************************
 * Remove from the alignment all columns that contain gaps for the
 * specified species.
 *
 * A column is dropped when the sequence belonging to `species` holds '-' or
 * '.' in it; every other column is copied, for all sequences and for the
 * consensus string, into a new alignment. Sequence names, descriptions and
 * offsets, and the alignment name and description, are carried over.
 *
 * Calls die() if `species` is not found among the alignment's species names.
 * The input alignment is only read through its accessor functions here.
 *
 * Returns the newly allocated alignment.
 ****************************************************************************/
ALIGNMENT_T* remove_alignment_gaps
  (char*        species,
   ALIGNMENT_T* alignment)
{
  // Locate this species in the alignment.
  int species_index = get_index_in_string_list(species, 
					       get_species_names(alignment));
  if (species_index == -1) {
    die("Can't find %s in alignment.\n", species);
  }
  SEQ_T* this_seq = get_alignment_sequence(species_index, alignment);

  // Get the dimensions of the original matrix.
  int num_sequences = get_num_aligned_sequences(alignment);
  int alignment_length = get_alignment_length(alignment);

  // Allocate memory for raw sequences that will constitute the new alignment.
  // Each buffer holds characters (not pointers) and is sized for the case
  // where no column is removed, plus a terminator. The code relies on
  // mm_calloc zero-filling the buffers so that the shortened strings end up
  // terminated (assumes calloc semantics -- TODO confirm).
  char** raw_sequences = (char**)mm_malloc(sizeof(char*) * num_sequences);
  int i_seq = 0;
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    raw_sequences[i_seq] 
      = (char*)mm_calloc(alignment_length + 1, sizeof(char));
  }
  char* consensus = get_consensus_string(alignment);
  char* new_consensus 
    = (char*)mm_calloc(alignment_length + 1, sizeof(char));

  // Iterate over all columns. i_raw is the next free column in the output.
  int i_column;
  int i_raw = 0;
  for (i_column = 0; i_column < alignment_length; i_column++) {

    // Is there a gap in the reference species?
    char reference_char = get_seq_char(i_column, this_seq);
    if ((reference_char != '-') && (reference_char != '.')) {

      // If no gap, then copy over this column.
      for (i_seq = 0; i_seq < num_sequences; i_seq++) {
	SEQ_T* this_sequence = get_alignment_sequence(i_seq, alignment);
	raw_sequences[i_seq][i_raw] = get_seq_char(i_column, this_sequence);
      }
      new_consensus[i_raw] = consensus[i_column];
      i_raw++;
    }
  }

  // Create new sequence objects.
  SEQ_T** new_sequences = (SEQ_T**)mm_malloc(num_sequences * sizeof(SEQ_T*));
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    SEQ_T* this_sequence = get_alignment_sequence(i_seq, alignment);
    new_sequences[i_seq] = allocate_seq(get_seq_name(this_sequence),
					get_seq_description(this_sequence),
					get_seq_offset(this_sequence),
					raw_sequences[i_seq]);
  }

  // Allocate the new alignment.
  ALIGNMENT_T* new_alignment
    = allocate_alignment(get_alignment_name(alignment),
			 get_alignment_description(alignment),
			 num_sequences,
			 new_sequences,
			 new_consensus);
  
  // Free local dynamic memory.
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    myfree(raw_sequences[i_seq]);
    free_seq(new_sequences[i_seq]);
  }
  myfree(raw_sequences);
  myfree(new_sequences);
  myfree(new_consensus);

  return(new_alignment);
}
Example #4
0
/****************************************************************************
 * Allocate one alignment object. Name and description may be NULL.
 *
 * The name and description are copied into the alignment's own fixed-size
 * fields, truncated to MAX_ALIGNMENT_NAME and MAX_ALIGNMENT_COMMENT
 * characters respectively (a warning is printed to stderr when the name is
 * truncated). A new sequence object is allocated for each entry of
 * `sequences`, and the consensus string is stored through copy_string, so
 * the arguments passed in are not retained by the alignment.
 *
 * `num_sequences` must be positive and `sequences` and `consensus_string`
 * must be non-NULL (checked with assert). Every sequence is expected to have
 * the same length as the first one; this is checked with myassert.
 * 
 * Returns a pointer to the newly created alignment.
 ****************************************************************************/
ALIGNMENT_T* allocate_alignment(
   char* name,
   char* description,
   int num_sequences,
   SEQ_T** sequences,
   char* consensus_string
)
{
  assert(num_sequences > 0);
  assert(sequences != NULL);
  assert(consensus_string != NULL);

  // Allocate the alignment object.
  ALIGNMENT_T* new_alignment = (ALIGNMENT_T*)mm_malloc(sizeof(ALIGNMENT_T));
  if (new_alignment == NULL) {
    die("Error allocating alignment\n");
  }

  // Store the name, truncating if necessary.
  // NOTE(review): writing index MAX_ALIGNMENT_NAME assumes the field holds
  // MAX_ALIGNMENT_NAME + 1 characters -- confirm against the struct.
  if (name != NULL) {
    strncpy(new_alignment->name, name, MAX_ALIGNMENT_NAME);
    new_alignment->name[MAX_ALIGNMENT_NAME] = '\0';
    if (strlen(new_alignment->name) != strlen(name)) {
      fprintf(stderr, "Warning: truncating alignment program name %s to %s.\n",
	      name, new_alignment->name);
    }
  } else {
    new_alignment->name[0] = '\0';
  }

  // Store the description, truncating if necessary.
  if (description != NULL) {
    strncpy(new_alignment->desc, description, MAX_ALIGNMENT_COMMENT);
    new_alignment->desc[MAX_ALIGNMENT_COMMENT] = '\0';
  } else {
    new_alignment->desc[0] = '\0';
  }

  // Store the sequences.
  new_alignment->sequences = (SEQ_T**) mm_malloc(num_sequences * sizeof(SEQ_T*));
  if (new_alignment->sequences == NULL) {
    die("Error allocating sequences\n");
  }
  new_alignment->num_sequences = num_sequences;
  int seq_length = (int)strlen(get_raw_sequence(sequences[0]));
  int i;
  for (i = 0; i < num_sequences; i++) {
    char* raw_sequence = get_raw_sequence(sequences[i]);
    // Measure once, as an int, so the value matches both the int comparison
    // and the %d conversion in the message below.
    int this_length = (int)strlen(raw_sequence);
    myassert(TRUE,
	     this_length == seq_length,
	     "Sequence #1 (%s) is length=%d, but sequence #%d (%s) is length=%d.\n<%s>\n",
	     get_seq_name(sequences[0]), seq_length, i, 
	     get_seq_name(sequences[i]), this_length,
	     raw_sequence);
    new_alignment->sequences[i] = 
      allocate_seq(get_seq_name(sequences[i]),
        get_seq_description(sequences[i]),
        get_seq_offset(sequences[i]), 
        raw_sequence
      );
  }

  // Fill in the remaining fields.
  new_alignment->length = seq_length;
  copy_string(&(new_alignment->consensus_string), consensus_string);

  return(new_alignment);
}