Beispiel #1
0
/*
 * Release a sorted alignment block: its data buffer, every in-group and
 * out-group sequence, both sequence arrays, and finally the block itself.
 * A NULL block is ignored.
 */
void free_sorted_alignment(sorted_alignment_block aln){
   if(!aln){
      return;
   }

   free(aln->data);

   /* In-group sequences first, then the array that held them. */
   for(int k = 0; k < aln->in_size; k++){
      free_sequence(aln->in_sequences[k]);
   }
   free(aln->in_sequences);

   /* Same for the out-group. */
   for(int k = 0; k < aln->out_size; k++){
      free_sequence(aln->out_sequences[k]);
   }
   free(aln->out_sequences);

   free(aln);
}
Beispiel #2
0
/*
 * Release an alignment block: its data buffer, each stored sequence, the
 * sequence array, and the block itself. A NULL block is ignored.
 */
void free_alignment_block(alignment_block aln){
   if(!aln){
      return;
   }

   free(aln->data);

   int idx = 0;
   while(idx < aln->size){
      free_sequence(aln->sequences[idx]);
      idx++;
   }

   free(aln->sequences);
   free(aln);
}
/*
 * Free a SEQUENCES structure: each of its num_seq entries is released with
 * free_sequence(), then the container is handed to xfree().
 * xfree() is called on ss even when ss is NULL.
 */
void free_sequences ( SEQUENCES *ss ) {

    if (ss != NULL) {
	int n = 0;

	while (n < ss->num_seq) {
	    free_sequence (ss->sequence[n]);
	    n++;
	}
    }

    xfree (ss);
}
/*
 * Replace every stored sequence whose id equals seq_id with a copy of s.
 *
 * Walks the file-level `sequences` collection. For each match the copy of s
 * is made first; only when that succeeds is the old entry freed and the slot
 * updated. Copying before freeing keeps the slot valid if copy_sequence()
 * fails, and keeps s readable when the caller passes the stored entry itself.
 *
 * Returns 0 on success (including when no entry matched), or -1 as soon as
 * copy_sequence() returns NULL; in that case the matching slot still holds
 * its previous sequence.
 */
int save_change_to_sequence (int seq_id, SEQUENCE *s) {

    int i, num_seq;
    SEQUENCE *replacement;

    num_seq = sequences->num_seq;

    for (i = 0; i < num_seq; i++) {
	if (seq_id != sequences->sequence[i]->id) continue;

	/* Build the replacement before touching the stored entry. */
	replacement = copy_sequence (s);
	if (replacement == NULL) return -1;

	free_sequence (sequences->sequence[i]);
	sequences->sequence[i] = replacement;
    }
    return 0;
}
Beispiel #5
0
/*
 * Read paired records from `fwd` and `rev` and pass each pair to
 * scan_tag_core().
 *
 * lib    - tag library; p1len/p2len cap how many bases of each read are used.
 * fwd    - reader for the first read of each pair.
 * rev    - reader for the second read of each pair.
 * is_fq  - non-zero: read FASTQ records; zero: read FASTA records.
 * pdirs  - bit 0x01 requests a scan_tag_core() call with last argument 0,
 *          bit 0x02 requests one with last argument 1.
 *
 * Each base is translated through base_bit_table; a table value of 4
 * (presumably an ambiguous base -- confirm against the table) is replaced by
 * a random 2-bit code from lrand48(). Reading stops as soon as either reader
 * fails to deliver a record. Aborts if the working buffers cannot be
 * allocated, since the function has no way to report an error.
 */
void scan_tags(SeqTagLib *lib, FileReader *fwd, FileReader *rev, int is_fq, int pdirs){
	Sequence *seq1, *seq2;
	uint64_t id;
	uint32_t i, len1, len2;
	uint8_t *buf1, *buf2, v;
	__n_seqs = lib->n_seq;
	id = 0;
	seq1 = seq2 = NULL;
	buf1 = malloc(lib->p1len);
	buf2 = malloc(lib->p2len);
	/* malloc(0) may legitimately return NULL, so only a failed non-empty request is fatal. */
	if((buf1 == NULL && lib->p1len > 0) || (buf2 == NULL && lib->p2len > 0)){
		abort();
	}
	while(is_fq? (fread_fastq_adv(&seq1, fwd, FASTQ_FLAG_NO_NAME | FASTQ_FLAG_NO_QUAL) && 
		fread_fastq_adv(&seq2, rev, FASTQ_FLAG_NO_NAME | FASTQ_FLAG_NO_QUAL)) :
		(fread_fasta_adv(&seq1, fwd, FASTA_FLAG_NO_NAME) && fread_fasta_adv(&seq2, rev, FASTA_FLAG_NO_NAME))){
		id ++;
		__n_scan ++;
		/* Use at most p1len bases of the first read. */
		len1 = seq1->seq.size;
		if(len1 > lib->p1len) len1 = lib->p1len;
		for(i=0;i<len1;i++){
			/* Index as unsigned char so bytes >= 0x80 cannot become a negative index. */
			v = base_bit_table[(unsigned char)seq1->seq.string[i]];
			if(v == 4) v = lrand48() & 0x03;
			buf1[i] = v;
		}
		/* Use at most p2len bases of the second read. */
		len2 = seq2->seq.size;
		if(len2 > lib->p2len) len2 = lib->p2len;
		for(i=0;i<len2;i++){
			v = base_bit_table[(unsigned char)seq2->seq.string[i]];
			if(v == 4) v = lrand48() & 0x03;
			buf2[i] = v;
		}
		if(pdirs & 0x01){ scan_tag_core(lib, id, buf1, len1, buf2, len2, 0); }
		if(pdirs & 0x02){ scan_tag_core(lib, id, buf1, len1, buf2, len2, 1); }
	}
	free(buf1);
	free(buf2);
	if(seq1) free_sequence(seq1);
	if(seq2) free_sequence(seq2);
}
/*
 * Free a SEQUENCE together with the members it points to. Every non-NULL
 * member is released with its matching routine; a nested fragment is
 * released by a recursive call. A NULL sequence is ignored.
 */
void free_sequence (SEQUENCE *sequence) {
  if (sequence == NULL) {
    return;
  }

  /* Members released with xfree(). */
  if (sequence->seq != NULL) {
    xfree(sequence->seq);
  }
  if (sequence->name != NULL) {
    xfree(sequence->name);
  }
  if (sequence->source != NULL) {
    xfree(sequence->source);
  }

  if (sequence->feature_table != NULL) {
    free_feature_table(sequence->feature_table);
  }

  /* End and cut-site descriptions all use free_site_hang(). */
  if (sequence->left_end != NULL) {
    free_site_hang (sequence->left_end);
  }
  if (sequence->right_end != NULL) {
    free_site_hang (sequence->right_end);
  }
  if (sequence->cut_site_1 != NULL) {
    free_site_hang (sequence->cut_site_1);
  }
  if (sequence->cut_site_2 != NULL) {
    free_site_hang (sequence->cut_site_2);
  }

  if (sequence->fragment != NULL) {
    free_sequence (sequence->fragment);
  }
  if (sequence->sites != NULL) {
    free_sites (sequence->sites);
  }

  xfree (sequence);
}
/*
 * Configure a constraint list for MOCA and append one concatenated sequence.
 *
 * Installs the MOCA callbacks on CL and CL->moca, then builds a single
 * string made of every sequence in CL->S, each followed by an 'X'.
 * CL->packed_seq_lu maps each position of that string (counted from 1) back
 * to its origin: [pos][0] is the source sequence index and [pos][1] the
 * 1-based residue number; each 'X' position gets UNDEFINED in [pos][0].
 * Row 0 of the table is never written here. The concatenation is added to
 * CL->S under the name "catseq".
 *
 * Returns CL.
 */
Constraint_list *prepare_cl_for_moca ( Constraint_list *CL)
{
    int s, res;
    int pos, len;
    int packed_len;
    char **cat_name, **cat_seq;
    Sequence *NS=NULL;

    /* Callbacks on the constraint list itself */
    CL->do_self=1;
    CL->get_dp_cost=moca_slow_get_dp_cost;
    CL->evaluate_residue_pair=moca_residue_pair_extended_list;

    /* Callbacks on the MOCA parameter block */
    (CL->moca)->evaluate_domain=evaluate_moca_domain;
    (CL->moca)->cache_cl_with_domain=cache_cl_with_moca_domain;
    (CL->moca)->make_nol_aln=make_moca_nol_aln;

    /* Room needed: 1, plus every sequence and its one-character separator */
    packed_len=1;
    for (s=0; s< (CL->S)->nseq; s++)
    {
        packed_len+=strlen ((CL->S)->seq[s])+1;
    }

    cat_seq =declare_char (1, packed_len+1);
    cat_name=declare_char (1, 30);
    CL->packed_seq_lu=declare_int (packed_len, 2);

    /* Concatenate the sequences and fill the position look-up as we go */
    pos=1;
    for (s=0; s< (CL->S)->nseq; s++)
    {
        strcat (cat_seq[0], (CL->S)->seq[s]);
        strcat (cat_seq[0], "X");

        len=strlen ((CL->S)->seq[s]);
        for (res=1; res<=len; res++)
        {
            CL->packed_seq_lu[pos][0]=s;
            CL->packed_seq_lu[pos][1]=res;
            pos++;
        }

        /* The separator position belongs to no sequence */
        CL->packed_seq_lu[pos][0]=UNDEFINED;
        pos++;
    }

    sprintf (cat_name[0], "catseq");
    NS=fill_sequence_struc (1, cat_seq, cat_name, NULL);
    CL->S=add_sequence (NS, CL->S, 0);

    /* The temporaries are no longer needed once the sequence has been added */
    free_char (cat_seq, -1);
    free_char (cat_name, -1);
    free_sequence (NS, NS->nseq);

    return CL;
}
/*
 * Free only the sequence data of an alignment (profile information is kept).
 *
 * Releases the character tables (seq_al, seq_comment, aln_comment, name,
 * expanded_order), the integer tables (order, seq_cache, cdna_cache,
 * score_res) and the attached Sequence. Each table pointer is overwritten
 * with the value returned by its free routine; A->S is set to NULL.
 *
 * Returns A, or NULL when A is NULL (nothing is touched in that case).
 */
Alignment *free_data_in_aln (Alignment *A)
{
  if (!A) return NULL;

  A->seq_al=free_char (A->seq_al, -1);
  A->seq_comment=free_char (A->seq_comment, -1);
  A->aln_comment=free_char (A->aln_comment, -1);
  A->name=free_char (A->name, -1);
  A->expanded_order=free_char (A->expanded_order, -1);

  A->order=free_int (A->order, -1);
  A->seq_cache=free_int (A->seq_cache, -1);
  A->cdna_cache=free_int (A->cdna_cache, -1);
  A->score_res=free_int (A->score_res, -1);
  free_sequence (A->S, -1);
  A->S=NULL;
  return A;

}
Beispiel #9
0
/*
 * Release a hash-based alignment block.
 *
 * Every species name is looked up in the block's hash table; for each entry
 * found, its key is freed and its data is passed to free_sequence(). The
 * species array, the hash table and the block itself are then released.
 * A failed lookup is fatal: a message is printed and the process exits
 * with status 1. A NULL block is ignored.
 */
void free_hash_alignment(hash_alignment_block aln){
   if(!aln){
      return;
   }

   free(aln->data);

   for(int idx = 0; idx < aln->size; idx++){
      ENTRY *found;
      ENTRY probe = {aln->species[idx], NULL};

      if(hsearch_r(probe, FIND, &found, aln->sequences) == 0){
         fprintf(stderr,"Failed to read hash table: %s\n", strerror(errno));
         exit(1);
      }
      if(found == NULL){
         continue;
      }

      free(found->key);
      free_sequence(found->data);
   }

   free(aln->species);
   hdestroy_r(aln->sequences);
   free(aln->sequences);
   free(aln);
}
Beispiel #10
0
/*
 * Release the storage held by a C object, driven by its type description.
 * Pointers at the first level are deliberately left alone here: other fields
 * of a sequence may still reference them, and free_atype_ptr releases them
 * in a second pass.
 */
static void
free_atype(const struct atype_info *a, void *val)
{
    switch (a->type) {
    /* Nothing is released for these kinds. */
    case atype_bool:
    case atype_int:
    case atype_uint:
    case atype_int_immediate:
        return;

    /* Function-backed type: use its own free routine when one is supplied. */
    case atype_fn: {
        const struct fn_info *fninfo = a->tinfo;

        if (fninfo->free_func == NULL)
            return;
        fninfo->free_func(val);
        return;
    }

    case atype_sequence:
        free_sequence(a->tinfo, val);
        return;

    /* Pointer: release what it points at, then the pointed-to object. */
    case atype_ptr: {
        const struct ptr_info *pinfo = a->tinfo;
        void *target = LOADPTR(val, pinfo);

        if (target == NULL)
            return;
        free_atype(pinfo->basetype, target);
        free_atype_ptr(pinfo->basetype, target);
        return;
    }

    /* Field at a fixed offset inside val. */
    case atype_offset: {
        const struct offset_info *oinfo = a->tinfo;
        char *field;

        assert(oinfo->basetype != NULL);
        field = (char *)val + oinfo->dataoff;
        free_atype(oinfo->basetype, field);
        return;
    }

    /* Wrappers that simply defer to their base type. */
    case atype_optional: {
        const struct optional_info *optinfo = a->tinfo;

        free_atype(optinfo->basetype, val);
        return;
    }
    case atype_tagged_thing: {
        const struct tagged_info *taginfo = a->tinfo;

        free_atype(taginfo->basetype, val);
        return;
    }

    /* Counted data: only released when the count can be loaded. */
    case atype_counted: {
        const struct counted_info *cinfo = a->tinfo;
        void *data = (char *)val + cinfo->dataoff;
        size_t n;

        if (load_count(val, cinfo, &n) != 0)
            return;
        free_cntype(cinfo->basetype, data, n);
        return;
    }

    case atype_nullterm_sequence_of:
    case atype_nonempty_nullterm_sequence_of: {
        size_t n = get_nullterm_sequence_len(val, a->tinfo);

        free_sequence_of(a->tinfo, val, n);
        return;
    }

    default:
        abort();
    }
}
Beispiel #11
0
/*
 * Exercise read_sequence_from_fastq() on four fixture files: one containing
 * a 100-base read, one with reads that contain non-base characters, one
 * where a quality string is longer than its read, and one with multi-line
 * reads. The expected values show that reads with bad characters or a
 * mismatched quality length are skipped (the reader goes straight from
 * read1 to read3), and that a return value of 0 marks the end of the file.
 *
 * Every fixture is checked after fopen(); a missing file stops the run with
 * a message rather than passing a NULL stream to the reader.
 */
void test_read_sequence_from_fastq_with_bad_reads_and_long_reads()
{

  int ascii_offset=33;

  //pre-allocate space where to read the sequences
  Sequence* seq = malloc(sizeof(Sequence));
  if (seq==NULL){
    fputs("Out of memory trying to allocate a Sequence",stderr);
      exit(1);
  }

  int max_read_length=200;
  alloc_sequence(seq,max_read_length,LINE_MAX,ascii_offset);

  int length_seq;

  FILE* fp1 = fopen("../data/test/basic/includes_one_read_that_is_too_long.fastq", "r");
  if (fp1 == NULL){
    fputs("cannot open file:../data/test/basic/includes_one_read_that_is_too_long.fastq\n",stderr);
    exit(1);
  }

  // @read1
  // ACGT
  // +
  // !!!!
  // @read2
  // CCCC
  // +
  // 5555
  // @read3
  // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
  // -
  // 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
  // @read4
  // ACGT
  // +
  // 3333

  length_seq = read_sequence_from_fastq(fp1,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 4);
  CU_ASSERT_STRING_EQUAL("read1",seq->name);
  CU_ASSERT_STRING_EQUAL("ACGT",seq->seq);
  CU_ASSERT((int) (seq->qual[0])==0);
  CU_ASSERT((int) (seq->qual[1])==0);
  CU_ASSERT((int) (seq->qual[2])==0);
  CU_ASSERT((int) (seq->qual[3])==0);

  length_seq = read_sequence_from_fastq(fp1,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 4);
  CU_ASSERT_STRING_EQUAL("read2",seq->name);
  CU_ASSERT_STRING_EQUAL("CCCC",seq->seq);
  CU_ASSERT((int) (seq->qual[0])==20);
  CU_ASSERT((int) (seq->qual[1])==20);
  CU_ASSERT((int) (seq->qual[2])==20);
  CU_ASSERT((int) (seq->qual[3])==20);

  length_seq = read_sequence_from_fastq(fp1,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 100);
  CU_ASSERT_STRING_EQUAL("read3",seq->name);
  CU_ASSERT((int) (seq->qual[0])==15); // 0 translates as ascii 48; subtract 33 and get 15

  length_seq = read_sequence_from_fastq(fp1,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 4);
  CU_ASSERT_STRING_EQUAL("read4",seq->name);
  CU_ASSERT_STRING_EQUAL("ACGT",seq->seq);
  CU_ASSERT((int) (seq->qual[0])==18);

  fclose(fp1);

  FILE* fp2 = fopen("../data/test/basic/includes_reads_with_bad_characters.fastq", "r");
  if (fp2 == NULL){
    fputs("cannot open file:../data/test/basic/includes_reads_with_bad_characters.fastq\n",stderr);
    exit(1);
  }

  //@read1
  //ACGTACGTACGTACGT
  //+
  //WEW2WEW2WEW2WEWA
  //@read2
  //AAAA#5A
  //+
  //1234567
  //@read3
  //TTTT
  //+
  //3333

  length_seq = read_sequence_from_fastq(fp2,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 16);
  CU_ASSERT_STRING_EQUAL("read1",seq->name);
  CU_ASSERT_STRING_EQUAL("ACGTACGTACGTACGT",seq->seq);

  // read2 contains bad characters, so the next read returned is read3
  length_seq = read_sequence_from_fastq(fp2,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 4);
  CU_ASSERT_STRING_EQUAL("read3",seq->name);
  CU_ASSERT_STRING_EQUAL("TTTT",seq->seq);

  length_seq = read_sequence_from_fastq(fp2,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 0);

  fclose(fp2);

  FILE* fp3 = fopen("../data/test/basic/includes_one_read_where_quality_is_longer_than_seq.fastq", "r");
  if (fp3 == NULL){
    fputs("cannot open file:../data/test/basic/includes_one_read_where_quality_is_longer_than_seq.fastq\n",stderr);
    exit(1);
  }

  //@read1
  //ACGTACGTACGTACGT
  //+
  //WEW2WEW2WEW2WEWA
  //@read2
  //AAAA#5A
  //+
  //!!!!!!!!!!!!!!!!!!!!!!
  //@read3
  //TTTT
  //+
  //3333

  length_seq = read_sequence_from_fastq(fp3,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 16);
  CU_ASSERT_STRING_EQUAL("read1",seq->name);
  CU_ASSERT_STRING_EQUAL("ACGTACGTACGTACGT",seq->seq);

  // read2 is skipped here as well
  length_seq = read_sequence_from_fastq(fp3,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 4);
  CU_ASSERT_STRING_EQUAL("read3",seq->name);
  CU_ASSERT_STRING_EQUAL("TTTT",seq->seq);

  length_seq = read_sequence_from_fastq(fp3,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 0);

  fclose(fp3);

  FILE* fp4 = fopen("../data/test/basic/includes_multiline_reads.fastq", "r");
  if (fp4 == NULL){
    fputs("cannot open file:../data/test/basic/includes_multiline_reads.fastq\n",stderr);
    exit(1);
  }

  // @read1
  // ACGT
  // +
  // @@@@
  // @read2 45 bases
  // AAAAAAAAAAAAAAA
  // CCCCCCCCCCCCCCC
  // GGGGGGGGGGGGGGG
  // +
  // 222222222222222
  // 333333333333333
  // 444444444444444
  // @read3
  // TTT
  // -
  // ggg

  length_seq = read_sequence_from_fastq(fp4,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 4);
  CU_ASSERT_STRING_EQUAL("read1",seq->name);
  CU_ASSERT_STRING_EQUAL("ACGT",seq->seq);

  length_seq = read_sequence_from_fastq(fp4,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 45);
  CU_ASSERT_STRING_EQUAL("read2",seq->name);
  CU_ASSERT_STRING_EQUAL("AAAAAAAAAAAAAAACCCCCCCCCCCCCCCGGGGGGGGGGGGGGG",seq->seq);

  length_seq = read_sequence_from_fastq(fp4,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 3);
  CU_ASSERT_STRING_EQUAL("read3",seq->name);
  CU_ASSERT_STRING_EQUAL("TTT",seq->seq);

  length_seq = read_sequence_from_fastq(fp4,seq,max_read_length);
  CU_ASSERT_EQUAL(length_seq, 0);

  fclose(fp4);

  free_sequence(&seq);

}
Beispiel #12
0
/*
 * Exercise read_sequence_from_fastq() on a one-entry file and a three-entry
 * file, checking the returned length, name, bases and decoded qualities.
 * Qualities are read with an ASCII offset of 33, so '&' (ASCII 38) comes
 * back as 5. A return value of 0 marks the end of the file.
 *
 * Both fixtures are checked after fopen() and both streams are closed.
 *
 * NOTE(review): the Sequence is allocated with alloc_sequence(seq,200,...)
 * but the reader is called with 1000 as its third argument -- confirm that
 * argument cannot let the reader write past the 200 allocated here.
 */
void test_read_sequence_from_fastq(){

  int ascii_offset = 33;

  //pre-allocate space where to read the sequences
  Sequence* seq = malloc(sizeof(Sequence));
  if (seq==NULL){
    fputs("Out of memory trying to allocate a Sequence",stderr);
      exit(1);
  }

  alloc_sequence(seq,200,LINE_MAX, ascii_offset);

  int length_seq;
  FILE* fp1 = fopen("../data/test/basic/one_entry.fastq", "r");
  if (fp1 == NULL){
    fputs("cannot open file:../data/test/basic/one_entry.fastq\n",stderr);
    exit(1);
  }

  // 1. Read from simple fastq:
  // >Zam
  // ACGT
  // +
  // &&&&

  length_seq = read_sequence_from_fastq(fp1,seq,1000);

  CU_ASSERT_EQUAL(length_seq, 4);
  CU_ASSERT_STRING_EQUAL("Zam",seq->name);
  CU_ASSERT_STRING_EQUAL("ACGT",seq->seq);
  // The quality is stored with 33 subtracted, so it is compared as a number
  // rather than against the raw string "&&&&".
  CU_ASSERT((int) seq->qual[0] == 5);

  // Done with the first fixture; close it so the stream is not leaked.
  fclose(fp1);

  FILE* fp2 = fopen("../data/test/basic/three_entries.fastq", "r");
  if (fp2 == NULL){
    fputs("cannot open file:../data/test/basic/three_entries.fastq\n",stderr);
    exit(1);
  }

  //2. Read from fastq:

  // @Zam1
  //ACGT
  //+
  //&&&&
  //@Zam2
  //AAAAAAAA
  //+
  //!((/8F+,
  //@Zam3
  //ATATATAT
  //TTTTTTTTTT
  //-
  //(((((((+AAAAAABAAA

  length_seq = read_sequence_from_fastq(fp2,seq, 1000);

  CU_ASSERT_EQUAL(length_seq, 4);
  CU_ASSERT_STRING_EQUAL("Zam1",seq->name);
  CU_ASSERT_STRING_EQUAL("ACGT",seq->seq);
  // As above: '&' minus the offset of 33 is 5.
  CU_ASSERT((int) seq->qual[0] == 5);

  length_seq = read_sequence_from_fastq(fp2,seq,1000);

  CU_ASSERT_EQUAL(length_seq, 8);
  CU_ASSERT_STRING_EQUAL("Zam2",seq->name);
  CU_ASSERT_STRING_EQUAL("AAAAAAAA",seq->seq);
  CU_ASSERT((int) seq->qual[0] == 0);//! is quality 0 - take 33 off its ascii code, can check on http://www.asciitable.com/
  CU_ASSERT((int) seq->qual[1] == 7);// ( is quality 7
  CU_ASSERT((int) seq->qual[2] == 7);// ( is quality 7
  CU_ASSERT((int) seq->qual[3] == 14);// / is quality 14
  CU_ASSERT((int) seq->qual[4] == 23);// 8 is quality 23
  CU_ASSERT((int) seq->qual[5] == 37);// F is quality 37
  CU_ASSERT((int) seq->qual[6] == 10);// + is quality 10
  CU_ASSERT((int) seq->qual[7] == 11);// , is quality 11

  length_seq = read_sequence_from_fastq(fp2,seq,1000);

  CU_ASSERT_EQUAL(length_seq, 18);
  CU_ASSERT_STRING_EQUAL("Zam3",seq->name);
  CU_ASSERT_STRING_EQUAL("ATATATATTTTTTTTTTT",seq->seq);
  CU_ASSERT((int) seq->qual[0] == 7);// ( is quality 7

  length_seq = read_sequence_from_fastq(fp2,seq,1000);

  CU_ASSERT_EQUAL(length_seq, 0);

  fclose(fp2);
  free_sequence(&seq);
}
Beispiel #13
0
/*
 * Test reading several short entries from fasta files.
 *
 * Reads a one-entry file and a three-entry file with
 * read_sequence_from_fasta() and checks, for every entry, the returned
 * length, the name, the start/end coordinates, the concatenated bases and
 * the full_entry flag. A return value of 0 marks the end of the file.
 *
 * Both fixtures are checked after fopen().
 *
 * NOTE(review): the Sequence is allocated with alloc_sequence(seq,200,...)
 * but the reader is called with 1000 as its third argument -- confirm that
 * argument cannot let the reader write past the 200 allocated here.
 */
void test_read_sequence_from_fasta(){

  Sequence * seq = malloc(sizeof(Sequence));
  boolean full_entry;

  if (seq == NULL){
    fputs("Out of memory trying to allocate Sequence\n",stderr);
    exit(1);
  }

  //pre-allocate space where to read the sequences
  alloc_sequence(seq,200,LINE_MAX, 0);

  int length_seq;
  FILE* fp1 = fopen("../data/test/basic/one_entry.fasta", "r");

  if (fp1 == NULL){
    fputs("cannot open file:../data/test/basic/one_entry.fasta\n",stderr);
    exit(1);
  }

  // 1. Read from simple fasta:
  // >Zam
  // ACGT
  // ACGTACGTACGT

  length_seq = read_sequence_from_fasta(fp1,seq,1000,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 16);
  CU_ASSERT_STRING_EQUAL("Zam",seq->name);
  CU_ASSERT_EQUAL(1,seq->start);
  CU_ASSERT_EQUAL(16,seq->end);
  CU_ASSERT_STRING_EQUAL("ACGTACGTACGTACGT",seq->seq);
  CU_ASSERT(full_entry);
  fclose(fp1);

  FILE* fp2 = fopen("../data/test/basic/three_entries.fasta", "r");

  if (fp2 == NULL){
    fputs("cannot open file:../data/test/basic/three_entries.fasta\n",stderr);
    exit(1);
  }

  // 2. Read from fasta:
  //>Zam1
  //ACGT
  //ACGTACGTACGT
  //>Zam2
  //ACGT
  //ACGTACGTACGT
  //TTTTTTTT
  //>Zam3
  //ACGTNNACGTACGTACGT

  length_seq = read_sequence_from_fasta(fp2,seq,1000,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 16);
  CU_ASSERT_STRING_EQUAL("Zam1",seq->name);
  CU_ASSERT_EQUAL(1,seq->start);
  CU_ASSERT_EQUAL(16,seq->end);
  CU_ASSERT_STRING_EQUAL("ACGTACGTACGTACGT",seq->seq);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp2,seq,1000,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 24);
  CU_ASSERT_STRING_EQUAL("Zam2",seq->name);
  CU_ASSERT_EQUAL(1,seq->start);
  CU_ASSERT_EQUAL(24,seq->end);
  // Three lines joined: ACGT + ACGTACGTACGT + TTTTTTTT (all upper case in the fixture)
  CU_ASSERT_STRING_EQUAL("ACGTACGTACGTACGTTTTTTTTT",seq->seq);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp2,seq,1000,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 18);
  CU_ASSERT_STRING_EQUAL("Zam3",seq->name);
  CU_ASSERT_EQUAL(1,seq->start);
  CU_ASSERT_EQUAL(18,seq->end);
  CU_ASSERT_STRING_EQUAL("ACGTNNACGTACGTACGT",seq->seq);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp2,seq,1000,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 0);
  CU_ASSERT(full_entry);

  fclose(fp2);
  free_sequence(&seq);

}
Beispiel #14
0
/*
 * Exercise read_sequence_from_fasta() on two fixtures whose reads contain
 * characters other than bases. Every read is still returned with its name
 * and length, and full_entry stays set. The second fixture ends with such a
 * read, to make sure the reader terminates (returns 0) rather than looping.
 *
 * Both fixtures are checked after fopen(); a missing file stops the run with
 * a message rather than passing a NULL stream to the reader.
 */
void test_read_sequence_from_fasta_when_file_has_bad_reads()
{

  int length_seq;
  Sequence * seq = malloc(sizeof(Sequence));
  boolean full_entry;

  if (seq == NULL){
    fputs("Out of memory trying to allocate Sequence\n",stderr);
    exit(1);
  }
  //pre-allocate space where to read the sequences
  int max_read_length=100;
  alloc_sequence(seq,max_read_length,LINE_MAX, 0);

  FILE* fp2= fopen("../data/test/basic/includes_reads_that_have_bad_characters.fasta", "r");
  if (fp2 == NULL){
    fputs("cannot open file:../data/test/basic/includes_reads_that_have_bad_characters.fasta\n",stderr);
    exit(1);
  }

  // >read1
  // AAAAAAAAAAAA9
  // >read2
  // ¡€#9∞§¶#¶•#•#•#ª#ª#ª#ªº#º#º#º––––
  // >read3 4 c's
  // CCCC
  // >read4 10 Ts
  // TTTTTTTTTT
  // >read5
  // $
  // >read6
  // AAAAAAAAAAAAAAAAAA#A
  // >read7
  // AAA

  length_seq = read_sequence_from_fasta(fp2,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 13);
  CU_ASSERT_STRING_EQUAL("read1",seq->name);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp2,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 63);
  CU_ASSERT_STRING_EQUAL("read2",seq->name);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp2,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 4);
  CU_ASSERT_STRING_EQUAL("read3",seq->name);
  CU_ASSERT_STRING_EQUAL("CCCC",seq->seq);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp2,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 10);
  CU_ASSERT_STRING_EQUAL("read4",seq->name);
  CU_ASSERT_STRING_EQUAL("TTTTTTTTTT",seq->seq);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp2,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 1);
  CU_ASSERT_STRING_EQUAL("read5",seq->name);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp2,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 20);
  CU_ASSERT_STRING_EQUAL("read6",seq->name);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp2,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 3);
  CU_ASSERT_STRING_EQUAL("read7",seq->name);
  CU_ASSERT_STRING_EQUAL("AAA",seq->seq);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp2,seq,max_read_length,true,&full_entry,0);
  CU_ASSERT_EQUAL(length_seq, 0);
  CU_ASSERT(full_entry);

  fclose(fp2);

  //now make sure we do not get trapped in an infinite loop if the last read of a file is bad

  FILE* fp3= fopen("../data/test/basic/includes_final_read_that_has_bad_characters.fasta", "r");
  if (fp3 == NULL){
    fputs("cannot open file:../data/test/basic/includes_final_read_that_has_bad_characters.fasta\n",stderr);
    exit(1);
  }

  // >read1
  // AAAAAAAAAAAA9
  // >read2
  // ¡€#9∞§¶#¶•#•#•#ª#ª#ª#ªº#º#º#º––––
  // >read3 4 c's
  // CCCC
  // >read4 10 Ts
  // TTTTTTTTTT
  // >read5
  // $
  // >read6
  // AAAAAAAAAAAAAAAAAA#A

  length_seq = read_sequence_from_fasta(fp3,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 13);
  CU_ASSERT_STRING_EQUAL("read1",seq->name);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp3,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 63);
  CU_ASSERT_STRING_EQUAL("read2",seq->name);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp3,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 4);
  CU_ASSERT_STRING_EQUAL("CCCC",seq->seq);
  CU_ASSERT_STRING_EQUAL("read3",seq->name);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp3,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 10);
  CU_ASSERT_STRING_EQUAL("read4",seq->name);
  CU_ASSERT_STRING_EQUAL("TTTTTTTTTT",seq->seq);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp3,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 1);
  CU_ASSERT_STRING_EQUAL("read5",seq->name);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp3,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 20);
  CU_ASSERT_STRING_EQUAL("read6",seq->name);
  CU_ASSERT(full_entry);

  length_seq = read_sequence_from_fasta(fp3,seq,max_read_length,true,&full_entry,0);

  CU_ASSERT_EQUAL(length_seq, 0);
  CU_ASSERT(full_entry);

  fclose(fp3);

  free_sequence(&seq);

}
/*
 * Free a constraint list and return the Sequence it referred to (CL->S),
 * which is NOT freed here. Returns NULL when CL is NULL.
 *
 * A list made by a soft copy may share members with its parent (CL->pCL).
 * For such a list, every member whose pointer equals the parent's is set to
 * NULL first so that the release phase below skips it; residue_index is
 * always dropped without being freed. A soft copy with no parent only has
 * its own structure freed.
 */
Sequence *free_constraint_list (Constraint_list *CL)
    {
    Sequence *S;
    Constraint_list *pCL;

    if (CL==NULL)
      {
	return NULL;
      }
    S=CL->S;

    /* Phase 1: detach whatever a soft copy shares with its parent */
    if (CL->copy_mode==SOFT_COPY)
      {
	pCL=CL->pCL;
	if (pCL==NULL)
	  {
	    vfree (CL);
	    return S;
	  }

	CL->residue_index=NULL;

	if (CL->M==pCL->M)
	  CL->M=NULL;

	if (CL->start_index==pCL->start_index)
	  CL->start_index=NULL;
	if (CL->end_index==pCL->end_index)
	  CL->end_index=NULL;

	if (CL->fp==pCL->fp)
	  CL->fp=NULL;
	if (CL->matrices_list==pCL->matrices_list)
	  CL->matrices_list=NULL;

	if (CL->STRUC_LIST==pCL->STRUC_LIST)
	  CL->STRUC_LIST=NULL;
	if (CL->W==pCL->W)
	  CL->W=NULL;
	if (CL->DM==pCL->DM)
	  CL->DM=NULL;
	if (CL->ktupDM==pCL->ktupDM)
	  CL->ktupDM=NULL;

	if (CL->translation==pCL->translation)
	  CL->translation=NULL;
	if (CL->moca==pCL->moca)
	  CL->moca=NULL;
	if (CL->Prot_Blast==pCL->Prot_Blast)
	  CL->Prot_Blast=NULL;
	if (CL->DNA_Blast==pCL->DNA_Blast)
	  CL->DNA_Blast=NULL;
	if (CL->Pdb_Blast==pCL->Pdb_Blast)
	  CL->Pdb_Blast=NULL;
	if (CL->seq_for_quadruplet==pCL->seq_for_quadruplet)
	  CL->seq_for_quadruplet=NULL;
	if (CL->TC==pCL->TC)
	  CL->TC=NULL;
      }

    /* Phase 2: release every member that is still attached */
    if (CL->residue_index!=NULL)
      free_arrayN (CL->residue_index, 3);

    if (CL->M!=NULL)
      free_int (CL->M, -1);
    if (CL->fp!=NULL)
      vfclose (CL->fp);
    if (CL->matrices_list!=NULL)
      free_char (CL->matrices_list, -1);

    if (CL->start_index!=NULL)
      free_int (CL->start_index, -1);
    if (CL->end_index!=NULL)
      free_int (CL->end_index, -1);

    if (CL->STRUC_LIST!=NULL)
      free_sequence (CL->STRUC_LIST, (CL->STRUC_LIST)->nseq);
    if (CL->W!=NULL)
      free_weights (CL->W);

    /* The distance matrices are passed to their free routine unconditionally */
    CL->DM=free_distance_matrix (CL->DM);
    CL->ktupDM=free_distance_matrix (CL->ktupDM);

    if (CL->translation!=NULL)
      vfree (CL->translation);
    if (CL->moca!=NULL)
      free_moca (CL->moca);
    if (CL->Prot_Blast!=NULL)
      free_blast_param (CL->Prot_Blast);
    if (CL->DNA_Blast!=NULL)
      free_blast_param (CL->DNA_Blast);
    if (CL->Pdb_Blast!=NULL)
      free_blast_param (CL->Pdb_Blast);
    if (CL->TC!=NULL)
      free_TC_param (CL->TC);

    if (CL->seq_for_quadruplet!=NULL)
      vfree (CL->seq_for_quadruplet);

    vfree (CL);
    return S;
    }
/*
 * Free a constraint list and also the Sequence that free_constraint_list()
 * hands back. Always returns NULL.
 */
Constraint_list *free_constraint_list_full (Constraint_list *CL)
{
  Sequence *leftover;

  leftover=free_constraint_list (CL);
  free_sequence (leftover, -1);

  return NULL;
}
Beispiel #17
0
/*
 * Write the "mean sequence" of a PGM image sequence.
 *
 * Usage: prog [input PGM sequence name] [output PGM sequence name]
 *
 * For every image of the input sequence except the last, the mean of that
 * image and the next one is computed and written to
 * "<output name><3-digit index>.pgm".
 *
 * The sequence names are used straight from argv and the output file name
 * is built with a bounded snprintf(), so long arguments cannot overflow a
 * fixed buffer. Returns EXIT_SUCCESS on success; exits with a failure status
 * on a usage error, when the sequence cannot be loaded or is shorter than
 * two images, when the first image cannot be cloned, or when an output file
 * name does not fit.
 */
int main(int argc, char *argv[])
{
  sequence *seq;       /* pointer to input sequence data structure  */
  image    **imgs;     /* pointer to input sequence array of images */
  image    *out;       /* pointer the output image */

  char file_out[FILENAME_MAX]; /* output file name */

  int length;         /* length of the sequence */
  int i;              /* loop index */
  int written;        /* characters snprintf wanted to write */

  /* Process input arguments */
  if(argc != 3)
    {
      fprintf(stderr,"Usage: %s [input PGM sequence name] [output PGM sequence name]\n", argv[0]);
      exit(EXIT_FAILURE);
    }

  /* Load the input sequence */
  seq = load_sequence(argv[1]);
  if(seq == NULL) {
    fprintf(stderr, "cannot load sequence %s\n", argv[1]);
    exit(EXIT_FAILURE);
  }

  /* Get a pointer to the sequence images */
  imgs = get_sequence_images(seq);

  /* Get sequence length */
  length = get_sequence_length(seq);

  /* Check if sequence is long enough */
  if(length < 2) {
    fprintf(stderr, "sequence is too short\n");
    exit(1);
  }

  /* Clone the first image to store results */
  out = clone_image(imgs[0]);
  if(out == NULL) {
    fprintf(stderr, "cannot allocate the output image\n");
    free_sequence(seq);
    exit(EXIT_FAILURE);
  }

  /* Loop through the sequence and write the mean sequence       */
  /* i.e. for each image of the input sequence, write the image  */
  /* resulting from the mean between the current image and       */
  /* the next one.                                               */

  for(i = 0; i < length - 1; i++) {
    /* Compute the mean between the current picture and the next one */
    mean_image(imgs[i], imgs[i+1], out);

    /* Build the output name; refuse to continue with a truncated one */
    written = snprintf(file_out, sizeof file_out, "%s%03d.pgm", argv[2], i);
    if(written < 0 || (size_t)written >= sizeof file_out) {
      fprintf(stderr, "output file name is too long\n");
      free_image(out);
      free_sequence(seq);
      exit(EXIT_FAILURE);
    }

    /* Now write out the output image */
    printf("Writing out %s\n",file_out);
    pgm_write_image(out, file_out);
  }

  /* We must deallocate the memory the sequence and for the image before */
  /* we finish */
  free_image(out);
  free_sequence(seq);

  return(EXIT_SUCCESS);
}
Beispiel #18
0
/*
 * Parse one whitespace-separated 's' line into a newly allocated sequence.
 *
 * Fields, in order after the leading 's' token:
 *   2  source name; also split on '.' into species and scaffold
 *   3  start of the aligned region in the source sequence
 *   4  aligned sequence length
 *   5  strand, '+' or '-'
 *   6  size of the source sequence
 *   7  the sequence text itself
 *
 * data is not modified; parsing works on a private copy that is released on
 * every path, including the error paths.
 *
 * Returns the new sequence, or NULL when data is NULL, when a field is
 * missing, when a numeric field makes strtol() set errno, or when the strand
 * is not '+' or '-'. On such a parse failure a message is written to stderr
 * and the partially built sequence is released with free_sequence().
 * Allocation failures trip an assert.
 */
seq get_sequence(char *data){
   if(data == NULL) return NULL;
   char *seq_parse;
   char *src_parse;
   seq new_seq = malloc(sizeof(*new_seq));
   assert(new_seq!=NULL);
   /* Clear every pointer member so free_sequence() is safe on early failure. */
   new_seq->src=NULL;
   new_seq->species=NULL;
   new_seq->scaffold=NULL;
   new_seq->sequence=NULL;
   char *temp = strdup(data);
   assert(temp!=NULL);
//First part of entry, is the 's', throw that away
   strtok_r(temp," \t\n",&seq_parse);
   for(int i=2;i<8;++i){
      char *datum = strtok_r(NULL," \t\n",&seq_parse);
      if(datum == NULL){
         fprintf(stderr,"Invalid sequence: %s\n", data);
         goto fail;
      }
      switch (i){
//Second part is species name and contig
      case 2: {
         new_seq->src = strdup(datum);
         assert(new_seq->src != NULL);
         /* species and scaffold point into this second copy */
         char *parse_src = strdup(datum);
         assert(parse_src != NULL);
         new_seq->species = strtok_r(parse_src,".",&src_parse);
         new_seq->scaffold = strtok_r(NULL,".",&src_parse);
         break;
      }
//Third part is the start of the aligned region in the source sequence
      case 3: {
         errno=0;
         unsigned long start = strtol(datum,NULL,10);
         if(errno !=0){
            fprintf(stderr, "Invalid sequence start: %s\nIn sequence: %s\n"
              ,datum,data);
            goto fail;
         }
         new_seq->start=start;
         break;
      }
//Fourth is aligned sequence length
      case 4: {
         errno=0;
         unsigned int size = strtol(datum,NULL,10);
         if(errno !=0){
            fprintf(stderr, "Invalid sequence size: %s\nIn sequence: %s\n"
              ,datum,data);
            goto fail;
         }
         new_seq->size = size;
         break;
      }
//Fifth is strand
      case 5:
         if(datum[0] != '+' && datum[0] != '-'){
            fprintf(stderr, "Invalid strand: %s\nIn sequence: %s\n"
              ,datum,data);
            goto fail;
         }
         new_seq->strand=datum[0];
         break;
//Sixth is size of source sequence
      case 6: {
         errno=0;
         unsigned long srcSize = strtol(datum,NULL,10);
         if(errno !=0){
            fprintf(stderr, "Invalid source sequence size: %s\nIn sequence: %s\n"
              ,datum,data);
            goto fail;
         }
         new_seq->srcSize = srcSize;
         break;
      }
//Last is the sequence itself
      case 7:
         new_seq->sequence = strdup(datum);
         assert(new_seq->sequence !=NULL);
         break;
      default:
         printf("Default case\n");
      }
   }
   free(temp);
   return new_seq;

fail:
   /* Common error exit: drop the working copy and the partial sequence. */
   free(temp);
   free_sequence(new_seq);
   return NULL;
}
Beispiel #19
0
/*
 * Read the next alignment block from the parser's file and sort its
 * sequences into an in-group and an out-group.
 *
 * Lines are taken from parser->buf, which is refilled from parser->maf_file
 * whenever parser->fill_buf is set. A line starting with 'a' opens a block;
 * each following 's' line is parsed with get_sequence() and stored in
 * in_sequences or out_sequences according to which group lists the species
 * (the part of the source name before the first '.'); sequences in neither
 * group are discarded. The block ends at the next 'a' line (which is left in
 * the buffer for the following call), at any other kind of line, or at end
 * of file.
 *
 * Returns the new block, or NULL when no block was started before the end of
 * the file. Also returns NULL, after printing a message and releasing the
 * partially built block, on a stream error or an unparsable 's' line.
 *
 * NOTE(review): sizeof(parser->buf) is only the buffer capacity if buf is an
 * array member of the parser -- confirm against the parser definition.
 * NOTE(review): a final line with no trailing newline is never processed;
 * the loop ends when the refill that follows it reads nothing.
 */
sorted_alignment_block get_sorted_alignment(maf_linear_parser parser, 
                    char **in_group, int in_size, char **out_group, int out_size){
   sorted_alignment_block new_align = NULL;
   int in_block=0;
   long bytesread;
   int sizeLeftover=0;
   int bLoopCompleted = 0;
   int first = 1;
   char *datum;
   char *npos;
   char *temp;
   char *species;
   do{
      if(parser->fill_buf){
         bytesread = fread(parser->buf+sizeLeftover, 1,
            sizeof(parser->buf)-1-sizeLeftover, parser->maf_file);
//Look at the stream error flag before the byte count, so a failed
//read that delivered nothing is not mistaken for end of file.
         if(ferror(parser->maf_file) != 0){
            fprintf(stderr, "File stream error: %s\nError: %s",
               parser->filename,strerror(errno));
            goto fail;
         }
         if (bytesread<1){
            bLoopCompleted = 1;
            bytesread  = 0;
            continue;
         }
         parser->buf[sizeLeftover+bytesread]=0;
         parser->curr_pos=0;
         parser->pos=parser->buf;
         --parser->fill_buf;
      }
      npos = strchr(parser->pos,'\n');

      if(npos==NULL){
//No complete line left: move the unfinished tail to the front of the
//buffer and ask for a refill. The tail starts at parser->pos; it only
//sits at the very end of the buffer when the last read filled it.
         sizeLeftover = strlen(parser->pos);
         memmove(parser->buf,parser->pos,sizeLeftover);
         ++parser->fill_buf;
         continue;
      }
      *npos=0;
      datum = parser->pos;
      parser->pos = npos+1;
//If we've yet to enter an alignment block, and the first character
//of the line isn't 'a', then skip over it.
      if(!in_block && datum[0]!='a') continue;
//***HANDLE SCORE/PASS/DATA here**i
      else if(datum[0]=='a'){
//If we find an 'a' after entering a block, then this is a new block
//so rewind the file pointer and break out of read loop.
         if(in_block){
            *npos='\n';
            parser->pos = datum;
            break;
         }
//Else we're starting a new alignment block, initialize the data
//structure and set in_block to true.
         new_align=malloc(sizeof(*new_align));
         assert(new_align != NULL);
         new_align->in_sequences = malloc(16*sizeof(*new_align->in_sequences));
         assert(new_align->in_sequences != NULL);
         new_align->in_size=0;
         new_align->in_max=16;
         new_align->out_sequences = malloc(16*sizeof(*new_align->out_sequences));
         assert(new_align->out_sequences != NULL);
         new_align->out_size=0;
         new_align->out_max=16;
         new_align->data = NULL;
         new_align->seq_length=0;
         in_block=1;
         continue;
      }
//If in a block and find 's', then it's a sequence to add to the
//current alignment block, parse it, reallocate alignment block's
//sequence array if necessary, and store the new sequence.
      else if(datum[0]=='s'){
         seq new_seq = get_sequence(datum);
         if(new_seq == NULL){
            fprintf(stderr, "Invalid sequence entry %s\n",datum);
            goto fail;
         }
         if(first){
            new_align->seq_length=strlen(new_seq->sequence);
            first = 0;
         }
         temp = strdup(new_seq->src);
         assert(temp != NULL);
         species = strtok(temp,".");
         if(species == NULL){
//A source name with no species part belongs to neither group.
            free_sequence(new_seq);
         }else if(in_list(species,in_group,in_size)){
            if(new_align->in_size ==new_align->in_max){
               new_align->in_sequences=realloc(new_align->in_sequences,
                  2*new_align->in_max*sizeof(seq));
               assert(new_align->in_sequences!=NULL);
               new_align->in_max *=2;
            }
            new_align->in_sequences[new_align->in_size++]=new_seq;
         }else if(in_list(species,out_group,out_size)){
            if(new_align->out_size ==new_align->out_max){
               new_align->out_sequences=realloc(new_align->out_sequences,
                  2*new_align->out_max*sizeof(seq));
               assert(new_align->out_sequences!=NULL);
               new_align->out_max *=2;
            }
            new_align->out_sequences[new_align->out_size++]=new_seq;
//If not in in group or out group, throw away.
         }else free_sequence(new_seq);
         free(temp);
      }
//If we hit a character other than 'a' or 's', then we've exited
//the current alignment block, break out of the read loop and return
//the current alignment block.
      else break;
   }while(!bLoopCompleted);
   return new_align;

fail:
//Error exit: release whatever part of the block was already built.
   if(new_align != NULL){
      for(int i = 0; i < new_align->in_size; ++i){
         free_sequence(new_align->in_sequences[i]);
      }
      free(new_align->in_sequences);
      for(int i = 0; i < new_align->out_size; ++i){
         free_sequence(new_align->out_sequences[i]);
      }
      free(new_align->out_sequences);
      free(new_align->data);
      free(new_align);
   }
   return NULL;
}