/*
 * translate_swapped
 *
 * Translates swapped->seq codon by codon into a newly allocated Sequence.
 * The codon table is loaded from the file "codon.table" on each call.
 *
 * Case convention: a residue is written in upper case only when all three
 * bases of its codon are upper case; otherwise it is written in lower case.
 *
 * Only complete codons are translated. When swapped->len is not a multiple
 * of three the trailing one or two bases are ignored: reading them as a
 * codon would run past the end of the input, and the extra residue would
 * not fit in the 1 + len/3 byte output buffer.
 *
 * Returns the new Sequence; only its name and seq members are set here.
 *
 * NOTE(review): the codon table read here is neither stored nor released
 * by this function, and the results of calloc() and
 * read_CodonTable_file() are not checked - confirm whether callers rely
 * on that.
 */
Sequence * translate_swapped(Sequence * swapped)
{
  CodonTable * ct;
  int i,j;
  Sequence * out;

  out = Sequence_alloc();
  out->name = stringalloc(swapped->name);

  /* one residue per complete codon, plus the terminating NUL */
  out->seq = calloc(1+swapped->len/3,sizeof(char));

  ct = read_CodonTable_file("codon.table");

  /* i+2 < len guarantees all three bases of the codon are inside the input */
  for(i=0,j=0;i+2<swapped->len;i+=3,j++) {
    out->seq[j] = aminoacid_from_seq(ct,swapped->seq+i);

    /* <ctype.h> functions require a value representable as unsigned char */
    if( isupper((unsigned char)swapped->seq[i]) &&
        isupper((unsigned char)swapped->seq[i+1]) &&
        isupper((unsigned char)swapped->seq[i+2]) ) {
      out->seq[j] = toupper((unsigned char)out->seq[j]);
    } else {
      out->seq[j] = tolower((unsigned char)out->seq[j]);
    }
  }

  /* j <= len/3 here, so this stays inside the allocation */
  out->seq[j] = '\0';

  return out;
}
/*
 * get_cDNA_from_Transcript
 *
 * Returns the cDNA attached to trs, building and caching it in trs->cDNA
 * on first use. The cDNA sequence is made by concatenating, in exon order,
 * the genomic bases from exon->start (inclusive) to exon->end (exclusive)
 * of the Genomic sequence obtained from the parent Gene. Its name is the
 * Genomic name with ".sp" appended, truncated if it does not fit in the
 * 64 byte name buffer.
 *
 * Returns NULL, after a warning, if the transcript has no parent Gene, the
 * Genomic sequence cannot be obtained, or the Genomic has no sequence data.
 *
 * Exons whose end precedes their start are skipped with a warning, and
 * the total copied never exceeds length_Transcript(trs), the size the
 * output buffer is allocated for.
 */
cDNA * get_cDNA_from_Transcript(Transcript * trs)
{
  Genomic * gn;
  Sequence * base;
  int i;
  int capacity;   /* number of bases the output buffer can hold */
  int used;       /* number of bases written so far */
  int seglen;     /* length of the current exon */
  char buffer[64];

  /* already built on an earlier call */
  if( trs->cDNA != NULL )
    return trs->cDNA;

  if( trs->parent == NULL ) {
    warn("Cannot get cDNA, as no parent Gene!");
    return NULL;
  }

  if( (gn = get_Genomic_from_Gene(trs->parent)) == NULL ) {
    warn("Cannot get cDNA, as cannot get Genomic sequence from Gene");
    return NULL;
  }

  if( gn->baseseq == NULL || gn->baseseq->seq == NULL ) {
    warn("Cannot get cDNA, as Genomic has no sequence data");
    return NULL;
  }

  base = Sequence_alloc();

  /* bounded write: a long genomic name must not overrun the stack buffer */
  snprintf(buffer,sizeof(buffer),"%s.sp",Genomic_name(gn));
  base->name = stringalloc(buffer);

  capacity = length_Transcript(trs);
  base->seq = ckcalloc(capacity+1,sizeof(char));
  base->seq[0]='\0';

  used = 0;
  for(i=0;i<trs->ex_len;i++) {
    seglen = trs->exon[i]->end - trs->exon[i]->start;

    if( seglen < 0 ) {
      /* a negative count would be converted to a huge size_t by strncat */
      warn("Exon %d has end before start, skipping it in cDNA",i);
      continue;
    }
    if( seglen == 0 )
      continue;

    if( seglen > capacity - used ) {
      warn("Exon %d does not fit in the cDNA buffer, truncating",i);
      seglen = capacity - used;
    }

    /*
     * Append at the current end of the string instead of the start of the
     * buffer, so earlier exons are not rescanned. strncat still stops at
     * the end of the genomic string, so advance by what was really copied.
     */
    strncat(base->seq+used,gn->baseseq->seq+trs->exon[i]->start,(size_t)seglen);
    used += (int) strlen(base->seq+used);
  }

  make_len_type_Sequence(base);
  base->type = SEQUENCE_CDNA;
  trs->cDNA = cDNA_from_Sequence(base);

  return trs->cDNA;
}
/*
 * get_Protein_from_Translation
 *
 * Returns the Protein attached to ts, building and caching it in
 * ts->protein on first use. The protein is made by translating the cDNA
 * of the parent transcript with the codon table ct, one residue per codon.
 * Its name is the cDNA name with ".tr" appended, truncated if it does not
 * fit in the 64 byte name buffer.
 *
 * Stop codons: a stop in the final codon ends the translation and is not
 * written; a stop anywhere else is reported and written as '*'.
 *
 * A cDNA whose length is not a multiple of three is reported but still
 * translated. The trailing partial codon is copied into a local buffer
 * padded with NUL bytes, so the codon helpers never read beyond the end
 * of the cDNA string.
 *
 * Returns NULL, after a warning, if ts has no parent, the cDNA cannot be
 * obtained, the cDNA has no sequence object, or the cDNA is empty.
 */
Protein * get_Protein_from_Translation(Translation * ts,CodonTable * ct)
{
  cDNA * cd;
  int i,j,k;
  int dnalen;
  Sequence * seq;
  char buffer[64];
  char partial[4];   /* holds a NUL-padded trailing partial codon */
  char * codon;

  assert(ts);
  assert(ct);

  /* already built on an earlier call */
  if( ts->protein != NULL )
    return ts->protein;

  if( ts->parent == NULL ) {
    warn("Cannot get Protein from translation as no parent!");
    return NULL;
  }

  cd = get_cDNA_from_Transcript(ts->parent);

  if( cd == NULL ) {
    warn("Cannot make translation as can't get transcript!");
    return NULL;
  }

  if( cd->baseseq == NULL ) {
    warn("A bad error - a non NULL cDNA with a null sequence object. No translation here!");
    return NULL;
  }

  if( cd->baseseq->len == 0 ) {
    warn("Attempting to translate a zero length cDNA. Yikes!");
    return NULL;
  }

  dnalen = cd->baseseq->len;

  seq = Sequence_alloc();

  /* bounded write: a long cDNA name must not overrun the stack buffer */
  snprintf(buffer,sizeof(buffer),"%s.tr",cDNA_name(cd));
  seq->name = stringalloc(buffer);

  /* at most len/3 + 1 residues (one for a partial codon) plus the NUL */
  seq->seq = ckcalloc((dnalen/3) + 2,sizeof(char));
  seq->type = SEQUENCE_PROTEIN;

  if( dnalen%3 != 0 ) {
    warn("Problem in making translation, cDNA is not mod3! - length is %d - transcript id %s",dnalen,seq->name);
  }

  for(i=0,j=0;i<dnalen;i+=3,j++) {
    if( i+3 <= dnalen ) {
      codon = cd->baseseq->seq+i;
    } else {
      /* fewer than three bases remain: pad so the helpers stay in bounds */
      for(k=0;k<3;k++) {
        partial[k] = (i+k < dnalen) ? cd->baseseq->seq[i+k] : '\0';
      }
      partial[3] = '\0';
      codon = partial;
    }

    if( is_stop_codon(codon_from_seq(codon),ct) == TRUE ) {
      if( i+3 >= dnalen )
        break;   /* terminal stop: end of protein, not emitted */

      warn("Got a stop codon in the middle of a translation at postion [%d]. Yuk!",i);
      seq->seq[j] = '*';
    } else {
      seq->seq[j] = aminoacid_from_seq(ct,codon);
    }
  }
  seq->seq[j]='\0';

  make_len_type_Sequence(seq);
  /* set again after make_len_type_Sequence, as the original code does */
  seq->type = SEQUENCE_PROTEIN;
  ts->protein = Protein_from_Sequence(seq);

  return ts->protein;
}
/*
 * extract_dna_LinkStream
 *
 * Walks the chain of LinkStream nodes starting at ln and returns a newly
 * allocated Sequence whose seq holds one character per node visited,
 * obtained from first_char_from_dnanumber(number, nmer_size, flipped).
 *
 * The chain is walked twice:
 *   1. to count the nodes (so the buffer can be sized) and to mark each
 *      one as have_seen;
 *   2. to emit the characters, tracking the "flipped" orientation, which
 *      is toggled according to the twist flag of each link crossed.
 *
 * A walk stops at the first node that has a NULL a or b link (the other
 * end of the stream) or when there is no next node. The node at which the
 * walk stops contributes no character; it is only marked have_seen.
 *
 * ln must be non-NULL and is treated as an end of the stream: when ln->a
 * is NULL the walk leaves through ln->b, otherwise through ln->a.
 * lnad controls debug output written to lnad->ofp.
 *
 * Only the seq member of the returned Sequence is set here.
 *
 * NOTE(review): a node with both a and b NULL would dereference NULL in
 * the start-of-walk code, and the result of calloc() is not checked -
 * confirm callers never pass such a node.
 */
Sequence * extract_dna_LinkStream(LinkStream * ln,LinkNumberArrayDebug * lnad,int nmer_size)
{
  LinkStream * runner;
  LinkStream * prev;
  LinkStream * next = NULL;
  int i = 0;
  Sequence * out;
  int flipped = 0;

  assert(ln);

  /* the starting node contributes the first character */
  i++;

  if( lnad->extraction != 0 ) {
    /* pointers must be printed with %p, not %d */
    fprintf(lnad->ofp,"Extracting DNA from linkstream %p\n",(void *)ln);
  }

  ln->have_seen = 1;

  /* ---- pass 1: count nodes and mark them as seen ---- */

  /* step onto the neighbour at the far side of the starting link */
  if( ln->a == NULL ) {
    if( ln->b->x == ln ) {
      runner = ln->b->y;
    } else {
      runner = ln->b->x;
    }
  } else {
    if( ln->a->x == ln ) {
      runner = ln->a->y;
    } else {
      runner = ln->a->x;
    }
  }

  prev = ln;
  while( runner != NULL ) {
    /*
     * To find the outgoing link from here, check that neither a nor b is
     * NULL, then pick the link that does not lead back to where we came
     * from.
     */
    if( runner->a == NULL || runner->b == NULL ) {
      /* other end of stream */
      break;
    }

    runner->have_seen = 1;
    i++;

    if( lnad->extraction > 2 ) {
      fprintf(lnad->ofp,"Extracting DNA from linkstream %p, runner %p, position count %d\n",(void *)ln,(void *)runner,i);
    }

    if( runner->a->x == runner && runner->a->y != prev ) {
      next = runner->a->y;
    } else if( runner->a->y == runner && runner->a->x != prev ) {
      next = runner->a->x;
    } else if( runner->b->x == runner && runner->b->y != prev ) {
      next = runner->b->y;
    } else if( runner->b->y == runner && runner->b->x != prev ) {
      next = runner->b->x;
    } else {
      fatal("Unable to move off edge!");
      /* should fatal() ever return, stop rather than follow a stale pointer */
      next = NULL;
    }

    prev = runner;
    runner = next;
  }

  /* ---- pass 2: emit one character per counted node ---- */

  out = Sequence_alloc();
  /* calloc zero-fills, so the string is already NUL terminated */
  out->seq = calloc(i+1,sizeof(char));

  i = 0;
  flipped = ln->starting_flip;

  if( ln->a == NULL ) {
    out->seq[0] = first_char_from_dnanumber(ln->number,nmer_size,flipped);
  } else {
    /* b is NULL, indicating a 3' end of a sequence, so flip the flip */
    out->seq[0] = first_char_from_dnanumber(ln->number,nmer_size,!flipped);
  }

  if( ln->a == NULL ) {
    if( ln->b->x == ln ) {
      runner = ln->b->y;
    } else {
      runner = ln->b->x;
    }
    if( ln->b->twist == 1 ) {
      flipped = !flipped;
    }
  } else {
    if( ln->a->x == ln ) {
      runner = ln->a->y;
    } else {
      runner = ln->a->x;
    }
    /*
     * as b is always on the reverse strand, invert the flipped sense as
     * we read down it (note: here an untwisted link is the one that flips)
     */
    if( ln->a->twist == 0 ) {
      flipped = !flipped;
    }
  }

  prev = ln;
  i++;

  while( runner != NULL ) {
    /* same walking rule as in pass 1 */
    if( runner->a == NULL || runner->b == NULL ) {
      /* other end of stream */
      break;
    }

    out->seq[i] = first_char_from_dnanumber(runner->number,nmer_size,flipped);
    i++;

    if( runner->a->x == runner && runner->a->y != prev ) {
      next = runner->a->y;
      if( runner->a->twist == 1 ) {
        flipped = !flipped;
      }
    } else if( runner->a->y == runner && runner->a->x != prev ) {
      next = runner->a->x;
      if( runner->a->twist == 1 ) {
        flipped = !flipped;
      }
    } else if( runner->b->x == runner && runner->b->y != prev ) {
      next = runner->b->y;
      if( runner->b->twist == 1 ) {
        flipped = !flipped;
      }
    } else if( runner->b->y == runner && runner->b->x != prev ) {
      next = runner->b->x;
      if( runner->b->twist == 1 ) {
        flipped = !flipped;
      }
    } else {
      fatal("Unable to move off edge!");
      /* should fatal() ever return, stop rather than follow a stale pointer */
      next = NULL;
    }

    prev = runner;
    runner = next;
  }

  /*
   * Mark the node at the far end of the stream as seen. The walk can also
   * end with runner == NULL, in which case there is nothing to mark.
   */
  if( runner != NULL ) {
    runner->have_seen = 1;
  }

  return out;
}