/*
** biolib_getorf
**
** Runs getorf_FindORFs once over the sequence in forward sense, then
** reverse-complements the sequence in place.
**
** seq     - sequence to search; it is left reverse-complemented on return
** table   - translation table handed to getorf_FindORFs
** minsize - minimum ORF size; it is passed on divided by 3
**           (presumably nucleotides converted to codons - TODO confirm)
**
** NOTE(review): seqout, circular, find, methionine and around are not
** declared in this function, so they have to be provided at file scope -
** confirm that they exist and hold the intended values.
*/
void biolib_getorf(AjPSeq seq, AjPTrn table, unsigned int minsize)
{
    /* presumably filled in by getorf_FindORFs; not read here - TODO confirm */
    ORFrec record;

    /* ORF number to append to name of sequence to create unique name */
    ajint orf_no = 1;

    /* ajTrue = forward sense */
    AjBool sense = ajTrue;

    ajint len;

    /* get the length of the sequence */
    len = ajSeqGetLen(seq);

    /*
    ** Find the ORFs. The caller's translation table is used (the
    ** parameter was previously ignored in favour of an undeclared
    ** 'trnTable'). 10000 is a hard-coded value for the maximum-size
    ** argument.
    */
    getorf_FindORFs(seq, len, table, minsize/3, 10000, seqout, sense,
                    circular, find, &orf_no, methionine, around, &record);

    /*
    ** The reverse-strand search is currently disabled: the sequence is
    ** still reverse-complemented, but getorf_FindORFs is not called a
    ** second time. To re-enable it, repeat the call above with the sense
    ** argument set to ajFalse.
    */
    ajSeqReverseForce(seq);
}
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeqout seqout; AjPSeq seq = NULL; AjPStr str = NULL; AjPStr desc = NULL; ajint tail3; ajint tail5 = 0; ajint minlength; ajint mismatches; AjBool reverse; AjBool fiveprime; AjBool cvttolower; embInit("trimest", argc, argv); seqall = ajAcdGetSeqall("sequence"); seqout = ajAcdGetSeqoutall("outseq"); minlength = ajAcdGetInt("minlength"); mismatches = ajAcdGetInt("mismatches"); reverse = ajAcdGetBoolean("reverse"); fiveprime = ajAcdGetBoolean("fiveprime"); cvttolower = ajAcdGetToggle("tolower"); str = ajStrNew(); while(ajSeqallNext(seqall, &seq)) { /* get sequence description */ ajStrAssignS(&desc, ajSeqGetDescS(seq)); /* get positions to cut in 5' poly-T and 3' poly-A tails */ if(fiveprime) tail5 = trimest_get_tail(seq, 5, minlength, mismatches); tail3 = trimest_get_tail(seq, 3, minlength, mismatches); /* get a COPY of the sequence string */ ajStrAssignS(&str, ajSeqGetSeqS(seq)); /* cut off longest of 3' or 5' tail */ if(tail5 > tail3) { /* if 5' poly-T tail, then reverse the sequence */ ajDebug("Tail=%d\n", tail5); if(cvttolower) trimest_tolower(&str, 0, tail5-1); else ajStrKeepRange(&str, tail5, ajSeqGetLen(seq)-1); ajStrAppendC(&desc, " [poly-T tail removed]"); } else if(tail3 > tail5) { /* remove 3' poly-A tail */ ajDebug("Tail=%d\n", tail3); if(cvttolower) trimest_tolower(&str, ajSeqGetLen(seq)-tail3, ajSeqGetLen(seq)); else ajStrKeepRange(&str, 0, ajSeqGetLen(seq)-tail3-1); ajStrAppendC(&desc, " [poly-A tail removed]"); } /* write sequence out */ ajSeqAssignSeqS(seq, str); /* reverse complement if poly-T found */ if(tail5 > tail3 && reverse) { ajSeqReverseForce(seq); ajStrAppendC(&desc, " [reverse complement]"); } /* set description */ ajSeqAssignDescS(seq, desc); ajSeqoutWriteSeq(seqout, seq); } ajSeqoutClose(seqout); ajStrDel(&str); ajStrDel(&desc); ajSeqallDel(&seqall); ajSeqDel(&seq); ajSeqoutDel(&seqout); embExit(); return 0; }
AjPSeqout get_orf( AjPSeqout seqout, AjPSeqall seqall, AjPStr tablestr, ajuint minsize, ajuint maxsize, AjPStr findstr, AjBool methionine, AjBool circular, AjBool reverse, ajint around ) { ajint table; ajint find; AjPSeq seq = NULL; AjPTrn trnTable; AjPStr sseq = NULL; /* sequence string */ /* ORF number to append to name of sequence to create unique name */ ajint orf_no; AjBool sense; /* ajTrue = forward sense */ ajint len; /* initialise the translation table */ ajStrToInt(tablestr, &table); trnTable = ajTrnNewI(table); /* what sort of ORF are we looking for */ ajStrToInt(findstr, &find); /* ** get the minimum size converted to protein length if storing ** protein sequences */ if (find == P_STOP2STOP || find == P_START2STOP || find == AROUND_START) { minsize /= 3; maxsize /= 3; } while (ajSeqallNext(seqall, &seq)) { orf_no = 1; /* number of the next ORF */ sense = ajTrue; /* forward sense initially */ /* get the length of the sequence */ len = ajSeqGetLen(seq); /* ** if the sequence is circular, append it to itself to triple its ** length so can deal easily with wrapped ORFs, but don't update ** len */ if (circular) { ajStrAssignS(&sseq, ajSeqGetSeqS(seq)); ajStrAppendS(&sseq, ajSeqGetSeqS(seq)); ajStrAppendS(&sseq, ajSeqGetSeqS(seq)); ajSeqAssignSeqS(seq, sseq); } /* find the ORFs */ getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense, circular, find, &orf_no, methionine, around, &record); /* now reverse complement the sequence and do it again */ if (reverse) { sense = ajFalse; ajSeqReverseForce(seq); getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense, circular, find, &orf_no, methionine, around); } } ajTrnDel(&trnTable); ajSeqDel(&seq); ajStrDel(&sseq); }