/*
** eprimer3_send_range2
**
** Write a set of ranges to the stream as "TAG=start-end start-end ...\n".
** Each piece of text is handed to eprimer3_write().
**
** Nothing at all is written when the range object holds no elements.
**
** stream: output stream passed on to eprimer3_write()
** tag:    text written before the '='
** value:  ranges to write, printed exactly as stored (no offset applied)
*/
static void eprimer3_send_range2(FILE * stream, const char * tag,
                                 const AjPRange value)
{
    AjPStr str = NULL;
    ajuint nranges;
    ajuint n;
    ajuint start;
    ajuint end;

    nranges = ajRangeGetSize(value);

    if(!nranges)
        return;

    str = ajStrNew();

    ajFmtPrintS(&str, "%s=", tag);
    eprimer3_write(str, stream);
    ajStrSetClear(&str);

    for(n = 0; n < nranges; n++)
    {
        ajRangeElementGetValues(value, n, &start, &end);

        /* start and end are unsigned, so format them as unsigned */
        ajFmtPrintS(&str, "%u-%u ", start, end);
        eprimer3_write(str, stream);
        ajStrSetClear(&str);
    }

    ajFmtPrintS(&str, "\n");
    eprimer3_write(str, stream);

    ajStrDel(&str);

    return;
}
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeqout seqout; AjPSeqout junkout; AjPSeq seq = NULL; AjPStr exclude = NULL; AjPStr pattern = NULL; AjPStr name = NULL; AjPStr acc = NULL; embInit("notseq", argc, argv); seqout = ajAcdGetSeqoutall("outseq"); junkout = ajAcdGetSeqoutall("junkoutseq"); seqall = ajAcdGetSeqall("sequence"); exclude = ajAcdGetString("exclude"); notseq_readfile(exclude, &pattern); while(ajSeqallNext(seqall, &seq)) { ajStrAssignS(&name, ajSeqGetNameS(seq)); ajStrAssignS(&acc, ajSeqGetAccS(seq)); if(embMiscMatchPatternDelimC(name, pattern, ",;") || embMiscMatchPatternDelimC(acc, pattern, ",;")) ajSeqoutWriteSeq(junkout, seq); else /* no match, so not excluded */ ajSeqoutWriteSeq(seqout, seq); ajStrSetClear(&name); ajStrSetClear(&acc); } ajSeqoutClose(seqout); ajSeqoutClose(junkout); ajSeqallDel(&seqall); ajSeqDel(&seq); ajSeqoutDel(&seqout); ajSeqoutDel(&junkout); ajStrDel(&exclude); ajStrDel(&pattern); ajStrDel(&name); ajStrDel(&acc); embExit(); return 0; }
/*
** remap_read_file_of_enzyme_names
**
** If the string starts with '@', treat the remainder as a file name and
** replace the string with the file's contents: every line that is not
** empty and does not start with '#' or '!' is appended, followed by a
** comma. A string that does not start with '@' is left untouched.
**
** enzymes: [in/out] enzyme names, or "@filename" on input
**
** Calls ajFatal() when the named file cannot be opened.
*/
static void remap_read_file_of_enzyme_names(AjPStr *enzymes)
{
    AjPFile file = NULL;
    AjPStr line  = NULL;
    const char *p = NULL;

    /* only a value whose first character is '@' names a file */
    if(ajStrFindC(*enzymes, "@") != 0)
        return;

    ajStrTrimC(enzymes, "@");           /* remove the @ */
    file = ajFileNewInNameS(*enzymes);

    /* %S expects the string itself, not the address of the string */
    if(file == NULL)
        ajFatal("Cannot open the file of enzyme names: '%S'", *enzymes);

    /* blank off the enzyme file name and replace with the enzyme names */
    ajStrSetClear(enzymes);
    line = ajStrNew();

    while(ajReadlineTrim(file, &line))
    {
        p = ajStrGetPtr(line);

        /* skip blank lines and '#' / '!' comment lines */
        if(!*p || *p == '#' || *p == '!')
            continue;

        ajStrAppendS(enzymes, line);
        ajStrAppendC(enzymes, ",");
    }

    ajStrDel(&line);
    ajFileClose(&file);

    return;
}
/*
** ajXmlClear
**
** Reset an xml object for reuse: non-empty text fields are cleared,
** TextPtr is deleted, the counters are zeroed and any attached document
** is destroyed.
**
** xml: object to reset
*/
void ajXmlClear(AjPXml xml)
{
    AjPStr *textfields[7];
    AjPStr current = NULL;
    ajuint i;

    textfields[0] = &xml->Id;
    textfields[1] = &xml->Db;
    textfields[2] = &xml->Setdb;
    textfields[3] = &xml->Full;
    textfields[4] = &xml->Qry;
    textfields[5] = &xml->Formatstr;
    textfields[6] = &xml->Filename;

    /* only touch strings that actually hold text */
    for(i = 0; i < 7; i++)
    {
        current = *textfields[i];

        if(MAJSTRGETLEN(current))
            ajStrSetClear(textfields[i]);
    }

    ajStrDel(&xml->TextPtr);

    xml->Count  = 0;
    xml->Fpos   = 0L;
    xml->Format = 0;

    if(xml->Doc)
        ajDomDocumentDestroyNode(xml->Doc, &xml->Doc);

    return;
}
/*
** notseq_readfile
**
** Build the pattern string used to exclude sequences.
**
** When 'exclude' does not start with '@' it is copied to 'pattern'
** unchanged. Otherwise the text after the '@' is used as a file name and
** 'pattern' becomes the file's lines, each followed by a comma; empty
** lines and lines starting with '#' or '!' are ignored.
**
** exclude: [in]  pattern text or "@filename"
** pattern: [out] resulting pattern string
**
** Calls ajFatal() when the named file cannot be opened.
*/
static void notseq_readfile(const AjPStr exclude, AjPStr *pattern)
{
    AjPFile listfile = NULL;
    AjPStr text      = NULL;
    AjPStr fname     = NULL;
    char first;

    /* plain pattern: nothing to read */
    if(ajStrFindC(exclude, "@") != 0)
    {
        ajStrAssignS(pattern, exclude);

        return;
    }

    ajStrAssignS(&fname, exclude);
    ajStrTrimC(&fname, "@");            /* strip the '@' marker */

    listfile = ajFileNewInNameS(fname);

    if(listfile == NULL)
        ajFatal("Cannot open the file of sequence names: '%S'",
                fname);

    /* start from an empty pattern and collect the names from the file */
    ajStrSetClear(pattern);
    text = ajStrNew();

    while(ajReadlineTrim(listfile, &text))
    {
        first = *ajStrGetPtr(text);

        if(first && first != '#' && first != '!')
        {
            ajStrAppendS(pattern, text);
            ajStrAppendC(pattern, ",");
        }
    }

    ajFileClose(&listfile);
    ajStrDel(&text);
    ajStrDel(&fname);

    return;
}
/*
** ajRefseqClear
**
** Reset a reference sequence object for reuse: non-empty query/text
** fields are cleared, TextPtr, Desc and Seq are deleted, every range on
** Seqlist is deleted and the list itself is freed, and the counters are
** zeroed.
**
** refseq: object to reset
*/
void ajRefseqClear(AjPRefseq refseq)
{
    AjPSeqRange range = NULL;
    AjPStr *textfields[7];
    AjPStr current = NULL;
    ajuint i;

    textfields[0] = &refseq->Id;
    textfields[1] = &refseq->Db;
    textfields[2] = &refseq->Setdb;
    textfields[3] = &refseq->Full;
    textfields[4] = &refseq->Qry;
    textfields[5] = &refseq->Formatstr;
    textfields[6] = &refseq->Filename;

    /* only touch strings that actually hold text */
    for(i = 0; i < 7; i++)
    {
        current = *textfields[i];

        if(MAJSTRGETLEN(current))
            ajStrSetClear(textfields[i]);
    }

    ajStrDel(&refseq->TextPtr);
    ajStrDel(&refseq->Desc);
    ajStrDel(&refseq->Seq);

    /* empty the range list before releasing the list itself */
    while(ajListPop(refseq->Seqlist, (void **) &range))
        ajSeqrangeDel(&range);

    ajListFree(&refseq->Seqlist);

    refseq->Count  = 0;
    refseq->Fpos   = 0L;
    refseq->Format = 0;

    return;
}
/*
** ajResourceClear
**
** Reset a resource object for reuse. Non-empty text fields are cleared,
** every list is emptied (its members are deleted, the list itself is
** kept), TextPtr is deleted and the counters are zeroed.
**
** resource: object to reset
*/
void ajResourceClear(AjPResource resource)
{
    AjPReslink  xref  = NULL;
    AjPResquery query = NULL;
    AjPResterm  term  = NULL;
    AjPStr      text  = NULL;
    AjPStr      current = NULL;
    AjPStr     *descfields[7];
    AjPStr     *qryfields[6];
    AjPList     termlists[5];
    ajuint i;

    if(MAJSTRGETLEN(resource->Id))
        ajStrSetClear(&resource->Id);

    if(ajListGetLength(resource->Idalt))
    {
        while(ajListstrPop(resource->Idalt, &text))
            ajStrDel(&text);
    }

    /* descriptive text fields */
    descfields[0] = &resource->Acc;
    descfields[1] = &resource->Name;
    descfields[2] = &resource->Desc;
    descfields[3] = &resource->Url;
    descfields[4] = &resource->Urllink;
    descfields[5] = &resource->Urlrest;
    descfields[6] = &resource->Urlsoap;

    for(i = 0; i < 7; i++)
    {
        current = *descfields[i];

        if(MAJSTRGETLEN(current))
            ajStrSetClear(descfields[i]);
    }

    if(ajListGetLength(resource->Cat))
    {
        while(ajListstrPop(resource->Cat, &text))
            ajStrDel(&text);
    }

    /* lists whose members are resource terms */
    termlists[0] = resource->Taxon;
    termlists[1] = resource->Edamdat;
    termlists[2] = resource->Edamfmt;
    termlists[3] = resource->Edamid;
    termlists[4] = resource->Edamtpc;

    for(i = 0; i < 5; i++)
    {
        if(ajListGetLength(termlists[i]))
        {
            while(ajListPop(termlists[i], (void**) &term))
                ajRestermDel(&term);
        }
    }

    if(ajListGetLength(resource->Xref))
    {
        while(ajListPop(resource->Xref, (void**) &xref))
            ajReslinkDel(&xref);
    }

    if(ajListGetLength(resource->Query))
    {
        while(ajListPop(resource->Query, (void**) &query))
            ajResqueryDel(&query);
    }

    if(ajListGetLength(resource->Example))
    {
        while(ajListstrPop(resource->Example, &text))
            ajStrDel(&text);
    }

    /* query-related text fields */
    qryfields[0] = &resource->Db;
    qryfields[1] = &resource->Setdb;
    qryfields[2] = &resource->Full;
    qryfields[3] = &resource->Qry;
    qryfields[4] = &resource->Formatstr;
    qryfields[5] = &resource->Filename;

    for(i = 0; i < 6; i++)
    {
        current = *qryfields[i];

        if(MAJSTRGETLEN(current))
            ajStrSetClear(qryfields[i]);
    }

    ajStrDel(&resource->TextPtr);

    resource->Count  = 0;
    resource->Fpos   = 0L;
    resource->Format = 0;

    return;
}
static void extractfeat_FeatSeqExtract(const AjPSeq seq, AjPSeqout seqout, AjPFeattable featab, ajint before, ajint after, AjBool join, AjBool featinname, const AjPStr describe) { AjIList iter = NULL; AjPFeature gf = NULL; AjBool single; /* ajtrue = is not a multiple */ AjBool parent; /* ajtrue = is a parent of a multiple */ AjBool child; /* ajTrue = is a child of a multiple */ AjBool compall; /* ajTrue = reverse comp all of join */ AjBool sense; /* ajTrue = forward sense */ AjBool remote; /* ajTrue = remote ID */ AjPStr type = NULL; /* name of feature */ AjPStr featseq = NULL; /* feature sequence string */ AjPStr tmpseq = NULL; /* temporary sequence string */ ajint firstpos; ajint lastpos; /* bounds of feature in sequence */ AjPStr describeout = NULL; /* tag names/values to add to descriptions */ ajuint count = 0; /* For all features... */ if(featab && ajFeattableGetSize(featab)) { /* initialise details of a feature */ featseq = ajStrNew(); tmpseq = ajStrNew(); type = ajStrNew(); remote = ajFalse; compall = ajFalse; sense = ajTrue; firstpos = 0; lastpos = 0; describeout = ajStrNew(); iter = ajListIterNewread(featab->Features); while(!ajListIterDone(iter)) { gf = ajListIterGet(iter) ; /* ** Determine what sort of thing this feature is. Only one of ** these will be true. 
** True if this is part of a multiple join and it is not ** the parent */ child = ajFalse; /* True if this is part of a multiple join and it is the parent */ parent = ajFalse; /* True if this is not part of a multiple join */ single = ajFalse; if(ajFeatIsMultiple(gf)) { if(ajFeatIsChild(gf)) child = ajTrue; else parent = ajTrue; } else single = ajTrue; /* ** If not wish to assembling joins(), then force all features ** to be treated as single */ if(!join) { child = ajFalse; parent = ajFalse; single = ajTrue; } ajDebug("feature %S %d-%d is parent %B, child %B, single %B\n", ajFeatGetType(gf), ajFeatGetStart(gf), ajFeatGetEnd(gf), parent, child, single); /* ajUser("feature %S %d-%d is parent %B, child %B, single %B", ajFeatGetType(gf), ajFeatGetStart(gf), ajFeatGetEnd(gf), parent, child, single); */ /* ** If single or parent, write out any stored previous feature ** sequence */ if(count++ && !child) { extractfeat_WriteOut(seqout, &featseq, compall, sense, firstpos, lastpos, before, after, seq, remote, type, featinname, describeout); /* reset joined feature information */ ajStrSetClear(&featseq); ajStrSetClear(&tmpseq); ajStrSetClear(&type); ajStrSetClear(&describeout); remote = ajFalse; compall = ajFalse; sense = ajTrue; firstpos = 0; lastpos = 0; } /* if parent, note if have Complemented Join */ if(parent) compall = ajFeatIsCompMult(gf); /* ** Get the sense of the feature ** NB. if complementing several joined features, then pretend they ** are forward sense until its possible to reverse-complement ** them all together. 
*/ if(!compall && ajFeatGetStrand(gf) == '-') sense = ajFalse; /* get 'type' name of feature */ if(single || parent) ajStrAssignS(&type, ajFeatGetType(gf)); /* ** if single or parent, get 'before' + 'after' sequence ** positions */ if(single || parent) { firstpos = ajFeatGetStart(gf)-1; lastpos = ajFeatGetEnd(gf)-1; } /* if child, update the boundary positions */ if(child) { if(sense) lastpos = ajFeatGetEnd(gf)-1; else firstpos = ajFeatGetStart(gf)-1; } extractfeat_MatchPatternDescribe(gf, describe, &describeout); /* get feature sequence(complement if required) */ if(!child) { if(join) ajFeatGetSeqJoin(gf, featab, seq, &tmpseq); else ajFeatGetSeq(gf, seq, &tmpseq); ajDebug("extracted feature = %d bases\n", ajStrGetLen(tmpseq)); /*ajUser("extracted feature = %d bases", ajStrGetLen(tmpseq));*/ ajStrAssignS(&featseq, tmpseq); } } ajListIterDel(&iter) ; /* ** write out any previous sequence(s) ** - add before + after, complement all */ extractfeat_WriteOut(seqout, &featseq, compall, sense, firstpos, lastpos, before, after, seq, remote, type, featinname, describeout); ajStrDel(&featseq); ajStrDel(&tmpseq); ajStrDel(&type); ajStrDel(&describeout); } return; }
AjPPatlistRegex ajPatlistRegexRead (const AjPStr patspec, const AjPStr patname, const AjPStr fmt, ajuint type, AjBool upper, AjBool lower) { AjPPatlistRegex patlist = NULL; AjPStr line = NULL; AjPStr pat = NULL; AjPStr name = NULL; AjPFilebuff infile = NULL; AjPStr patstr = NULL; ajuint ifmt; ajuint npat = 0; AjPStr namestr = NULL; ajStrAssignS(&namestr, patname); ajStrAssignEmptyC(&namestr, "regex"); ajStrAssignS(&patstr, patspec); patlist = ajPatlistRegexNewType(type); ifmt = patternRegexFormat(fmt); if(ajStrGetCharFirst(patspec) == '@') { ajStrCutStart(&patstr, 1); infile = ajFilebuffNewNameS(patstr); if(!infile) { ajErr("Unable to open regular expression file '%S'", patstr); return NULL; } line = ajStrNew(); pat = ajStrNew(); name = ajStrNew(); if(!ifmt) { ajBuffreadLineTrim(infile,&line); if(ajStrPrefixC(line, ">")) ifmt = 2; else ifmt = 1; ajFilebuffReset(infile); } switch(ifmt) { case 1: while (ajBuffreadLineTrim(infile,&line)) { npat++; ajStrAppendS (&pat,line); if(lower) ajStrFmtLower(&pat); if(upper) ajStrFmtUpper(&pat); ajFmtPrintS(&name, "%S%u", namestr, npat); ajPatternRegexNewList(patlist,name,pat); ajStrSetClear(&pat); } break; default: while (ajBuffreadLineTrim(infile,&line)) { if (ajStrFindC(line,">")>-1) { npat++; if (ajStrGetLen(name)) { if(lower) ajStrFmtLower(&pat); if(upper) ajStrFmtUpper(&pat); ajPatternRegexNewList(patlist,name,pat); ajStrSetClear(&name); ajStrSetClear(&pat); } ajStrCutStart(&line,1); ajStrAssignS (&name,line); if(!ajStrGetLen(name)) ajFmtPrintS(&name, "%S%u", namestr, npat); } else ajStrAppendS (&pat,line); } ajStrAssignEmptyS(&name, patname); ajPatternRegexNewList(patlist,name,pat); ajStrSetClear(&pat); break; } ajFilebuffDel(&infile); } else { ajStrAssignS(&pat, patspec); if(lower) ajStrFmtLower(&pat); if(upper) ajStrFmtUpper(&pat); ajStrAssignS(&name, namestr); ajPatternRegexNewList(patlist,name,pat); } ajStrDel(&name); ajStrDel(&namestr); ajStrDel(&patstr); ajStrDel(&line); ajStrDel(&pat); return patlist; }
AjPPatlistSeq ajPatlistSeqRead (const AjPStr patspec, const AjPStr patname, const AjPStr fmt, AjBool protein, ajuint mismatches) { AjPPatlistSeq patlist = NULL; AjPStr line = NULL; AjPStr name = NULL; AjPFilebuff infile = NULL; AjPRegexp mismreg = NULL; AjPStr patstr = NULL; AjPStr pat = NULL; ajuint mismatch = 0; ajint ifmt = 0; ajuint npat = 0; AjPStr namestr = NULL; ajStrAssignS(&namestr, patname); ajStrAssignEmptyC(&namestr, "pattern"); ajStrAssignS(&patstr, patspec); patlist = ajPatlistSeqNewType(protein); ifmt = patternSeqFormat(fmt); ajDebug("ajPatlistSeqRead patspec: '%S' patname: '%S' " "protein: %B mismatches: %d\n", patspec, patname, protein, mismatches); if(ajStrGetCharFirst(patstr) == '@') { ajStrCutStart(&patstr, 1); infile = ajFilebuffNewNameS(patstr); if(!infile) { ajErr("Unable to open pattern file '%S'", patstr); return NULL; } line = ajStrNew(); name = ajStrNew(); if(!ifmt) { ajBuffreadLineTrim(infile,&line); if(ajStrPrefixC(line, ">")) ifmt = 2; else ifmt = 1; ajFilebuffReset(infile); } switch(ifmt) { case 1: while (ajBuffreadLineTrim(infile,&line)) { npat++; ajStrAppendS (&pat,line); ajFmtPrintS(&name, "%S%u", namestr, npat); ajPatternSeqNewList(patlist,name,pat,mismatches); ajStrSetClear(&pat); } break; default: mismreg = ajRegCompC("<mismatch=(\\d+)>"); while (ajBuffreadLineTrim(infile,&line)) { if (ajStrGetCharFirst(line) == '>') { if (ajStrGetLen(name)) { ajPatternSeqNewList(patlist,name,pat, mismatch); ajStrSetClear(&name); ajStrSetClear(&pat); mismatch=mismatches; } ajStrCutStart(&line,1); if (ajRegExec(mismreg,line)) { ajRegSubI(mismreg,1,&name); ajStrToUint(name,&mismatch); ajStrTruncateLen(&line,ajRegOffset(mismreg)); ajStrTrimWhiteEnd(&line); } ajStrAssignS (&name,line); ajStrAssignEmptyS(&name, patname); } else ajStrAppendS (&pat,line); } ajStrAssignEmptyS(&name, patname); ajPatternSeqNewList(patlist,name,pat,mismatch); ajRegFree(&mismreg); break; } ajFilebuffDel(&infile); } else { ajStrAssignS(&name, namestr); 
ajPatternSeqNewList(patlist,name,patstr,mismatches); } ajStrDel(&name); ajStrDel(&line); ajStrDel(&pat); ajStrDel(&namestr); ajStrDel(&patstr); return patlist; }
int main(int argc, char **argv) { /* Global details */ AjBool explain_flag; AjBool file_flag; AjPStr* task; AjBool do_primer; AjBool do_hybrid; ajint num_return; ajint first_base_index; /* "Sequence" Input Tags */ AjPSeqall sequence; AjPRange included_region; AjPRange target; AjPRange excluded_region; AjPStr left_input; AjPStr right_input; /* Primer details */ AjBool pick_anyway; AjPFile mispriming_library; float max_mispriming; float pair_max_mispriming; ajint gc_clamp; ajint opt_size; ajint min_size; ajint max_size; float opt_tm; float min_tm; float max_tm; float max_diff_tm; float opt_gc_percent; float min_gc; float max_gc; float salt_conc; float dna_conc; ajint num_ns_accepted; float self_any; float self_end; ajint max_poly_x; /* Sequence Quality. These are not (yet) implemented */ /* AjPFile sequence_quality; ajint min_quality; ajint min_end_quality; ajint quality_range_min; ajint quality_range_max; */ /* Product details */ ajint product_opt_size; AjPRange product_size_range; float product_opt_tm; float product_min_tm; float product_max_tm; /* Objective Function Penalty Weights for Primers */ float max_end_stability; /* these are not (yet) implemented */ /* float inside_penalty; float outside_penalty; */ /* Primer penalties */ /* these are not (yet) implemented */ /* Internal Oligo "Sequence" Input Tags */ AjPRange internal_oligo_excluded_region; /* Internal Oligo "Global" Input Tags */ AjPStr internal_oligo_input; ajint internal_oligo_opt_size; ajint internal_oligo_min_size; ajint internal_oligo_max_size; float internal_oligo_opt_tm; float internal_oligo_min_tm; float internal_oligo_max_tm; float internal_oligo_opt_gc_percent; float internal_oligo_min_gc; float internal_oligo_max_gc; float internal_oligo_salt_conc; float internal_oligo_dna_conc; float internal_oligo_self_any; float internal_oligo_self_end; ajint internal_oligo_max_poly_x; AjPFile internal_oligo_mishyb_library; float internal_oligo_max_mishyb; /* ajint internal_oligo_min_quality; */ /* 
Internal Oligo penalties */ /* these are not (yet) implemented */ /* EMBOSS-wrapper-specific stuff */ AjPFile outfile; /* other variables */ AjPStr result = NULL; AjPStr strand = NULL; AjPStr substr = NULL; AjPSeq seq = NULL; ajint begin = 0; ajint end; FILE* stream; AjPStr taskstr = NULL; const AjPStr program = NULL; /* pipe variables */ int *pipeto; /* pipe to feed the exec'ed program input */ int *pipefrom; /* pipe to get the exec'ed program output */ embInit("eprimer3", argc, argv); /* Global details */ explain_flag = ajAcdGetBoolean("explainflag"); file_flag = ajAcdGetBoolean("fileflag"); task = ajAcdGetList("task"); do_primer = ajAcdGetToggle("primer"); do_hybrid = ajAcdGetToggle("hybridprobe"); num_return = ajAcdGetInt("numreturn"); first_base_index = ajAcdGetInt("firstbaseindex"); /* "Sequence" Input Tags */ sequence = ajAcdGetSeqall("sequence"); included_region = ajAcdGetRange("includedregion"); target = ajAcdGetRange("targetregion"); excluded_region = ajAcdGetRange("excludedregion"); left_input = ajAcdGetString("forwardinput"); right_input = ajAcdGetString("reverseinput"); /* Primer details */ pick_anyway = ajAcdGetBoolean("pickanyway"); mispriming_library = ajAcdGetInfile("mispriminglibraryfile"); max_mispriming = ajAcdGetFloat("maxmispriming"); pair_max_mispriming = ajAcdGetFloat("pairmaxmispriming"); gc_clamp = ajAcdGetInt("gcclamp"); opt_size = ajAcdGetInt("osize"); min_size = ajAcdGetInt("minsize"); max_size = ajAcdGetInt("maxsize"); opt_tm = ajAcdGetFloat("otm"); min_tm = ajAcdGetFloat("mintm"); max_tm = ajAcdGetFloat("maxtm"); max_diff_tm = ajAcdGetFloat("maxdifftm"); opt_gc_percent = ajAcdGetFloat("ogcpercent"); min_gc = ajAcdGetFloat("mingc"); max_gc = ajAcdGetFloat("maxgc"); salt_conc = ajAcdGetFloat("saltconc"); dna_conc = ajAcdGetFloat("dnaconc"); num_ns_accepted = ajAcdGetInt("numnsaccepted"); self_any = ajAcdGetFloat("selfany"); self_end = ajAcdGetFloat("selfend"); max_poly_x = ajAcdGetInt("maxpolyx"); AJCNEW0(pipeto,2); AJCNEW0(pipefrom,2); 
/* Sequence Quality */ /* these are not (yet) implemented */ /* sequence_quality = ajAcdGetInfile("sequencequality"); min_quality = ajAcdGetInt("minquality"); min_end_quality = ajAcdGetInt("minendquality"); quality_range_min = ajAcdGetInt("qualityrangemin"); quality_range_max = ajAcdGetInt("qualityrangemax"); */ /* Product details */ product_opt_size = ajAcdGetInt("psizeopt"); product_size_range = ajAcdGetRange("prange"); product_opt_tm = ajAcdGetFloat("ptmopt"); product_min_tm = ajAcdGetFloat("ptmmin"); product_max_tm = ajAcdGetFloat("ptmmax"); /* Objective Function Penalty Weights for Primers */ max_end_stability = ajAcdGetFloat("maxendstability"); /* these are not (yet) implemented */ /* inside_penalty = ajAcdGetFloat("insidepenalty"); outside_penalty = ajAcdGetFloat("outsidepenalty"); */ /* Primer penalties */ /* these are not (yet) implemented */ /* Internal Oligo "Sequence" Input Tags */ internal_oligo_excluded_region = ajAcdGetRange("oexcludedregion"); internal_oligo_input = ajAcdGetString("oligoinput"); /* Internal Oligo "Global" Input Tags */ internal_oligo_opt_size = ajAcdGetInt("osizeopt"); internal_oligo_min_size = ajAcdGetInt("ominsize"); internal_oligo_max_size = ajAcdGetInt("omaxsize"); internal_oligo_opt_tm = ajAcdGetFloat("otmopt"); internal_oligo_min_tm = ajAcdGetFloat("otmmin"); internal_oligo_max_tm = ajAcdGetFloat("otmmax"); internal_oligo_opt_gc_percent = ajAcdGetFloat("ogcopt"); internal_oligo_min_gc = ajAcdGetFloat("ogcmin"); internal_oligo_max_gc = ajAcdGetFloat("ogcmax"); internal_oligo_salt_conc = ajAcdGetFloat("osaltconc"); internal_oligo_dna_conc = ajAcdGetFloat("odnaconc"); internal_oligo_self_any = ajAcdGetFloat("oanyself"); internal_oligo_self_end = ajAcdGetFloat("oendself"); internal_oligo_max_poly_x = ajAcdGetInt("opolyxmax"); internal_oligo_mishyb_library = ajAcdGetInfile("mishyblibraryfile"); internal_oligo_max_mishyb = ajAcdGetFloat("omishybmax"); /* internal_oligo_min_quality = ajAcdGetInt("oligominquality"); */ /* Internal 
Oligo penalties */ /* these are not (yet) implemented */ /* EMBOSS-wrapper-specific stuff */ outfile = ajAcdGetOutfile("outfile"); ajStrRemoveWhite(&left_input); ajStrRemoveWhite(&right_input); /* ** OK - we will now try to do a separate fork-exec for each sequence. */ result = ajStrNew(); while(ajSeqallNext(sequence, &seq)) { program = ajAcdGetpathC("primer3_core"); if(!ajSysExecRedirectC(ajStrGetPtr(program),&pipeto,&pipefrom)) ajFatal("eprimer3: Could not exec primer3_core"); stream = eprimer3_start_write(pipeto[1]); /* send primer3 Primer "Global" parameters */ eprimer3_send_bool(stream, "PRIMER_EXPLAIN_FLAG", explain_flag); eprimer3_send_bool(stream, "PRIMER_FILE_FLAG", file_flag); if(do_hybrid) { if(!ajStrCmpC(task[0], "1")) ajStrAssignC(&taskstr, "pick_pcr_primers_and_hyb_probe"); else if(!ajStrCmpC(task[0], "2")) ajStrAssignC(&taskstr, "pick_left_only"); else if(!ajStrCmpC(task[0], "3")) ajStrAssignC(&taskstr, "pick_right_only"); else if(!ajStrCmpC(task[0], "4")) ajStrAssignC(&taskstr, "pick_hyb_probe_only"); if (!do_primer) ajStrAssignC(&taskstr, "pick_hyb_probe_only"); } else { if(!ajStrCmpC(task[0], "1")) ajStrAssignC(&taskstr, "pick_pcr_primers"); else if(!ajStrCmpC(task[0], "2")) ajStrAssignC(&taskstr, "pick_left_only"); else if(!ajStrCmpC(task[0], "3")) ajStrAssignC(&taskstr, "pick_right_only"); else if(!ajStrCmpC(task[0], "4")) ajStrAssignC(&taskstr, "pick_hyb_probe_only"); } eprimer3_send_string(stream, "PRIMER_TASK", taskstr); eprimer3_send_int(stream, "PRIMER_NUM_RETURN", num_return); eprimer3_send_int(stream, "PRIMER_FIRST_BASE_INDEX", first_base_index); eprimer3_send_bool(stream, "PRIMER_PICK_ANYWAY", pick_anyway); /* mispriming library may not have been specified */ if(mispriming_library) eprimer3_send_stringC(stream, "PRIMER_MISPRIMING_LIBRARY", ajFileGetPrintnameC(mispriming_library)); eprimer3_send_float(stream, "PRIMER_MAX_MISPRIMING", max_mispriming); eprimer3_send_float(stream, "PRIMER_PAIR_MAX_MISPRIMING", pair_max_mispriming); 
eprimer3_send_int(stream, "PRIMER_GC_CLAMP", gc_clamp); eprimer3_send_int(stream, "PRIMER_OPT_SIZE", opt_size); eprimer3_send_int(stream, "PRIMER_MIN_SIZE", min_size); eprimer3_send_int(stream, "PRIMER_MAX_SIZE", max_size); eprimer3_send_float(stream, "PRIMER_OPT_TM", opt_tm); eprimer3_send_float(stream, "PRIMER_MIN_TM", min_tm); eprimer3_send_float(stream, "PRIMER_MAX_TM", max_tm); eprimer3_send_float(stream, "PRIMER_MAX_DIFF_TM", max_diff_tm); eprimer3_send_float(stream, "PRIMER_OPT_GC_PERCENT", opt_gc_percent); eprimer3_send_float(stream, "PRIMER_MIN_GC", min_gc); eprimer3_send_float(stream, "PRIMER_MAX_GC", max_gc); eprimer3_send_float(stream, "PRIMER_SALT_CONC", salt_conc); eprimer3_send_float(stream, "PRIMER_DNA_CONC", dna_conc); eprimer3_send_int(stream, "PRIMER_NUM_NS_ACCEPTED", num_ns_accepted); eprimer3_send_float(stream, "PRIMER_SELF_ANY", self_any); eprimer3_send_float(stream, "PRIMER_SELF_END", self_end); eprimer3_send_int(stream, "PRIMER_MAX_POLY_X", max_poly_x); eprimer3_send_int(stream, "PRIMER_PRODUCT_OPT_SIZE", product_opt_size); eprimer3_send_range2(stream, "PRIMER_PRODUCT_SIZE_RANGE", product_size_range); eprimer3_send_float(stream, "PRIMER_PRODUCT_OPT_TM", product_opt_tm); eprimer3_send_float(stream, "PRIMER_PRODUCT_MIN_TM", product_min_tm); eprimer3_send_float(stream, "PRIMER_PRODUCT_MAX_TM", product_max_tm); eprimer3_send_float(stream, "PRIMER_MAX_END_STABILITY", max_end_stability); /* send primer3 Internal Oligo "Global" parameters */ eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_OPT_SIZE", internal_oligo_opt_size); eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MIN_SIZE", internal_oligo_min_size); eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MAX_SIZE", internal_oligo_max_size); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_OPT_TM", internal_oligo_opt_tm); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MIN_TM", internal_oligo_min_tm); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_TM", internal_oligo_max_tm); 
eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_OPT_GC_PERCENT", internal_oligo_opt_gc_percent); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MIN_GC", internal_oligo_min_gc); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_GC", internal_oligo_max_gc); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SALT_CONC", internal_oligo_salt_conc); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_DNA_CONC", internal_oligo_dna_conc); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SELF_ANY", internal_oligo_self_any); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SELF_END", internal_oligo_self_end); eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MAX_POLY_X", internal_oligo_max_poly_x); /* ** internal oligo mishybridising library may not have been ** specified */ if(internal_oligo_mishyb_library) eprimer3_send_stringC(stream, "PRIMER_INTERNAL_OLIGO_MISHYB_LIBRARY", ajFileGetPrintnameC(internal_oligo_mishyb_library)); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_MISHYB", internal_oligo_max_mishyb); /* ** Start sequence-specific stuff */ begin = ajSeqallGetseqBegin(sequence) - 1; end = ajSeqallGetseqEnd(sequence) - 1; strand = ajSeqGetSeqCopyS(seq); ajStrFmtUpper(&strand); ajStrAssignSubC(&substr,ajStrGetPtr(strand), begin, end); /* send flags to turn on using optimal product size */ eprimer3_send_float(stream, "PRIMER_PAIR_WT_PRODUCT_SIZE_GT", (float)0.05); eprimer3_send_float(stream, "PRIMER_PAIR_WT_PRODUCT_SIZE_LT", (float)0.05); /* send primer3 Primer "Sequence" parameters */ eprimer3_send_string(stream, "SEQUENCE", substr); /* if no ID name, use the USA */ if(ajStrMatchC(ajSeqGetNameS(seq),"")) eprimer3_send_string(stream, "PRIMER_SEQUENCE_ID", ajSeqGetUsaS(seq)); else eprimer3_send_string(stream, "PRIMER_SEQUENCE_ID", ajSeqGetNameS(seq)); eprimer3_send_range(stream, "INCLUDED_REGION", included_region, begin); eprimer3_send_range(stream, "TARGET", target, begin); eprimer3_send_range(stream, "EXCLUDED_REGION", excluded_region, begin); 
eprimer3_send_string(stream, "PRIMER_LEFT_INPUT", left_input); eprimer3_send_string(stream, "PRIMER_RIGHT_INPUT", right_input); /* send primer3 Internal Oligo "Sequence" parameters */ eprimer3_send_range(stream, "PRIMER_INTERNAL_OLIGO_EXCLUDED_REGION", internal_oligo_excluded_region, begin); eprimer3_send_string(stream, "PRIMER_INTERNAL_OLIGO_INPUT", internal_oligo_input); /* end the primer3 input sequence record with a '=' */ eprimer3_send_end(stream); /* and close the ouput pipe stream */ eprimer3_end_write(stream); /* read the primer3 output */ eprimer3_read(pipefrom[0], &result); eprimer3_report(outfile, result, num_return, begin); ajStrSetClear(&result); #ifndef WIN32 close(pipeto[1]); close(pipefrom[0]); #endif } /* end of sequence loop */ ajStrDel(&result); ajSeqDel(&seq); ajStrDel(&strand); ajStrDel(&substr); ajFileClose(&outfile); ajStrDel(&taskstr); ajStrDelarray(&task); ajSeqallDel(&sequence); ajSeqDel(&seq); ajRangeDel(&included_region); ajRangeDel(&target); ajRangeDel(&excluded_region); ajRangeDel(&product_size_range); ajRangeDel(&internal_oligo_excluded_region); ajStrDel(&left_input); ajStrDel(&right_input); ajStrDel(&internal_oligo_input); AJFREE(pipeto); AJFREE(pipefrom); ajFileClose(&mispriming_library); embExit(); return 0; }
int main(int argc, char **argv) { ajint famn = 0; /* Counter for the families. */ ajint nset = 0; /* No. entries in family. */ ajint last_nodeid = 0; /* SCOP Sunid of last family that was processed. */ AjPStr last_node = NULL; /* Last family that was processed. */ AjPStr exec = NULL; /* The UNIX command line to be executed. */ AjPStr out = NULL; /* Name of stamp alignment file. */ AjPStr align = NULL; /* Name of sequence alignment file. */ AjPStr alignc = NULL; /* Name of structure alignment file. */ AjPStr log = NULL; /* Name of STAMP log file. */ AjPStr dom = NULL; /* Name of file containing single domain. */ AjPStr set = NULL; /* Name of file containing set of domains. */ AjPStr scan = NULL; /* Name of temp. file used by STAMP. */ AjPStr sort = NULL; /* Name of temp. file used by STAMP. */ AjPStr name = NULL; /* Base name of STAMP temp files. */ AjPStr pdbnames = NULL; /* Names of domain pdb files to be passed to TCOFFEEE. */ AjPDir pdb = NULL; /* Path of domain coordinate files (pdb format input). */ AjPDirout daf = NULL; /* Path of sequence alignment files for output. */ AjPDirout super = NULL; /* Path of structure alignment files for output. */ AjPDirout singlets = NULL; /* Path of FASTA singlet sequence files for output. */ AjPStr temp1 = NULL; /* A temporary string. */ AjPFile dcfin = NULL; /* File pointer for original Escop.dat file.*/ AjPFile domf = NULL; /* File pointer for single domain file. */ AjPFile setf = NULL; /* File pointer for domain set file. */ AjPFile logf = NULL; /* Log file. */ AjPDomain domain = NULL; /* Pointer to domain structure. */ AjPDomain prevdomain = NULL; /* Pointer to previous domain structure. */ ajint type = 0; /* Type of domain (ajSCOP or ajCATH) in the DCF file. */ AjPStr *node = NULL; /* Node of alignment . */ ajint noden = 0; /*1: Class (SCOP), 2: Fold (SCOP) etc, see ACD file. */ AjPStr *mode = NULL; /* Mode of operation from acd*/ ajint moden = 0; /* Program mode, 1: MODE_STAMP, 2: MODE_TCOFFEE (not yet implemented). 
*/ AjBool keepsinglets= ajFalse; /*Whether to retain sequences of singlet families and write them to an output file. */ AjPStr temp = NULL; /* A temporary string. */ AjPStr cmd = NULL; /* The command line to execute t-coffee. */ /* Initialise strings etc*/ last_node = ajStrNew(); exec = ajStrNew(); out = ajStrNew(); align = ajStrNew(); alignc = ajStrNew(); log = ajStrNew(); dom = ajStrNew(); set = ajStrNew(); scan = ajStrNew(); sort = ajStrNew(); name = ajStrNew(); temp = ajStrNew(); temp1 = ajStrNew(); cmd = ajStrNew(); pdbnames = ajStrNew(); /* Read data from acd. */ embInitPV("domainalign",argc,argv,"DOMALIGN",VERSION); dcfin = ajAcdGetInfile("dcfinfile"); pdb = ajAcdGetDirectory("pdbdir"); daf = ajAcdGetOutdir("dafoutdir"); super = ajAcdGetOutdir("superoutdir"); singlets = ajAcdGetOutdir("singletsoutdir"); node = ajAcdGetList("node"); mode = ajAcdGetList("mode"); keepsinglets = ajAcdGetToggle("keepsinglets"); logf = ajAcdGetOutfile("logfile"); /* Convert the selected node and mode to an integer. */ if(!(ajStrToInt(node[0], &noden))) ajFatal("Could not parse ACD node option"); if(!(ajStrToInt(mode[0], &moden))) ajFatal("Could not parse ACD node option"); /* Initialise random number generator for naming of temp. files. */ ajRandomSeed(); ajFilenameSetTempname(&name); /* Create names for temp. files. */ ajStrAssignS(&log, name); ajStrAppendC(&log, ".log"); ajStrAssignS(&dom, name); ajStrAppendC(&dom, ".dom"); ajStrAssignS(&set, name); ajStrAppendC(&set, ".set"); ajStrAssignS(&scan, name); ajStrAppendC(&scan, ".scan"); ajStrAssignS(&sort, name); ajStrAppendC(&sort, ".sort"); ajStrAssignS(&out, name); ajStrAppendC(&out, ".out"); /* Initialise last_node with something that is not in SCOP. */ ajStrAssignC(&last_node,"!!!!!"); /* Open STAMP domain set file. */ if(moden == MODE_STAMP) { if(!(setf=ajFileNewOutNameS(set))) ajFatal("Could not open domain set file\n"); } /* Get domain type. */ type = ajDomainDCFType(dcfin); /* Start of main application loop. 
*/ while((domain=(ajDomainReadCNew(dcfin, "*", type)))) { /* A new family. */ if(((domain->Type == ajSCOP) && (((noden==1) && (last_nodeid != domain->Scop->Sunid_Class)) || ((noden==2) && (last_nodeid != domain->Scop->Sunid_Fold)) || ((noden==3) && (last_nodeid != domain->Scop->Sunid_Superfamily))|| ((noden==4) && (last_nodeid != domain->Scop->Sunid_Family)))) || ((domain->Type == ajCATH) && (((noden==5) && (last_nodeid != domain->Cath->Class_Id)) || ((noden==6) && (last_nodeid != domain->Cath->Arch_Id)) || ((noden==7) && (last_nodeid != domain->Cath->Topology_Id)) || ((noden==8) && (last_nodeid != domain->Cath->Superfamily_Id)) || ((noden==9) && (last_nodeid != domain->Cath->Family_Id))))) { /* If we have done the first family. */ if(famn) { /* Create the output file for the alignment - the name will be the same as the Sunid for the DOMAIN family. */ domainalign_writeid(prevdomain, noden, daf, super, &align, &alignc); if(moden == MODE_STAMP) { /* Close domain set file. */ ajFileClose(&setf); /* Call STAMP. */ /* Family with 2 or more entries. */ if(nset > 1) { domainalign_stamp(prevdomain, domain, daf, super, singlets, align, alignc, dom, name, set, scan, sort, log, out, keepsinglets, moden, noden, nset, logf); } else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); /* Open STAMP domain set file. */ if(!(setf=ajFileNewOutNameS(set))) ajFatal("Could not open domain set file\n"); } else { /* Call TCOFEE. */ if(nset > 1) domainalign_tcoffee(prevdomain, out, align, alignc, pdbnames, noden, logf); else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); } /* Set the number of members of the new family to zero. */ nset = 0; /* Clear TCOFFEE argument. */ ajStrSetClear(&pdbnames); } /* Open, write and close STAMP domain file. 
*/ if(moden == MODE_STAMP) { if(!(domf=ajFileNewOutNameS(dom))) ajFatal("Could not open domain file\n"); ajStrAssignS(&temp, ajDomainGetId(domain)); ajStrFmtLower(&temp); ajFmtPrintF(domf, "%S %S { ALL }\n", temp, temp); ajFileClose(&domf); } /* Copy current family name to last_node. */ domainalign_writelast(domain, noden, &last_node, &last_nodeid); /* Copy current domain pointer to prevdomain. */ ajDomainDel(&prevdomain); prevdomain=NULL; ajDomainCopy(&prevdomain, domain); /* Increment family counter. */ famn++; } ajStrAssignS(&temp, ajDomainGetId(domain)); ajStrFmtLower(&temp); /* Write STAMP domain set file. */ if(moden == MODE_STAMP) ajFmtPrintF(setf, "%S %S { ALL }\n", temp, temp); /* Write TCOFFEE argument. */ else { ajStrAppendS(&pdbnames, ajDirGetPath(pdb)); ajStrAppendS(&pdbnames, temp); ajStrAppendC(&pdbnames, "."); ajStrAppendS(&pdbnames, ajDirGetExt(pdb)); ajStrAppendC(&pdbnames, " "); } ajDomainDel(&domain); /* Increment number of members in family. */ nset++; } /* End of main application loop. */ domain=prevdomain; ajFmtPrint("\nProcessing node %d\n", last_nodeid); /* Create the output file for the alignment - the name will be the same as the Sunid for the DOMAIN family. */ domainalign_writeid(prevdomain, noden, daf, super, &align, &alignc); /* Code to process last family. */ if(moden == MODE_STAMP) { /*Close domain set file. */ ajFileClose(&setf); /* ajFmtPrint("\n***** SECOND CALL\n");. */ if(nset > 1) { domainalign_stamp(prevdomain, domain, daf, super, singlets, align, alignc, dom, name, set, scan, sort, log, out, keepsinglets, moden, noden, nset, logf); } else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); } else { /* Call TCOFEE. */ if(nset > 1) domainalign_tcoffee(prevdomain, out, align, alignc, pdbnames, noden, logf); else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); } /* Remove all temporary files. 
*/ ajSysFileUnlinkS(log); ajSysFileUnlinkS(dom); ajSysFileUnlinkS(set); ajSysFileUnlinkS(scan); ajSysFileUnlinkS(sort); ajSysFileUnlinkS(out); ajStrAssignS(&temp, name); ajStrAppendC(&temp, ".mat"); ajSysFileUnlinkS(temp); /* Tidy up*/ ajDomainDel(&domain); ajFileClose(&dcfin); ajStrDel(&last_node); ajStrDel(&exec); ajStrDel(&log); ajStrDel(&dom); ajStrDel(&set); ajStrDel(&scan); ajStrDel(&sort); ajStrDel(&name); ajStrDel(&out); ajStrDel(&align); ajStrDel(&alignc); ajStrDel(&pdbnames); ajDirDel(&pdb); ajDiroutDel(&daf); ajDiroutDel(&super); ajDiroutDel(&singlets); ajStrDel(&temp); ajStrDel(&temp1); ajStrDel(&node[0]); AJFREE(node); ajStrDel(&mode[0]); AJFREE(mode); ajFileClose(&logf); ajExit(); return 0; }
void getorf_FindORFs(const AjPSeq seq, ajint len, const AjPTrn trnTable, ajuint minsize, ajuint maxsize, AjPSeqout seqout, AjBool sense, AjBool circular, ajint find, ajint *orf_no, AjBool methionine, ajint around, ORFrec *record) { AjBool ORF[3]; /* true if found an ORF */ AjBool LASTORF[3]; /* true if hit the end of an ORF past the end on the genome in this frame */ AjBool GOTSTOP[3]; /* true if found a STOP in a circular genome's frame when find = P_STOP2STOP or N_STOP2STOP */ ajint start[3]; /* possible starting position of the three frames */ ajint pos; ajint codon; char aa; ajint frame; AjPStr newstr[3]; /* strings of the three frames of ORF sequences that we are growing */ AjPSeq pep = NULL; ajint i; ajint seqlen; const char *chrseq; seqlen = ajSeqGetLen(seq); chrseq = ajSeqGetSeqC(seq); /* initialise the ORF sequences */ newstr[0] = NULL; newstr[1] = NULL; newstr[2] = NULL; /* ** initialise flags for found the last ORF past the end of a circular ** genome */ LASTORF[0] = ajFalse; LASTORF[1] = ajFalse; LASTORF[2] = ajFalse; /* initialise flags for found at least one STOP codon in a frame */ GOTSTOP[0] = ajFalse; GOTSTOP[1] = ajFalse; GOTSTOP[2] = ajFalse; if (circular || find == P_START2STOP || find == N_START2STOP || find == AROUND_START) { ORF[0] = ajFalse; ORF[1] = ajFalse; ORF[2] = ajFalse; } else { /* ** assume already in a ORF so we get ORFs at the start of the ** sequence */ ORF[0] = ajTrue; ORF[1] = ajTrue; ORF[2] = ajTrue; start[0] = 0; start[1] = 1; start[2] = 2; } for (pos=0; pos<seqlen-2; pos++) { codon = ajTrnStartStopC(trnTable, &chrseq[pos], &aa); frame = pos % 3; ajDebug("len=%d, Pos=%d, Frame=%d start/stop=%d, aa=%c", len, pos, frame, codon, aa); /* don't want to find extra ORFs when already been round circ */ if (LASTORF[frame]) continue; if (find == P_STOP2STOP || find == N_STOP2STOP || find == AROUND_INIT_STOP || find == AROUND_END_STOP) { /* look for stop codon to begin reporting ORF */ /* note that there was at least one STOP in a 
circular genome */ if (codon == STOP) { GOTSTOP[frame] = ajTrue; } /* write details if a STOP is hit or the end of the sequence */ if (codon == STOP || pos >= seqlen-5) { /* ** End of the sequence? If so, append any ** last codon to the sequence - otherwise, ignore the STOP ** codon */ if (codon != STOP) getorf_AppORF(find, &newstr[frame], chrseq, pos, aa); /* Already have a sequence to write out? */ if (ORF[frame]) { if (ajStrGetLen(newstr[frame]) >= minsize && ajStrGetLen(newstr[frame]) <= maxsize) { /* create a new sequence */ if (codon == STOP) getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, start[frame], pos-1, newstr[frame], seqout, around); else getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, start[frame], pos+2, newstr[frame], seqout, around); } ajStrSetClear(&newstr[frame]); } /* ** if its a circular genome and the STOP codon hits past ** the end of the genome in all frames, then break */ if (circular && pos >= len) { ORF[frame] = ajFalse; /* past the end of the genome */ LASTORF[frame] = ajTrue; /* finished getting ORFs */ if (LASTORF[0] && LASTORF[1] && LASTORF[2]) break; } else { /* ** hit a STOP, therefore a potential ORF to write ** out next time, even if the genome is circular */ ORF[frame] = ajTrue; start[frame] = pos+3; /* next start of the ORF */ } } else if (ORF[frame]) /* append sequence to newstr if in an ORF */ getorf_AppORF(find, &newstr[frame], chrseq, pos, aa); } else { /* Look for start: P_START2STOP N_START2STOP AROUND_START */ if (codon == START && !ORF[frame]) { /* not in a ORF already and found a START */ if (pos < len) { /* ** reset the newstr to zero length to enable ** storing the ORF for this */ ajStrSetClear(&newstr[frame]); ORF[frame] = ajTrue; /* now in an ORF */ start[frame] = pos; /* start of the ORF for this frame */ if (methionine) getorf_AppORF(find, &newstr[frame], chrseq, pos, 'M'); else getorf_AppORF(find, &newstr[frame], chrseq, pos, aa); } } else if (codon == STOP) { /* hit a STOP */ /* Already have a 
sequence to write out? */ if (ORF[frame]) { ORF[frame] = ajFalse; /* not in an ORF */ if (ajStrGetLen(newstr[frame]) >= minsize && ajStrGetLen(newstr[frame]) <= maxsize) { /* create a new sequence */ getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, start[frame], pos-1, newstr[frame], seqout, around); } } /* ** if a circular genome and hit the STOP past ** the end of the genome in all frames, then break */ if (circular && pos >= len) { LASTORF[frame] = ajTrue; /* finished getting ORFs */ if (LASTORF[0] && LASTORF[1] && LASTORF[2]) break; } ajStrSetClear(&newstr[frame]); } else if (pos >= seqlen-5) { /* hit the end of the sequence without a stop */ /* Already have a sequence to write out? */ if (ORF[frame]) { ORF[frame] = ajFalse; /* not in an ORF */ /* ** End of the sequence? If so, append any ** last codon to the sequence - otherwise, ignore the ** STOP codon */ if (pos >= seqlen-5 && pos < seqlen-2) getorf_AppORF(find, &newstr[frame], chrseq, pos, aa); if (ajStrGetLen(newstr[frame]) >= minsize && ajStrGetLen(newstr[frame]) <= maxsize) { /* create a new sequence */ getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, start[frame], pos+2, newstr[frame], seqout, around); } } /* ** if a circular genome and hit the STOP past ** the end of the genome in all frames, then break */ if (circular && pos >= len) { LASTORF[frame] = ajTrue; /* finished getting ORFs */ if (LASTORF[0] && LASTORF[1] && LASTORF[2]) break; } ajStrSetClear(&newstr[frame]); } else if (ORF[frame]) getorf_AppORF(find, &newstr[frame], chrseq, pos, aa); } } /* ** Currently miss reporting a STOP-to-STOP ORF that is ** the full length of a circular genome when there are no STOP codons in ** that frame */ if ((find == P_STOP2STOP || find == N_STOP2STOP) && circular) { if (!GOTSTOP[0]) { /* translate frame 1 into pep */ pep = ajTrnSeqOrig(trnTable, seq, 1); if (ajSeqGetLen(pep) >= minsize && ajSeqGetLen(pep) <= maxsize) getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, 0, seqlen-1, 
ajSeqGetSeqS(pep), seqout, around); ajSeqDel(&pep); } if (!GOTSTOP[1]) { /* translate frame 2 into pep */ pep = ajTrnSeqOrig(trnTable, seq, 2); if (ajSeqGetLen(pep) >= minsize && ajSeqGetLen(pep) <= maxsize) getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, 1, seqlen-1, ajSeqGetSeqS(pep), seqout, around); ajSeqDel(&pep); } if (!GOTSTOP[2]) { /* translate frame 3 into pep */ pep = ajTrnSeqOrig(trnTable, seq, 3); if (ajSeqGetLen(pep) >= minsize && ajSeqGetLen(pep) >= maxsize) getorf_WriteORF(seq, len, seqlen, sense, find, orf_no, 2, seqlen-1, ajSeqGetSeqS(pep), seqout, around); ajSeqDel(&pep); } } for (i=0;i<3;++i) ajStrDel(&newstr[i]); return; }