/* @funcstatic splitter_ProcessChunk ******************************************
**
** Copies the region start..end of a sequence into a new sequence object,
** gives it the supplied name and a fresh feature table (also named 'name'
** and flagged as nucleotide), and hands it to splitter_write.
**
** @param [u] seqout [AjPSeqout] Output stream passed on to splitter_write
** @param [r] seq [const AjPSeq] Source sequence
** @param [r] start [ajuint] Start position, as passed to ajStrAssignSubC
** @param [r] end [ajuint] End position, as passed to ajStrAssignSubC
** @param [r] name [const AjPStr] Name of the new sequence and feature table
** @param [r] feature [AjBool] If true, splitter_AddSubSeqFeat is called to
**                             fill the new feature table for this region
** @return [void]
******************************************************************************/

static void splitter_ProcessChunk (AjPSeqout seqout, const AjPSeq seq,
                                   ajuint start, ajuint end,
                                   const AjPStr name, AjBool feature)
{
    AjPSeq chunk    = NULL;
    AjPStr residues = NULL;

    ajDebug("splitter_ProcessChunk %d..%d '%S' %B\n",
            start, end, name, feature);

    residues = ajStrNew();
    chunk    = ajSeqNew();

    /* the chunk owns its feature table; it is released by ajSeqDel below */
    chunk->Fttable = ajFeattableNew(name);
    ajFeattableSetNuc(chunk->Fttable);

    ajStrAssignSubC(&residues, ajSeqGetSeqC(seq), start, end);
    ajSeqAssignSeqS(chunk, residues);

    if(feature)
    {
        splitter_AddSubSeqFeat(chunk->Fttable, start, end, seq);
    }

    ajSeqAssignNameS(chunk, name);
    splitter_write(seqout, chunk, seq);

    ajStrDel(&residues);
    ajSeqDel(&chunk);

    return;
}
static void extractfeat_WriteOut(AjPSeqout seqout, AjPStr *featstr, AjBool compall, AjBool sense, ajint firstpos, ajint lastpos, ajint before, ajint after, const AjPSeq seq, AjBool remote, const AjPStr type, AjBool featinname, const AjPStr describestr) { AjPSeq newseq = NULL; AjPStr name = NULL; /* new name of the sequence */ AjPStr value = NULL; /* string value of start or end position */ AjPStr desc = NULL; /* sequence description */ AjBool forward = sense; if(compall) forward = ajFalse; ajDebug("WriteOut %S_%d_%d [%S] %d all:%B fwd:%B remote:%B\n", ajSeqGetNameS(seq), firstpos+1, lastpos+1, type, ajStrGetLen(*featstr), compall, sense, remote); /* see if there is a sequence to be written out */ if(!ajStrGetLen(*featstr)) { ajWarn("feature %S_%d_%d [%S] " "not written out because it has zero length\n", ajSeqGetNameS(seq), firstpos+1, lastpos+1, type); ajDebug("feature not written out because it has length=0 " "(probably first time round)\n"); return; } /* see if must abort because there were Remote IDs in the features */ if(remote) { ajWarn("feature not written out because it has Remote IDs\n"); ajDebug("feature not written out because it has Remote IDs\n"); return; } ajDebug("feature = %d bases\n", ajStrGetLen(*featstr)); /* featstr may be edited, so it is a AjPStr* */ extractfeat_BeforeAfter (seq, featstr, firstpos, lastpos, before, after, forward); ajDebug("feature+before/after = %d bases\n", ajStrGetLen(*featstr)); /* set the extracted sequence */ newseq = ajSeqNew(); ajSeqAssignSeqS(newseq, *featstr); /* create a nice name for the new sequence */ name = ajStrNew(); ajStrAppendS(&name, ajSeqGetNameS(seq)); ajStrAppendC(&name, "_"); value = ajStrNew(); ajStrFromInt(&value, firstpos+1); ajStrAppendS(&name, value); ajStrAppendC(&name, "_"); ajStrFromInt(&value, lastpos+1); ajStrAppendS(&name, value); /* add the type of feature to the name, if required */ if(featinname) { ajStrAppendC(&name, "_"); ajStrAppendS(&name, type); } ajSeqAssignNameS(newseq, name); /* set 
the sequence description with the 'type' added */ desc = ajStrNew(); ajStrAppendC(&desc, "["); ajStrAppendS(&desc, type); ajStrAppendC(&desc, "] "); if(ajStrGetLen(describestr)) ajStrAppendS(&desc, describestr); ajStrAppendS(&desc, ajSeqGetDescS(seq)); ajSeqAssignDescS(newseq, desc); /* set the type */ if(ajSeqIsNuc(seq)) ajSeqSetNuc(newseq); else ajSeqSetProt(newseq); /* write the new sequence */ ajSeqoutWriteSeq(seqout, newseq); ajSeqDel(&newseq); ajStrDel(&name); ajStrDel(&value); ajStrDel(&desc); return; }
int main(int argc, char **argv) { AjPSeqall nucseq; /* input nucleic sequences */ AjPSeqset protseq; /* input aligned protein sequences */ AjPSeqout seqout; AjPSeq nseq; /* next nucleic sequence to align */ const AjPSeq pseq; /* next protein sequence use in alignment */ AjPTrn trnTable; AjPSeq pep; /* translation of nseq */ AjPStr tablelist; ajint table; AjPSeqset outseqset; /* set of aligned nucleic sequences */ ajint proteinseqcount = 0; AjPStr degapstr = NULL; /* used to check if it matches with START removed */ AjPStr degapstr2 = NULL; AjPStr codon = NULL; /* holds temporary codon to check if is START */ char aa; /* translated putative START codon */ ajint type; /* returned type of the putative START codon */ /* start position of guide protein in translation */ ajlong pos = 0; AjPSeq newseq = NULL; /* output aligned nucleic sequence */ ajint frame; embInit("tranalign", argc, argv); nucseq = ajAcdGetSeqall("asequence"); protseq = ajAcdGetSeqset("bsequence"); tablelist = ajAcdGetListSingle("table"); seqout = ajAcdGetSeqoutset("outseq"); outseqset = ajSeqsetNew(); degapstr = ajStrNew(); /* initialise the translation table */ ajStrToInt(tablelist, &table); trnTable = ajTrnNewI(table); ajSeqsetFill(protseq); while(ajSeqallNext(nucseq, &nseq)) { if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL) ajErr("No guide protein sequence available for " "nucleic sequence %S", ajSeqGetNameS(nseq)); ajDebug("Aligning %S and %S\n", ajSeqGetNameS(nseq), ajSeqGetNameS(pseq)); /* get copy of pseq string with no gaps */ ajStrAssignS(°apstr, ajSeqGetSeqS(pseq)); ajStrRemoveGap(°apstr); /* ** for each translation frame look for subset of pep that ** matches pseq */ for(frame = 1; frame <4; frame++) { ajDebug("trying frame %d\n", frame); pep = ajTrnSeqOrig(trnTable, nseq, frame); degapstr2 = ajStrNew(); ajStrAssignRef(°apstr2, degapstr); pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr); /* ** we might have a START codon that should be translated as 'M' ** we need to 
check if there is a match after a possible START ** codon */ if(pos == -1 && ajStrGetLen(degapstr) > 1 && (ajStrGetPtr(degapstr)[0] == 'M' || ajStrGetPtr(degapstr)[0] == 'm')) { /* see if pep minus the first character is a match */ ajStrCutStart(°apstr2, 1); pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); /* ** pos is >= 1 if we have a match that is after the first ** residue */ if(pos >= 1) { /* point back at the putative START Methionine */ pos--; /* test if first codon is a START */ codon = ajStrNew(); ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), (pos*3)+frame-1, (pos*3)+frame+2); type = ajTrnCodonstrTypeS(trnTable, codon, &aa); if(type != 1) { /* first codon is not a valid START, force a mismatch */ pos = -1; } ajStrDel(&codon); } else { /* force 'pos == 0' to be treated as a mismatch */ pos = -1; } } ajStrDel(°apstr2); ajSeqDel(&pep); if(pos != -1) break; } if(pos == -1) ajErr("Guide protein sequence %S not found in nucleic sequence %S", ajSeqGetNameS(pseq), ajSeqGetNameS(nseq)); else { ajDebug("got a match with frame=%d\n", frame); /* extract the coding region of nseq with gaps */ newseq = ajSeqNew(); ajSeqSetNuc(newseq); ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq)); ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq)); tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1); /* output the gapped nucleic sequence */ ajSeqsetApp(outseqset, newseq); ajSeqDel(&newseq); } ajStrRemoveWhiteExcess(°apstr); } ajSeqoutWriteSet(seqout, outseqset); ajSeqoutClose(seqout); ajTrnDel(&trnTable); ajSeqsetDel(&outseqset); ajStrDel(°apstr); ajStrDel(°apstr2); ajSeqallDel(&nucseq); ajSeqDel(&nseq); ajSeqoutDel(&seqout); ajSeqsetDel(&protseq); ajStrDel(&tablelist); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqset seqset = NULL; AjPStr refseq; /* input name/number of reference sequence */ ajint nrefseq; /* numeric reference sequence */ AjPMatrix matrix; /* scoring matrix structure */ ajint **sub; /* integer scoring matrix */ AjPSeqCvt cvt = 0; /* conversion table for scoring matrix */ float identity; ajint ident; float fplural; AjPStr cons; AjPSeq consensus; const AjPSeq ref; const AjPSeq seq; ajuint i; AjBool html; AjBool doheader; AjBool dousa; AjBool doname; AjBool doseqlength; AjBool doalignlength; AjBool dogaps; AjBool dogapcount; AjBool doidcount; AjBool dosimcount; AjBool dodifcount; AjBool dochange; AjBool dodesc; AjBool dowt; ajint seqlength; ajint alignlength; ajint gaps; ajint gapcount; ajint idcount; ajint simcount; ajint difcount; float change; AjPFile outfile; const AjPStr usa; const AjPStr name; AjPStr altusa; /* default name when the real name is not known */ AjPStr altname; AjPStr xxx = NULL; embInit("infoalign", argc, argv); seqset = ajAcdGetSeqset("sequence"); refseq = ajAcdGetString("refseq"); matrix = ajAcdGetMatrix("matrix"); ajSeqsetFill(seqset); outfile = ajAcdGetOutfile("outfile"); html = ajAcdGetBoolean("html"); doheader = ajAcdGetBoolean("heading"); dousa = ajAcdGetBoolean("usa"); doname = ajAcdGetBoolean("name"); doseqlength = ajAcdGetBoolean("seqlength"); doalignlength = ajAcdGetBoolean("alignlength"); dogaps = ajAcdGetBoolean("gaps"); dogapcount = ajAcdGetBoolean("gapcount"); doidcount = ajAcdGetBoolean("idcount"); dosimcount = ajAcdGetBoolean("simcount"); dodifcount = ajAcdGetBoolean("diffcount"); dochange = ajAcdGetBoolean("change"); dodesc = ajAcdGetBoolean("description"); dowt = ajAcdGetBoolean("weight"); /* consensus parameters */ fplural = ajAcdGetFloat("plurality"); identity = ajAcdGetFloat("identity"); cons = ajStrNew(); consensus = ajSeqNew(); altusa = ajStrNewC("-"); altname = ajStrNewC("-"); /* get conversion table and scoring matrix */ cvt = ajMatrixGetCvt(matrix); sub = 
ajMatrixGetMatrix(matrix); /* get the number of the reference sequence */ nrefseq = infoalign_Getrefseq(refseq, seqset); /* change the % plurality to the fraction of absolute total weight */ fplural = ajSeqsetGetTotweight(seqset) * fplural / 100; /* ** change the % identity to the number of identical sequences at a ** position required for consensus */ ident = ajSeqsetGetSize(seqset) * (ajint)identity / 100; /* get the consensus sequence */ embConsCalc(seqset, matrix, ajSeqsetGetSize(seqset), ajSeqsetGetLen(seqset), fplural, 0.0, ident, ajFalse, &cons); ajSeqAssignSeqS(consensus, cons); ajSeqAssignNameS(consensus,(xxx=ajStrNewC("Consensus"))); /* get the reference sequence */ if(nrefseq == -1) ref = consensus; else ref = ajSeqsetGetseqSeq(seqset, nrefseq); /* start the HTML table */ if(html) ajFmtPrintF(outfile,"<table border cellpadding=4 bgcolor=" "\"#FFFFF0\">\n"); /* print the header information */ if(doheader) { /* start the HTML table title line and output the Name header */ if(html) ajFmtPrintF(outfile, "<tr>"); else ajFmtPrintF(outfile, "%s", "# "); if(dousa) { if(html) ajFmtPrintF(outfile, "<th>USA</th>"); else ajFmtPrintF(outfile, "%-16s", "USA"); } if(doname) { if(html) ajFmtPrintF(outfile, "<th>Name</th>"); else ajFmtPrintF(outfile, "%-12s", "Name"); } if(doseqlength) { if(html) ajFmtPrintF(outfile, "<th>Sequence Length</th>"); else ajFmtPrintF(outfile, "SeqLen\t"); } if(doalignlength) { if(html) ajFmtPrintF(outfile, "<th>Aligned Length</th>"); else ajFmtPrintF(outfile, "AlignLen\t"); } if(dogaps) { if(html) ajFmtPrintF(outfile, "<th>Gaps</th>"); else ajFmtPrintF(outfile, "Gaps\t"); } if(dogapcount) { if(html) ajFmtPrintF(outfile, "<th>Gap Length</th>"); else ajFmtPrintF(outfile, "GapLen\t"); } if(doidcount) { if(html) ajFmtPrintF(outfile, "<th>Identity</th>"); else ajFmtPrintF(outfile, "Ident\t"); } if(dosimcount) { if(html) ajFmtPrintF(outfile, "<th>Similarity</th>"); else ajFmtPrintF(outfile, "Similar\t"); } if(dodifcount) { if(html) 
ajFmtPrintF(outfile, "<th>Difference</th>"); else ajFmtPrintF(outfile, "Differ\t"); } if(dochange) { if(html) ajFmtPrintF(outfile, "<th>%% Change</th>"); else ajFmtPrintF(outfile, "%% Change\t"); } if(dowt) { if(html) ajFmtPrintF(outfile, "<th>Weight</th>"); else ajFmtPrintF(outfile, "Weight\t"); } if(dodesc) { if(html) ajFmtPrintF(outfile, "<th>Description</th>"); else ajFmtPrintF(outfile, "Description"); } /* end the HTML table title line */ if(html) ajFmtPrintF(outfile, "</tr>\n"); else ajFmtPrintF(outfile, "\n"); } for(i=0; i<ajSeqsetGetSize(seqset); i++) { seq = ajSeqsetGetseqSeq(seqset, i); /* get the usa ('-' if unknown) */ usa = ajSeqGetUsaS(seq); if(ajStrGetLen(usa) == 0) usa = altusa; /* get the name ('-' if unknown) */ name = ajSeqGetNameS(seq); if(ajStrGetLen(name) == 0) name = altname; /* get the stats from the comparison to the reference sequence */ infoalign_Compare(ref, seq, sub, cvt, &seqlength, &alignlength, &gaps, &gapcount, &idcount, &simcount, &difcount, &change); /* start table line */ if(html) ajFmtPrintF(outfile, "<tr>"); if(dousa) infoalign_OutputStr(outfile, usa, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps || doseqlength || doalignlength || doname), 18); if(doname) infoalign_OutputStr(outfile, name, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps || doseqlength || doalignlength), 14); if(doseqlength) infoalign_OutputInt(outfile, seqlength, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps || doalignlength)); if(doalignlength) infoalign_OutputInt(outfile, alignlength, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount || dogaps)); if(dogaps) infoalign_OutputInt(outfile, gaps, html, (dodesc || dowt || dochange || dodifcount || dosimcount || doidcount || dogapcount)); if(dogapcount) infoalign_OutputInt(outfile, gapcount, html, (dodesc || dowt || 
dochange || dodifcount || dosimcount || doidcount)); if(doidcount) infoalign_OutputInt(outfile, idcount, html, (dodesc || dowt || dochange || dodifcount || dosimcount)); if(dosimcount) infoalign_OutputInt(outfile, simcount, html, (dodesc || dowt || dochange || dodifcount)); if(dodifcount) infoalign_OutputInt(outfile, difcount, html, (dodesc || dowt || dochange)); if(dochange) infoalign_OutputFloat(outfile, change, html, (dodesc || dowt) ); if(dowt) infoalign_OutputFloat(outfile, ajSeqsetGetseqWeight(seqset,i), html, dodesc); if(dodesc) infoalign_OutputStr(outfile, ajSeqGetDescS(seq), html, ajFalse, NOLIMIT); /* end table line */ if(html) ajFmtPrintF(outfile, "</tr>\n"); else ajFmtPrintF(outfile, "\n"); } /* end the HTML table */ if(html) ajFmtPrintF(outfile, "</table>\n"); ajFileClose(&outfile); /* tidy up */ ajStrDel(&altusa); ajStrDel(&altname); ajStrDel(&xxx); ajSeqDel(&consensus); ajSeqsetDel(&seqset); ajStrDel(&refseq); ajMatrixDel(&matrix); ajStrDel(&cons); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqout outseq = NULL; AjPList list = NULL; AjPSeq seq = NULL; AjPStr insert = NULL; AjPStr seqstr = NULL; AjPStr* seqr = NULL; AjPFile data = NULL; ajint start = 0; ajint length = 0; ajint amount = 0; ajint scmax = 0; ajint extra = 0; embInit("makeprotseq", argc, argv); data = ajAcdGetInfile("pepstatsfile"); insert = ajAcdGetString("insert"); start = ajAcdGetInt("start"); length = ajAcdGetInt("length"); amount = ajAcdGetInt("amount"); outseq = ajAcdGetSeqoutall("outseq"); list = ajListstrNew(); /* this is checked by acd if(amount <=0 || length <= 0) ajFatal("Amount or length is 0 or less. " "Unable to create any sequences"); */ /* if insert, make sure sequence is large enough */ if(ajStrGetLen(insert)) { length -= ajStrGetLen(insert); /* start= start <= 1 ? 0 : --start; */ /* checked in acd */ start--; if(length <= 0) ajFatal("Sequence smaller than inserted part. " "Unable to create sequences."); } /* make the list of AjPStr to be used in sequence creation */ if(data) { ajDebug("Distribution datafile '%s' given checking type\n", ajFileGetPrintnameC(data)); seqstr = ajStrNew(); ajReadlineTrim(data,&seqstr); if(ajStrFindC(seqstr,"PEPSTATS") == 0) { makeprotseq_parse_pepstats(&list,data); } else { ajWarn("Not pepstats file. Making completely random sequences."); makeprotseq_default_chars(&list); } ajStrDel(&seqstr); ajFileClose(&data); } else makeprotseq_default_chars(&list); /* if insert, make sure type is correct */ /* typecheking code is not working, uncomment and test after it is if(ajStrGetLen(insert)) { seqstr = ajStrNew(); if(prot) ajStrAssignC(&seqstr,"pureprotein"); if(!ajSeqTypeCheckS(&insert,seqstr)) ajFatal("Insert not the same sequence type as sequence itself."); ajStrDel(&seqstr); } */ /* array allows fast creation of a sequences */ scmax = (ajuint) ajListstrToarray(list,&seqr); if(!scmax) ajFatal("No strings in list. 
No characters to make the sequence."); ajDebug("Distribution array done.\nscmax '%d', extra '%d', first '%S'\n", scmax,extra,seqr[0]); ajRandomSeed(); while(amount-- > 0) { seqstr = makeprotseq_random_sequence(seqr,scmax,length); if(ajStrGetLen(insert)) ajStrInsertS(&seqstr,start,insert); ajStrFmtLower(&seqstr); seq = ajSeqNew(); ajSeqAssignSeqS(seq, seqstr); ajSeqSetProt(seq); ajSeqoutWriteSeq(outseq, seq); ajSeqDel(&seq); ajStrDel(&seqstr); } ajSeqoutClose(outseq); ajSeqoutDel(&outseq); ajListstrFreeData(&list); ajStrDel(&insert); AJFREE(seqr); embExit(); return 0; }
/* @prog newcoils *************************************************************
**
** Reads a heptad preference matrix with newcoils_read_matrix and calls
** newcoils_pred_coils for every input sequence. With 'verb' set, the
** matrix rows and the fitting-data table are echoed to the output file
** first.
**
******************************************************************************/

int main(ajint argc, char **argv)
{
    AjPFile   datafile;
    AjPFile   outf = NULL;
    AjPSeqall seqall;
    AjPSeq    cur = NULL;       /* sequence currently being processed */
    ajuint    k;

    ajint verb;
    ajint window;
    ajint row;                  /* matrix row of a residue letter */
    ajint which;                /* index of fitting data in use, -1 if none */
    ajint weighted;

    ajint t  = 0;
    ajint tc = 0;
    ajint mode;
    ajint min_seg;

    const AjPStr seqdes;

    float min_P;

    struct hept_pref *prefs;

    embInit("newcoils",argc,argv);

    window   = ajAcdGetInt("window");
    weighted = ajAcdGetInt("weighted");
    verb     = ajAcdGetInt("verb");
    mode     = ajAcdGetInt("mode");
    min_P    = ajAcdGetFloat("minp");
    min_seg  = ajAcdGetInt("minseg");
    outf     = ajAcdGetOutfile("outfile");
    datafile = ajAcdGetDatafile("datafile");
    seqall   = ajAcdGetSeqall("sequence");

    cur = ajSeqNew();

    prefs = newcoils_read_matrix(datafile);

    if(verb)
    {
        /* one line per residue letter; '_' entries in NCAAs are skipped */
        for(k=0; k<strlen(NCAAs); ++k)
        {
            if(NCAAs[k] == '_')
                continue;

            row = (int)(NCAAs[k]-'A');
            ajFmtPrintF(outf,"AA %c %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f "
                        "%4.2f\n",NCAAs[k],
                        prefs->m[row][0],prefs->m[row][1],
                        prefs->m[row][2],prefs->m[row][3],
                        prefs->m[row][4],prefs->m[row][5],
                        prefs->m[row][6]);
        }

        /* one line per fitting-data entry */
        for(k=0; k<(ajuint)prefs->n; ++k)
        {
            ajFmtPrintF(outf,"Window %4d %1d %f %f %f %f %f\n",
                        prefs->f[k].win,prefs->f[k].w,
                        prefs->f[k].m_cc,prefs->f[k].sd_cc,
                        prefs->f[k].m_g,prefs->f[k].sd_g,
                        prefs->f[k].sc);
        }
    }

    /*
    ** Look for fitting data matching the chosen window length and weight
    ** scheme; the scan does not stop early, so the last match is used
    */
    which = -1;

    for(k=0; k<(ajuint)prefs->n; ++k)
    {
        if(prefs->f[k].win != window || prefs->f[k].w != weighted)
            continue;

        if(verb)
            ajFmtPrintF(outf,"Found fitting data for win %4d w %d\n",
                        window,weighted);

        which = k;
    }

    while(ajSeqallNext(seqall, &cur))
    {
        seqdes = ajSeqGetDescS(cur);
        newcoils_pred_coils(outf,ajSeqGetSeqC(cur),ajSeqGetNameC(cur),
                            seqdes,prefs,window,
                            which,weighted,mode,min_P,&t,&tc,min_seg);
    }

    if(outf)
        ajFileClose(&outf);

    ajSeqDel(&cur);

    embExit();

    return 0;
}
/* @prog seqnr ************************************************************** ** ** Removes redundancy from DHF files (domain hits files) or other files of ** sequences. ** ****************************************************************************/ int main(int argc, char **argv) { /* Variable declarations */ AjPList in = NULL; /* Names of domain hits files (input). */ AjPStr inname = NULL; /* Full name of the current DHF file. */ AjPFile inf = NULL; /* Current DHF file. */ EmbPHitlist infhits = NULL; /* Hitlist from DHF file */ AjBool dosing = ajFalse; /* Filter using singlet sequences. */ AjPDir singlets = NULL; /* Singlets (input). */ AjBool dosets = ajFalse; /* Filter using sets of sequences. */ AjPDir insets = NULL; /* Sets (input). */ AjPStr mode = NULL; /* Mode of operation */ ajint moden = 0; /* Mode 1: single threshold for redundancy removal, 2: lower and upper thresholds for redundancy removal. */ float thresh = 0.0; /* Threshold for non-redundancy. */ float threshlow = 0.0; /* Threshold (lower limit). */ float threshup = 0.0; /* Threshold (upper limit). */ AjPMatrixf matrix = NULL; /* Substitution matrix. */ float gapopen = 0.0; /* Gap insertion penalty. */ float gapextend = 0.0; /* Gap extension penalty. */ AjPDirout out = NULL; /* Domain hits files (output). */ AjPFile outf = NULL; /* Current DHF file (output). */ AjBool dored = ajFalse; /* True if redundant hits are output. */ AjPDirout outred = NULL; /* DHF files for redundant hits (output).*/ AjPFile redf = NULL; /* Current DHF file redundancy (output). */ AjPStr outname = NULL; /* Name of output file (re-used). */ AjPFile logf = NULL; /* Log file pointer. */ AjBool ok = ajFalse; /* Housekeeping. */ AjPSeqset seqset = NULL; /* Seqset (re-used). */ AjPSeqin seqin = NULL; /* Seqin (re-used). */ AjPList seq_list = NULL; /* Main list for redundancy removal. */ EmbPDmxNrseq seq_tmp = NULL; /* Temp. pointer for making seq_list. */ ajint seq_siz = 0; /* Size of seq_list. 
*/ AjPUint keep = NULL; /* 1: Sequence in seq_list was classed as non-redundant, 0: redundant. */ AjPUint nokeep = NULL; /* Inversion of keep array. */ ajint nseqnr = 0; /* No. non-redundant seqs. in seq_list. */ AjPStr filtername= NULL; /* Name of filter file (re-used). */ AjPFile filterf = NULL; /* Current filter file. */ EmbPHitlist hitlist = NULL; /* Hitlist from input file (re-used). */ AjPScopalg scopalg = NULL; /* Scopalg from input file. */ ajint x = 0; /* Housekeeping. */ /* Read data from acd. */ embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION); in = ajAcdGetDirlist("dhfinpath"); dosing = ajAcdGetToggle("dosing"); singlets = ajAcdGetDirectory("singletsdir"); dosets = ajAcdGetToggle("dosets"); insets = ajAcdGetDirectory("insetsdir"); mode = ajAcdGetListSingle("mode"); thresh = ajAcdGetFloat("thresh"); threshlow = ajAcdGetFloat("threshlow"); threshup = ajAcdGetFloat("threshup"); matrix = ajAcdGetMatrixf("matrix"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); out = ajAcdGetOutdir("dhfoutdir"); dored = ajAcdGetToggle("dored"); outred = ajAcdGetOutdir("redoutdir"); logf = ajAcdGetOutfile("logfile"); /* Housekeeping. */ filtername = ajStrNew(); outname = ajStrNew(); if(!(ajStrToInt(mode, &moden))) ajFatal("Could not parse ACD node option"); /* Process each DHF (input) in turn. */ while(ajListPop(in,(void **)&inname)) { ajFmtPrint("Processing %S\n", inname); ajFmtPrintF(logf, "//\n%S\n", inname); seq_list = ajListNew(); keep = ajUintNew(); nokeep = ajUintNew(); /**********************************/ /* Open DHF file */ /**********************************/ if((inf = ajFileNewInNameS(inname)) == NULL) ajFatal("Could not open DHF file %S", inname); /* Read DHF file. */ ok = ajFalse; if(!(infhits = embHitlistReadFasta(inf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. 
*/ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqsearch_psialigned"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(infhits->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&inf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty input file %S\n", inname); ajFmtPrintF(logf, "Empty input file %S\n", inname); if(infhits) embHitlistDel(&infhits); if(seqset) ajSeqsetDel(&seqset); if(seqin) ajSeqinDel(&seqin); continue; } /* 1. Create list of sequences from the main input directory.. */ if(infhits) { for(x=0; x<infhits->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } /**********************************/ /* Open singlets filter file */ /**********************************/ if(dosing) { /* Open singlets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(singlets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(singlets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DHF file %S", filtername); ajFmtPrint("Could not open singlets filter file %S", filtername); } else { /* Read DHF file. */ ok = ajFalse; if(!(hitlist = embHitlistReadFasta(filterf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. 
*/ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(hitlist->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty singlets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty singlets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files). */ if(hitlist) { for(x=0; x<hitlist->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,hitlist->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,hitlist->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } embHitlistDel(&hitlist); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /**********************************/ /* Open sets filter file */ /**********************************/ if(dosets) { /* Open sets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(insets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(insets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DAF file %S", filtername); ajFmtPrint("Could not open sets filter file %S", filtername); } else { /* Read DAF file. 
*/ ok = ajFalse; if(!(ajDmxScopalgRead(filterf, &scopalg))) { ajWarn("ajDmxScopalgRead call failed in seqnr"); ajFmtPrintF(logf, "ajDmxScopalgRead call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(scopalg->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty sets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty sets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files).. */ if(scopalg) { for(x=0; x<scopalg->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]); ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]); /* Remove gap char's & whitespace. */ ajStrRemoveGap(&seq_tmp->Seq->Seq); ajListPushAppend(seq_list,seq_tmp); } ajDmxScopalgDel(&scopalg); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /* 4. Identify redundant domains.. 
*/ if(moden == 1) { if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, thresh, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } else { if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, threshlow, threshup, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } seq_siz = ajListGetLength(seq_list); for(x=0; x<seq_siz; x++) if(ajUintGet(keep, x) == 1) ajUintPut(&nokeep, x, 0); else ajUintPut(&nokeep, x, 1); /* Create output files. */ ajStrAssignS(&outname, inname); ajFilenameTrimPathExt(&outname); outf = ajFileNewOutNameDirS(outname, out); if(dored) redf = ajFileNewOutNameDirS(outname, outred); /* 5. Write non-redundant domains to main output directory. 6. If specified, write redundant domains to output directory. */ embHitlistWriteSubsetFasta(outf, infhits, keep); if(dored) embHitlistWriteSubsetFasta(redf, infhits, nokeep); embHitlistDel(&infhits); ajFileClose(&outf); ajFileClose(&redf); ajStrDel(&inname); while(ajListPop(seq_list, (void **) &seq_tmp)) { ajSeqDel(&seq_tmp->Seq); AJFREE(seq_tmp); } ajListFree(&seq_list); ajUintDel(&keep); ajUintDel(&nokeep); } /* Tidy up. */ ajListFree(&in); if(singlets) ajDirDel(&singlets); if(insets) ajDirDel(&insets); ajDiroutDel(&out); if(outred) ajDiroutDel(&outred); ajFileClose(&logf); ajMatrixfDel(&matrix); ajStrDel(&filtername); ajStrDel(&outname); ajStrDel(&mode); embExit(); return 0; }