static void prima_TwoSortscorepos(AjPList *pairlist) { PPair tmp = NULL; AjPList intlist = NULL; AjPList filist = NULL; PPair save = NULL; float score = 0.0; ajListSort(*pairlist,prima_Compare); intlist = ajListNew(); filist = ajListNew(); score = (float) -1.0; while(ajListPop(*pairlist,(void **)&tmp)) { if(tmp->f->score == score) { ajListPush(intlist,(void *)tmp); continue; } save = tmp; ajListSort(intlist,prima_PosCompare); score = tmp->f->score; prima_RevSort(&intlist); while(ajListPop(intlist,(void **)&tmp)) ajListPushAppend(filist,(void *)tmp); ajListPush(intlist,(void *)save); } ajListSort(intlist,prima_PosCompare); prima_RevSort(&intlist); while(ajListPop(intlist,(void **)&tmp)) ajListPushAppend(filist,(void *)tmp); ajListFree(&intlist); ajListFree(pairlist); *pairlist = filist; return; }
static void prima_RevSort(AjPList *alist) { PPair tmp = NULL; AjPList intlist = NULL; AjPList filist = NULL; PPair save = NULL; ajint pos = -1; intlist = ajListNew(); filist = ajListNew(); pos = -1; while(ajListPop(*alist,(void **)&tmp)) { if(tmp->f->start+tmp->f->primerlen == pos) { ajListPush(intlist,(void *)tmp); continue; } save = tmp; ajListSort(intlist,prima_PosEndCompare); pos = tmp->f->start+tmp->f->primerlen; while(ajListPop(intlist,(void **)&tmp)) ajListPushAppend(filist,(void *)tmp); ajListPush(intlist,(void *)save); } ajListSort(intlist,prima_PosEndCompare); while(ajListPop(intlist,(void **)&tmp)) ajListPushAppend(filist,(void *)tmp); ajListFree(&intlist); ajListFree(alist); *alist = filist; return; }
static void silent_fmt_hits(AjPList hits, AjPFeattable feat, AjBool silent, AjBool rev) { PSilent res; AjPFeature sf = NULL; AjPStr tmpFeatStr = NULL; ajListSort(hits,silent_basecompare); while(ajListPop(hits,(void **)&res)) { if (rev) sf = ajFeatNewIIRev(feat, res->match, res->match+ajStrGetLen(res->site)-1); else sf = ajFeatNewII(feat, res->match, res->match+ajStrGetLen(res->site)-1); if (silent) { ajFmtPrintS(&tmpFeatStr, "*silent Yes"); ajFeatTagAdd (sf, NULL, tmpFeatStr); } ajFmtPrintS(&tmpFeatStr, "*enzyme %S", res->code); ajFeatTagAdd (sf, NULL, tmpFeatStr); ajFmtPrintS(&tmpFeatStr, "*rspattern %S", res->site); ajFeatTagAdd (sf, NULL, tmpFeatStr); ajFmtPrintS(&tmpFeatStr, "*baseposn %d", res->base); ajFeatTagAdd (sf, NULL, tmpFeatStr); ajFmtPrintS(&tmpFeatStr, "*aa %S.%S", res->seqaa, res->reaa); ajFeatTagAdd (sf, NULL, tmpFeatStr); ajFmtPrintS(&tmpFeatStr, "*mutation %c->%c", res->obase,res->nbase); ajFeatTagAdd (sf, NULL, tmpFeatStr); ajStrDel(&res->code); ajStrDel(&res->site); ajStrDel(&res->seqaa); ajStrDel(&res->reaa); AJFREE(res); } ajStrDel(&tmpFeatStr); return; }
static void remap_NoCutList(AjPFile outfile, const AjPTable hittable, AjBool html, const AjPStr enzymes, AjBool blunt, AjBool sticky, ajuint sitelen, AjBool commercial, AjBool ambiguity, AjBool limit, const AjPTable retable) { /* for iterating over hittable */ PValue value; void **keyarray = NULL; /* array for table */ void **valarray = NULL; /* array for table */ ajint i; /* list of enzymes that cut */ AjPList cutlist; AjIList citer; /* iterator for cutlist */ AjPStr cutname = NULL; AjBool found; /* for parsing value->iso string */ AjPStrTok tok; char tokens[] = " ,"; AjPStr code = NULL; const char *p; /* for reading in enzymes names */ AjPFile enzfile = NULL; AjPStr *ea; ajint ne; /* number of enzymes */ AjBool isall = ajTrue; /* list of enzymes that don't cut */ AjPList nocutlist; AjIList niter; /* iterator for nocutlist */ AjPStr nocutname = NULL; /* count of rejected enzymes not matching criteria */ ajint rejected_count = 0; EmbPPatRestrict enz; /* for renaming preferred isoschizomers */ AjPList newlist; /* ** ** Make a list of enzymes('cutlist') that hit ** including the isoschizomer names ** */ ajDebug("Make a list of all enzymes that cut\n"); cutlist = ajListstrNew(); nocutlist = ajListstrNew(); ajTableToarrayKeysValues(hittable, &keyarray, &valarray); for(i = 0; keyarray[i]; i++) { value = (PValue) valarray[i]; cutname = ajStrNew(); ajStrAssignRef(&cutname, keyarray[i]); ajListstrPushAppend(cutlist, cutname); /* Add to cutlist all isoschizomers of enzymes that cut */ ajDebug("Add to cutlist all isoschizomers of enzymes that cut\n"); /* start token to parse isoschizomers names */ tok = ajStrTokenNewC(value->iso, tokens); while(ajStrTokenNextParseC(&tok, tokens, &code)) { cutname = ajStrNew(); ajStrAssignS(&cutname, code); ajListstrPushAppend(cutlist, cutname); } ajStrTokenDel(&tok); } ajStrDel(&code); AJFREE(keyarray); AJFREE(valarray); /* ** Read in list of enzymes ('nocutlist') - either all or ** the input enzyme list. ** Exclude those that don't match the selection criteria - count these. */ ajDebug("Read in a list of all input enzyme names\n"); ne = 0; if(!enzymes) isall = ajTrue; else { /* get input list of enzymes into ea[] */ ne = ajArrCommaList(enzymes, &ea); if(ajStrMatchCaseC(ea[0], "all")) isall = ajTrue; else { isall = ajFalse; for(i=0; i<ne; ++i) ajStrRemoveWhite(&ea[i]); } } enzfile = ajDatafileNewInNameC(ENZDATA); /* push all enzyme names without the required criteria onto nocutlist */ enz = embPatRestrictNew(); while(!ajFileIsEof(enzfile)) { if(!embPatRestrictReadEntry(enz, enzfile)) continue; /* ** If user entered explicit enzyme list, then check to see if ** this is one of that explicit list */ if(!isall) { found = AJFALSE; for(i=0; i<ne; ++i) if(ajStrMatchCaseS(ea[i], enz->cod)) { found = AJTRUE; break; } if(!found) /* not in the explicit list */ continue; ajDebug("RE %S is in the input explicit list of REs\n", enz->cod); } /* ignore ncuts==0 as they are unknown */ if(!enz->ncuts) { /* number of cut positions */ ajDebug("RE %S has an unknown number of cut positions\n", enz->cod); continue; } ajDebug("RE %S has a known number of cut sites\n", enz->cod); if(enz->len < sitelen) { /* recognition site length */ ajDebug("RE %S does not have a long enough recognition site\n", enz->cod); rejected_count++; continue; } if(!blunt && enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is blunt\n", enz->cod); rejected_count++; continue; } if(!sticky && !enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is sticky\n", enz->cod); rejected_count++; continue; } /* commercially available enzymes have uppercase patterns */ p = ajStrGetPtr(enz->pat); /* ** The -commercial qualifier is only used if we are searching ** through 'all' of the REBASE database - if we have specified an ** explicit list of enzymes then they are searched for whether or ** not they are commercially available */ if((*p >= 'a' && *p <= 'z') && commercial && isall) { ajDebug("RE %S is not commercial\n", enz->cod); rejected_count++; continue; } if(!ambiguity && remap_Ambiguous(enz->pat)) { ajDebug("RE %S is ambiguous\n", enz->cod); rejected_count++; continue; } ajDebug("RE %S matches all required criteria\n", enz->cod); code = ajStrNew(); ajStrAssignS(&code, enz->cod); ajListstrPushAppend(nocutlist, code); } embPatRestrictDel(&enz); ajFileClose(&enzfile); for(i=0; i<ne; ++i) if(ea[i]) ajStrDel(&ea[i]); if(ne) AJFREE(ea); /* ** Change names of enzymes in the non-cutter list ** to that of preferred (prototype) ** enzyme name so that the isoschizomers of cutters ** will be removed from the ** non-cutter list in the next bit. ** Remove duplicate prototype names. */ if(limit) { newlist = ajListstrNew(); remap_RenamePreferred(nocutlist, retable, newlist); ajListstrFreeData(&nocutlist); nocutlist = newlist; ajListSortUnique(nocutlist, remap_cmpcase, remap_strdel); } /* ** Iterate through the list of input enzymes removing those that are in ** the cutlist. */ ajDebug("Remove from the nocutlist all enzymes and isoschizomers " "that cut\n"); /* ** This steps down both lists at the same time, comparing names and ** iterating to the next name in whichever list whose name compares ** alphabetically before the other. Where a match is found, the ** nocutlist item is deleted. */ ajListSort(nocutlist, remap_cmpcase); ajListSort(cutlist, remap_cmpcase); citer = ajListIterNewread(cutlist); niter = ajListIterNew(nocutlist); /* while((cutname = (AjPStr)ajListIterGet(citer)) != NULL) ajDebug("dbg cutname = %S\n", cutname); */ nocutname = (AjPStr)ajListIterGet(niter); cutname = (AjPStr)ajListIterGet(citer); ajDebug("initial cutname, nocutname: '%S' '%S'\n", cutname, nocutname); while(nocutname != NULL && cutname != NULL) { i = ajStrCmpCaseS(cutname, nocutname); ajDebug("compare cutname, nocutname: %S %S ", cutname, nocutname); ajDebug("ajStrCmpCase=%d\n", i); if(i == 0) { /* match - so remove from nocutlist */ ajDebug("ajListstrRemove %S\n", nocutname); ajListstrIterRemove(niter); nocutname = (AjPStr)ajListIterGet(niter); /* ** Don't increment the cutname list pointer here ** - there may be more than one entry in the nocutname ** list with the same name because we have converted ** isoschizomers to their preferred name */ /* cutname = (AjPStr)ajListIterGet(citer); */ } else if(i == -1) /* cutlist name sorts before nocutlist name */ cutname = (AjPStr)ajListIterGet(citer); else if(i == 1) /* nocutlist name sorts before cutlist name */ nocutname = (AjPStr)ajListIterGet(niter); } ajListIterDel(&citer); ajListIterDel(&niter); ajListstrFreeData(&cutlist); /* Print the resulting list of those that do not cut*/ ajDebug("Print out the list\n"); /* print the title */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# Enzymes that do not cut\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); if(html) ajFmtPrintF(outfile, "<PRE>"); /* ajListSort(nocutlist, ajStrVcmp);*/ niter = ajListIterNewread(nocutlist); i = 0; while((nocutname = (AjPStr)ajListIterGet(niter)) != NULL) { ajFmtPrintF(outfile, "%-10S", nocutname); /* new line after every 7 names printed */ if(i++ == 7) { ajFmtPrintF(outfile, "\n"); i = 0; } } ajListIterDel(&niter); /* end the output */ ajFmtPrintF(outfile, "\n"); if(html) {ajFmtPrintF(outfile, "</PRE>\n");} /* ** Print the count of rejected enzymes ** N.B. This is the count of ALL rejected enzymes including all ** isoschizomers */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# No. of cutting enzymes which do not match the\n" "# SITELEN, BLUNT, STICKY, COMMERCIAL, AMBIGUOUS criteria\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); ajFmtPrintF(outfile, "%d\n", rejected_count); ajDebug("Tidy up\n"); ajListstrFreeData(&nocutlist); ajListstrFreeData(&cutlist); return; }
/* @funcstatic sigscanlig_score_ligands_site ********************************* ** ** Writes score data for ligands (Lighit objects) from individual hits to ** signatures (Hit objects). Site score method is used. List of hit objects ** must be sorted by ligand type and site number. ** ** @param [r] hits [const AjPList] List of hit objects. ** @return [AjBool] True on success ** @@ ******************************************************************************/ AjPList sigscanlig_score_ligands_site(AjPList hits) { AjPList ret = NULL; AjIList iter = NULL; /* Iterator. */ EmbPHit hit = NULL; AjPStr prev_ligand = NULL; SigPLighit lighit = NULL; ajint prevsn = -1; /* Previous site number */ float score = 0.0; /* Score for current ligand */ ajint nhits = 0; /* No. of hits (patches) for current ligand. */ ajint nsites = 0; /* No. of sites for current ligand */ float score_site = 0.0; /* Score for this site. */ ajint patch_cnt = 0; /* No. of patches in current site. */ ret = ajListNew(); prev_ligand = ajStrNew(); iter = ajListIterNew(hits); /* Hits are already sorted by ligid & site number */ while((hit = (EmbPHit) ajListIterGet(iter))) { /* ajFmtPrint("Current hit: %S (ligid: %S, sn: %d) score: %f\n", hit->Acc, hit->Sig->Ligid, hit->Sig->sn, hit->Score); */ /* New site */ if(hit->Sig->sn != prevsn) { if(patch_cnt) score_site /= patch_cnt; score += score_site; patch_cnt = 0; score_site = 0; } /* New ligand */ if((!ajStrMatchS(hit->Sig->Ligid, prev_ligand))) { if(nsites) score /= nsites; if(lighit) { /* lighit->ns = snarr_siz; */ lighit->ns = nsites; lighit->np = nhits; lighit->score = score; ajStrAssignS(&lighit->ligid, prev_ligand); ajListPushAppend(ret, lighit); } lighit = sigscanlig_LighitNew(); nsites = 0; nhits=0; prevsn = -1; score = 0; } /* Increment count of sites and hits/patches (for current ligand) and patches (for current site) */ if(hit->Sig->sn != prevsn) nsites++; score_site += hit->Score; nhits++; patch_cnt++; ajStrAssignS(&prev_ligand, hit->Sig->Ligid); prevsn = hit->Sig->sn; } /* Process last hit */ if(patch_cnt) score_site /= patch_cnt; score += score_site; if(nsites) score /= nsites; if(lighit) { /* lighit->ns = snarr_siz; */ lighit->ns = nsites; lighit->np = nhits; lighit->score = score; ajStrAssignS(&lighit->ligid, prev_ligand); ajListPushAppend(ret, lighit); } ajListSort(ret, sigscanlig_MatchinvScore); ajListIterDel(&iter); ajStrDel(&prev_ligand); return ret; }
AjPList sigscanlig_score_ligands_patch(AjPList hits) { AjPList ret = NULL; AjIList iter = NULL; /* Iterator. */ EmbPHit hit = NULL; AjPStr prev_ligand = NULL; SigPLighit lighit = NULL; float score = 0.0; ajint nhits = 0; /* No. of hits (patches) for current ligand. */ ajint nsites = 0; /* No. of sites for current ligand. */ ajint prevsn = -1; /* Previous site number */ ret = ajListNew(); prev_ligand = ajStrNew(); iter = ajListIterNew(hits); while((hit = (EmbPHit) ajListIterGet(iter))) { /* New ligand */ if((!ajStrMatchS(hit->Sig->Ligid, prev_ligand))) { if(nhits) score /= nhits; if(lighit) { lighit->ns = nsites; lighit->np = nhits; lighit->score = score; ajStrAssignS(&lighit->ligid, prev_ligand); ajListPushAppend(ret, lighit); } lighit = sigscanlig_LighitNew(); nsites = 0; nhits=0; prevsn = -1; score = 0; } /* Increment count of sites and hits/patches (for current ligand) and patches (for current site) */ if(hit->Sig->sn != prevsn) nsites++; score+= hit->Score; nhits++; ajStrAssignS(&prev_ligand, hit->Sig->Ligid); prevsn = hit->Sig->sn; } /* Process last hit */ if(nhits) score /= nhits; if(lighit) { lighit->ns = nsites; lighit->np = nhits; lighit->score = score; ajStrAssignS(&lighit->ligid, prev_ligand); ajListPushAppend(ret, lighit); } ajListSort(ret, sigscanlig_MatchinvScore); ajListIterDel(&iter); ajStrDel(&prev_ligand); return ret; }
int main(int argc, char **argv) { AjPList sigin = NULL; /* Signature input file names. */ AjPStr signame = NULL; /* Name of signature file. */ AjPFile sigf = NULL; /* Signature input file. */ EmbPSignature sig = NULL; /* Signature. */ AjPList siglist = NULL; /* List of signatures. */ AjIList sigiter = NULL; /* Iterator for siglist. */ AjBool sigok = ajFalse; /* True if signature processed ok. */ EmbPHit hit = NULL; /* Hit to store signature-sequence match. */ AjPList hits = NULL; /* List of hits */ AjPList ligands = NULL; /* List of top-scoring ligands. */ AjPSeqall database=NULL; /* Protein sequences to match signature against. */ AjPSeq seq = NULL; /* Current sequence. */ AjPMatrixf sub =NULL; /* Residue substitution matrix. */ float gapo =0.0; /* Gap insertion penalty. */ float gape =0.0; /* Gap extension penalty. */ AjPStr nterm=NULL; /* Holds N-terminal matching options from acd. */ ajint ntermi=0; /* N-terminal option as int. */ AjPFile hitsf =NULL; /* Hits output file. sequence matches. */ AjPDirout hitsdir=NULL; /* Directory of hits files (output). */ AjPFile alignf =NULL; /* Alignment output file. */ AjPDirout aligndir=NULL; /* Directory of alignment files (output). */ AjPFile resultsf =NULL; /* Results file (output). */ AjPDirout resultsdir=NULL; /* Directory of results files (output). */ AjPStr mode = NULL; /* Mode, 1: Patch score mode, 2: Site score mode. */ ajint modei = 0; /* Selected mode as integer. */ SigPLighit lighit = NULL; embInitPV("sigscanlig", argc, argv, "SIGNATURE",VERSION); /* GET VALUES FROM ACD */ sigin = ajAcdGetDirlist("siginfilesdir"); database = ajAcdGetSeqall("dbseqall"); sub = ajAcdGetMatrixf("sub"); gapo = ajAcdGetFloat("gapo"); gape = ajAcdGetFloat("gape"); nterm = ajAcdGetListSingle("nterm"); hitsdir = ajAcdGetOutdir("hitsoutdir"); aligndir = ajAcdGetOutdir("alignoutdir"); resultsdir = ajAcdGetOutdir("resultsoutdir"); mode = ajAcdGetListSingle("mode"); /*Assign N-terminal matching option etc. */ ajFmtScanS(nterm, "%d", &ntermi); modei = (ajint) ajStrGetCharFirst(mode)-48; /* READ & PROCESS SIGNATURES */ siglist = ajListNew(); while(ajListPop(sigin, (void **) &signame)) { /* Read signature files, compile signatures and populate list. */ sigok = ajFalse; if((sigf = ajFileNewInNameS(signame))) if((sig = embSignatureReadNew(sigf))) if(embSignatureCompile(&sig, gapo, gape, sub)) { sigok=ajTrue; ajListPushAppend(siglist, sig); /* ajFmtPrint("Id: %S\nDomid: %S\nLigid: %S\nns: %d\n" "sn: %d\nnp: %d\npn: %d\nminpatch: %d\n" "maxgap: %d\n", sig->Id, sig->Domid, sig->Ligid, sig->ns, sig->sn, sig->np, sig->pn, sig->minpatch, sig->maxgap); */ } if(!sigok) { ajWarn("Could not process %S", signame); embSignatureDel(&sig); ajFileClose(&sigf); ajStrDel(&signame); continue; } ajFileClose(&sigf); ajStrDel(&signame); } ajListFree(&sigin); /* ALIGN EACH QUERY SEQUENCE TO LIST OF SIGNATURE */ while(ajSeqallNext(database, &seq)) { /* Do sequence-signature alignment and save results */ hits = ajListNew(); sigiter = ajListIterNew(siglist); while((sig = (EmbPSignature) ajListIterGet(sigiter))) { if(embSignatureAlignSeq(sig, seq, &hit, ntermi)) { hit->Sig = sig; ajListPushAppend(hits, hit); hit=NULL; /* To force reallocation by embSignatureAlignSeq */ } /* There has to be a hit for each signature for correct generation of the LHF by sigscanlig_WriteFasta. So push an empty hit if necessary. 'hit'=NULL forces reallocation by embSignatureAlignSeq. */ /* else { hit = embHitNew(); ajListPushAppend(hits, hit); hit=NULL; } */ } ajListIterDel(&sigiter); /* Rank-order the list of hits by score */ ajListSort(hits, embMatchinvScore); /* Write ligand hits & alignment files (output) */ hitsf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), hitsdir); alignf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), aligndir); resultsf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), resultsdir); /* if((!sigscanlig_WriteFasta(hitsf, siglist, hits))) ajFatal("Bad args to sigscanlig_WriteFasta"); */ if((!sigscanlig_WriteFasta(hitsf, hits))) ajFatal("Bad args to sigscanlig_WriteFasta"); if((!sigscanlig_SignatureAlignWriteBlock(alignf, hits))) ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock"); /* if((!sigscanlig_SignatureAlignWriteBlock(alignf, siglist, hits))) ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock"); */ /* Sort list of hits by ligand type and site number. Process list of ligands and print out. */ ajListSortTwo(hits, embMatchLigid, embMatchSN); if(modei==1) ligands = sigscanlig_score_ligands_patch(hits); else if(modei==2) ligands = sigscanlig_score_ligands_site(hits); else ajFatal("Unrecognised mode"); sigscanlig_WriteResults(ligands, resultsf); ajFileClose(&hitsf); ajFileClose(&alignf); ajFileClose(&resultsf); /* Memory management */ while(ajListPop(hits, (void **) &hit)) embHitDel(&hit); ajListFree(&hits); while(ajListPop(ligands, (void **) &lighit)) sigscanlig_LigHitDel(&lighit); ajListFree(&ligands); } /* MEMORY MANAGEMENT */ while(ajListPop(siglist, (void **) &sig)) embSignatureDel(&sig); ajListFree(&siglist); ajSeqallDel(&database); ajMatrixfDel(&sub); ajStrDel(&nterm); ajDiroutDel(&hitsdir); ajDiroutDel(&aligndir); ajDiroutDel(&resultsdir); ajStrDel(&mode); embExit(); return 0; }
int main(int argc, char **argv) { AjPList idlist; AjPList* fieldList = NULL; AjBool systemsort; AjBool cleanup; ajint blastv = 0; char dbtype = '\0'; ajuint maxindex; ajuint maxidlen = 0; ajuint maxlen; AjPStr version = NULL; AjPStr seqtype = NULL; AjPFile elistfile = NULL; AjPFile* alistfile = NULL; AjPStr dbname = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjPStr sortopt = NULL; void **entryIds = NULL; AjBool usesrc = AJTRUE; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr curfilename = NULL; AjPStr idformat = NULL; EmbPEntry entry; PBlastDb db = NULL; ajuint idCount = 0; ajuint idDone; AjPList listTestFiles = NULL; void ** testFiles = NULL; ajuint nfiles; ajuint ifile; ajuint jfile; ajuint filesize; short recsize; ajuint maxfilelen = 20; char date[4] = { 0,0,0,0 }; AjPStr tmpfname = NULL; AjPStr* fields = NULL; AjPFile entFile = NULL; AjPStr* divfiles = NULL; ajint* maxFieldLen = NULL; ajuint ifield = 0; ajuint nfields = 0; AjPFile logfile = NULL; ajuint* countField = NULL; ajuint* fieldTot = NULL; ajuint idCountFile = 0; ajuint i = 0; embInit("dbiblast", argc, argv); idformat = ajStrNewC("NCBI"); fields = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); systemsort = ajAcdGetBoolean("systemsort"); cleanup = ajAcdGetBoolean("cleanup"); sortopt = ajAcdGetString("sortoptions"); maxindex = ajAcdGetInt("maxindex"); version = ajAcdGetListSingle("blastversion"); seqtype = ajAcdGetListSingle("seqtype"); usesrc = ajAcdGetBoolean("sourcefile"); logfile = ajAcdGetOutfile("outfile"); while(fields[nfields]) /* array ends with a NULL */ nfields++; if(nfields) { AJCNEW(maxFieldLen, nfields); AJCNEW0(countField, nfields); AJCNEW0(fieldTot, nfields); for(ifield=0; ifield < nfields; ifield++) maxFieldLen[ifield] = (ajint) maxindex * -1; if(systemsort) AJCNEW(alistfile, nfields); else { AJCNEW(fieldList, nfields); for(ifield=0; ifield < nfields; ifield++) fieldList[ifield] = ajListNew(); } } if(ajStrMatchC(datestr, "00/00/00")) ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex")); ajStrRemoveWhite(&dbname); /* used for temp filenames */ embDbiDateSet(datestr, date); idlist = ajListNew(); if(ajUtilGetBigendian()) readReverse = ajFalse; else readReverse = ajTrue; ajStrToInt(version, &blastv); dbtype = ajStrGetCharFirst(seqtype); ajDebug("reading '%S/%S'\n", directory, filename); ajDebug("writing '%S/'\n", indexdir); listTestFiles = embDbiFileListExc(directory, filename, exclude); ajListSort(listTestFiles, ajStrVcmp); nfiles = ajListToarray(listTestFiles, &testFiles); if(!nfiles) ajDie("No input files in '%S' matched filename '%S'", directory, filename); embDbiLogHeader(logfile, dbname, release, datestr, indexdir, maxindex); embDbiLogFields(logfile, fields, nfields); embDbiLogSource(logfile, directory, filename, exclude, (AjPStr*) testFiles, nfiles); embDbiLogCmdline(logfile); AJCNEW0(divfiles, nfiles); /* ** process each input file, one at a time */ jfile = 0; for(ifile=0; ifile < nfiles; ifile++) { curfilename = (AjPStr) testFiles[ifile]; if(!dbiblast_blastopenlib(curfilename, usesrc, blastv, dbtype, &db)) continue; /* could be the wrong file type with "*.*" */ ajDebug("processing filename '%S' ...\n", curfilename); ajDebug("processing file '%S' ...\n", db->TFile->Name); ajStrAssignS(&divfiles[jfile], db->TFile->Name); ajFilenameTrimPath(&divfiles[jfile]); if(ajStrGetLen(divfiles[jfile]) >= maxfilelen) maxfilelen = ajStrGetLen(divfiles[jfile]) + 1; if(systemsort) /* elistfile for entries, alist for fields */ elistfile = embDbiSortOpen(alistfile, jfile, dbname, fields, nfields); idCountFile = 0; for(i=0;i<nfields;i++) countField[i] = 0; while((entry=dbiblast_nextblastentry(db, jfile, idformat, systemsort, fields, maxFieldLen, &maxidlen, countField, elistfile, alistfile))) { idCountFile++; if(!systemsort) /* save the entry data in lists */ { embDbiMemEntry(idlist, fieldList, nfields, entry, jfile); } } idCount += idCountFile; if(systemsort) { embDbiSortClose(&elistfile, alistfile, nfields); /* lost the entry, so can't free it :-) */ } embDbiLogFile(logfile, curfilename, idCountFile, fields, countField, nfields); dbiblast_dbfree(&db); jfile++; } nfiles = jfile; /* ** write the division.lkp file */ embDbiWriteDivision(indexdir, dbname, release, date, maxfilelen, nfiles, divfiles, NULL); /* ** Write the entryname.idx index */ ajStrAssignC(&tmpfname, "entrynam.idx"); entFile = ajFileNewOutNamePathS(tmpfname, indexdir); recsize = maxidlen+10; filesize = 300 + (idCount*(ajint)recsize); embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date); if(systemsort) idDone = embDbiSortWriteEntry(entFile, maxidlen, dbname, nfiles, cleanup, sortopt); else /* save entries in entryIds array */ { idDone = embDbiMemWriteEntry(entFile, maxidlen, idlist, &entryIds); if(idDone != idCount) ajFatal("Duplicates not allowed for in-memory processing"); } embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone); ajFileClose(&entFile); /* ** Write the fields index files */ for(ifield=0; ifield < nfields; ifield++) { if(maxindex) maxlen = maxindex; else { if(maxFieldLen[ifield] >= 0) maxlen = maxFieldLen[ifield]; else maxlen = - maxFieldLen[ifield]; } if(systemsort) fieldTot[ifield] = embDbiSortWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, nfiles, idCount, cleanup, sortopt); else fieldTot[ifield] = embDbiMemWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, fieldList[ifield], entryIds); } embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot, nfields, nfiles, idDone, idCount); if(systemsort) embDbiRmEntryFile(dbname, cleanup); ajListMap(idlist, embDbiEntryDelMap, NULL); ajListFree(&idlist); AJFREE(entryIds); ajStrDelarray(&fields); for(i=0;i<nfields;i++) { if(systemsort) { ajFileClose(&alistfile[i]); } else { ajListMap(fieldList[i], embDbiFieldDelMap, NULL); ajListFree(&fieldList[i]); } } AJFREE(alistfile); AJFREE(fieldList); ajStrDel(&version); ajStrDel(&seqtype); ajFileClose(&elistfile); for(i=0;i<nfiles;i++) { ajStrDel(&divfiles[i]); } AJFREE(countField); AJFREE(fieldTot); ajStrDel(&dbname); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&sortopt); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&idformat); ajStrDel(&tmpfname); AJFREE(maxFieldLen); ajFileClose(&logfile); ajListstrFreeData(&listTestFiles); ajStrDel(&t); ajStrDel(&id); ajStrDel(&acc); ajStrDel(&hline); ajStrDel(&tmpdes); ajStrDel(&tmpfd); ajStrDel(&tmpgi); ajStrDel(&tmpdb); ajStrDel(&tmpac); ajStrDel(&tmpsv); ajRegFree(&wrdexp); embDbiEntryDel(&dbiblastEntry); if(fdl) { for(i=0; i < nfields; i++) ajListFree(&fdl[i]); AJFREE(fdl); } for(i=0;i<nfiles;i++) { ajStrDel(&divfiles[i]); } AJFREE(divfiles); AJFREE(testFiles); embExit(); return 0; }
int main(int argc, char **argv) { AjPList idlist; AjPList* fieldList = NULL; AjBool systemsort; AjBool cleanup; ajuint maxindex; ajuint maxidlen = 0; ajuint maxlen; AjPFile elistfile = NULL; AjPFile* alistfile = NULL; AjPStr dbname = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjPStr sortopt = NULL; void **entryIds = NULL; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr curfilename = NULL; AjPFile libr=NULL; AjPStr idformat = NULL; EmbPEntry entry; ajuint idtype = 0; ajuint idCount = 0; ajuint idDone; AjPList listInputFiles = NULL; void ** inputFiles = NULL; ajuint nfiles; ajuint ifile; ajuint filesize; short recsize; ajuint maxfilelen = 20; char date[4] = { 0,0,0,0 }; AjPStr tmpfname = NULL; AjPStr* fields = NULL; AjPFile entFile = NULL; AjPStr* divfiles = NULL; AjPRegexp regIdExp = NULL; ajint* maxFieldLen = NULL; ajuint ifield = 0; ajuint nfields = 0; AjPFile logfile = NULL; ajuint* countField = NULL; ajuint* fieldTot = NULL; ajuint idCountFile = 0; ajuint i; embInit("dbifasta", argc, argv); idformat = ajAcdGetListSingle("idformat"); fields = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); systemsort = ajAcdGetBoolean("systemsort"); cleanup = ajAcdGetBoolean("cleanup"); sortopt = ajAcdGetString("sortoptions"); maxindex = ajAcdGetInt("maxindex"); logfile = ajAcdGetOutfile("outfile"); while(fields[nfields]) /* array ends with a NULL */ nfields++; if(nfields) { AJCNEW(maxFieldLen, nfields); AJCNEW0(countField, nfields); AJCNEW0(fieldTot, nfields); for(ifield=0; ifield < nfields; ifield++) maxFieldLen[ifield] = (ajint)maxindex * -1; if(systemsort) AJCNEW(alistfile, nfields); else { AJCNEW(fieldList, nfields); for(ifield=0; ifield < nfields; ifield++) fieldList[ifield] = ajListNew(); } } if(ajStrMatchC(datestr, "00/00/00")) ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex")); ajStrRemoveWhite(&dbname); /* used for temp filenames */ embDbiDateSet(datestr, date); idlist = ajListNew(); regIdExp = dbifasta_getExpr(idformat, &idtype); ajDebug("reading '%S/%S'\n", directory, filename); ajDebug("writing '%S/'\n", indexdir); listInputFiles = embDbiFileListExc(directory, filename, exclude); ajListSort(listInputFiles, &ajStrVcmp); nfiles = (ajuint) ajListToarray(listInputFiles, &inputFiles); if(!nfiles) ajDie("No input files in '%S' matched filename '%S'", directory, filename); embDbiLogHeader(logfile, dbname, release, datestr, indexdir, maxindex); embDbiLogFields(logfile, fields, nfields); embDbiLogSource(logfile, directory, filename, exclude, (AjPStr*) inputFiles, nfiles); embDbiLogCmdline(logfile); AJCNEW0(divfiles, nfiles); /* ** process each input file, one at a time */ for(ifile=0; ifile < nfiles; ifile++) { ajStrAssignS(&curfilename,(AjPStr) inputFiles[ifile]); embDbiFlatOpenlib(curfilename, &libr); ajFilenameTrimPath(&curfilename); if(ajStrGetLen(curfilename) >= maxfilelen) maxfilelen = ajStrGetLen(curfilename) + 1; ajDebug("processing filename '%S' ...\n", curfilename); ajDebug("processing file '%F' ...\n", libr); ajStrAssignS(&divfiles[ifile], curfilename); if(systemsort) /* elistfile for entries, alist for fields */ elistfile = embDbiSortOpen(alistfile, ifile, dbname, fields, nfields); idCountFile = 0; for(i=0;i<nfields;i++) countField[i] = 0; while((entry=dbifasta_NextFlatEntry(libr, ifile, regIdExp, idtype, systemsort, fields, maxFieldLen, &maxidlen, countField, elistfile, alistfile))) { idCountFile++; if(!systemsort) /* save the entry data in lists */ embDbiMemEntry(idlist, fieldList, nfields, entry, ifile); entry = NULL; } idCount += idCountFile; if(systemsort) { embDbiSortClose(&elistfile, alistfile, nfields); AJFREE(entry); } else { embDbiEntryDel(&dbifastaGEntry); } embDbiLogFile(logfile, curfilename, idCountFile, fields, countField, nfields); } /* write the division.lkp file */ embDbiWriteDivision(indexdir, dbname, release, date, maxfilelen, nfiles, divfiles, NULL); /* Write the entryname.idx index */ ajStrAssignC(&tmpfname, "entrynam.idx"); entFile = ajFileNewOutNamePathS(tmpfname, indexdir); recsize = maxidlen+10; filesize = 300 + (idCount*(ajint)recsize); embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date); if(systemsort) idDone = embDbiSortWriteEntry(entFile, maxidlen, dbname, nfiles, cleanup, sortopt); else /* save entries in entryIds array */ { idDone = embDbiMemWriteEntry(entFile, maxidlen, idlist, &entryIds); if(idDone != idCount) ajFatal("Duplicates not allowed for in-memory processing"); } embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone); ajFileClose(&entFile); /* Write the fields index files */ for(ifield=0; ifield < nfields; ifield++) { if(maxindex) maxlen = maxindex; else { if(maxFieldLen[ifield] >= 0) maxlen = maxFieldLen[ifield]; else maxlen = - maxFieldLen[ifield]; } if(systemsort) fieldTot[ifield] = embDbiSortWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, nfiles, idCount, cleanup, sortopt); else fieldTot[ifield] = embDbiMemWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, fieldList[ifield], entryIds); } embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot, nfields, nfiles, idDone, idCount); if(systemsort) embDbiRmEntryFile(dbname, cleanup); ajStrDel(&idformat); ajStrDelarray(&fields); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&dbname); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&sortopt); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&tmpfname); ajFileClose(&libr); ajFileClose(&logfile); for(i=0;i<nfields;i++) { if(systemsort) { ajFileClose(&alistfile[i]); } else { ajListMap(fieldList[i], &embDbiFieldDelMap, NULL); ajListFree(&fieldList[i]); } } AJFREE(alistfile); AJFREE(fieldList); AJFREE(maxFieldLen); AJFREE(countField); AJFREE(fieldTot); for(i=0;i<nfiles;i++) { ajStrDel(&divfiles[i]); } AJFREE(divfiles); AJFREE(inputFiles); embDbiEntryDel(&dbifastaGEntry); ajStrDel(&dbifastaGRline); ajStrDel(&dbifastaGTmpId); if(dbifastaGFdl) { for(i=0; i < nfields; i++) ajListFree(&dbifastaGFdl[i]); AJFREE(dbifastaGFdl); } ajListMap(idlist, &embDbiEntryDelMap, NULL); ajListFree(&idlist); ajListstrFreeData(&listInputFiles); AJFREE(entryIds); ajRegFree(&dbifastaGIdexp); ajRegFree(&dbifastaGWrdexp); ajRegFree(®IdExp); ajStrDel(&dbifastaGTmpAc); ajStrDel(&dbifastaGTmpSv); ajStrDel(&dbifastaGTmpGi); ajStrDel(&dbifastaGTmpDb); ajStrDel(&dbifastaGTmpDes); ajStrDel(&dbifastaGTmpFd); ajStrDel(&curfilename); embExit(); return 0; }
/* @prog ssematch *********************************************************** ** ** Searches a DCF file (domain classification file) for secondary structure ** matches. ** ****************************************************************************/ int main(int argc, char **argv) { /* Variables declarations */ AjPFile dcfin = NULL; /* Domain classification file */ AjPFile ssin = NULL; /* Secondary structure input file*/ AjPMatrixf matrix = NULL; /* Substitution matrix */ AjPFile out_ss = NULL; /* For ss top matches*/ AjPFile out_se = NULL; /* For se top matches*/ AjPFile outfile = NULL; /* Output file*/ AjPFile logf = NULL; /* Log file */ float gapopen_sss = 0.0; /* Gap insertion penalty */ float gapopen_sse = 0.0; float gapopen = 0.0; float gapextend_sss = 0.0; /* Gap extension penalty */ float gapextend_sse = 0.0; float gapextend = 0.0; ajint max_hits = 0; /* number of top alignments to display*/ ajint mode = 0; ajint x = 0; AjPScop temp_scop = NULL; /* scop object pointer*/ AjPList scop_list = NULL; /* list of scop objects for entire domain classification file */ AjIList iter = NULL; AjPStr msg = NULL; /* Pointer to String used for messages */ AjPStr line = NULL; AjPStr qse = NULL; /* query secondary structure elements*/ AjPStr qss = NULL; /* query secondary structure (by residue)*/ AjPSeq q3se = NULL; /* query secondary structure elements, 3-letter code*/ AjPSeq q3ss = NULL; /* query secondary structure (by residue), 3-letter code*/ AjPSeq query = NULL; /* Read data from acd */ embInitPV("ssematch",argc,argv,"DOMAINATRIX",VERSION); dcfin = ajAcdGetInfile("dcfinfile"); ssin = ajAcdGetInfile("ssinfile"); max_hits = ajAcdGetInt("maxhits"); matrix = ajAcdGetMatrixf("datafile"); gapopen_sss = ajAcdGetFloat("rgapopen"); gapextend_sss = ajAcdGetFloat("rgapextend"); gapopen_sse = ajAcdGetFloat("egapopen"); gapextend_sse = ajAcdGetFloat("egapextend"); out_ss = ajAcdGetOutfile("outssfile"); out_se = ajAcdGetOutfile("outsefile"); logf = ajAcdGetOutfile("logfile"); /* Create list of scop objects for entire input domain classification file. */ scop_list = ajListNew(); while((temp_scop = (ajScopReadCNew(dcfin, "*")))) ajListPushAppend(scop_list,temp_scop); /* Error handing if domain classification file was empty. */ if(!(ajListGetLength(scop_list))) { ajWarn("Empty list from scop input file\n"); ajFileClose(&dcfin); ajFileClose(&ssin); ajMatrixfDel(&matrix); ajFileClose(&out_ss); ajFileClose(&out_se); ajFileClose(&logf); while(ajListPop(scop_list, (void *) &temp_scop)) ajScopDel(&temp_scop); ajListFree(&scop_list); ajListIterDel(&iter); ajExit(); return 1; } /* Error handling in case of empty query file. */ if(ssin == NULL) { ajWarn("Empty secondary structure query file\n"); ajFileClose(&dcfin); ajFileClose(&ssin); ajMatrixfDel(&matrix); ajFileClose(&out_ss); ajFileClose(&out_se); ajFileClose(&logf); while(ajListPop(scop_list, (void *) &temp_scop)) ajScopDel(&temp_scop); ajListFree(&scop_list); ajListIterDel(&iter); ajExit(); return 1; } /* Assign sequences in query file to sequence objects. */ qse = ajStrNew(); qss = ajStrNew(); while(ajReadlineTrim(ssin,&line)) { /* SE string */ if(ajStrPrefixC(line,"SE")) { ajFmtScanS(line, "%*s %S", &qse); /* Convert this string to 3-letter code & then convert to AjPSeq object. */ q3se = ssematch_convertbases(qse); } /* SS string */ else if(ajStrPrefixC(line,"SS")) { while((ajReadlineTrim(ssin,&line)) && !ajStrPrefixC(line,"XX")) ajStrAppendS(&qss,line); ajStrRemoveWhite(&qss); /* Convert this string to 3-letter code & then to AjPSeq object. */ q3ss = ssematch_convertbases(qss); } } /* For se & then for ss, modes 0 & 1. */ for(mode = 0; mode <= 1; mode++) { /* Assign arguments for alignment function. */ if (mode == 0) { query = q3se; gapopen = gapopen_sse; gapextend = gapextend_sse; outfile = out_se; } else if(mode == 1) { query = q3ss; gapopen = gapopen_sss; gapextend = gapextend_sss; outfile = out_ss; } /* Iterate through list of scop objects & calculate alignment scores. */ iter=ajListIterNew(scop_list); while((temp_scop=(AjPScop)ajListIterGet(iter))) { /* The function extracts the se (mode 0) or ss (mode 1) subject sequences from the scop object, performs a Needleman-Wunsch global alignment with the query sequence & allocates the score to the Score element of the scop object*/ if(!(ssematch_NWScore(temp_scop , query, mode, matrix, gapopen, gapextend))) { ajFmtPrintF(logf, "%-15s\n", "ALIGNMENT"); ajFmtPrintF(logf, "Could not align sequence in scop domain %S\n ", temp_scop->Entry); ajFmtPrintS(&msg, "Could not align sequence in scop domain %S\n ", temp_scop->Entry); ajWarn(ajStrGetPtr(msg)); continue; } } ajListIterDel(&iter); temp_scop = NULL; /* Sort list of Scop objects by Score */ ajListSort(scop_list, ssematch_CompScoreInv); iter=ajListIterNew(scop_list); /* Write top-scoring hits to outfile. */ for(x=0; x < max_hits; x++ ) { temp_scop=(AjPScop)ajListIterGet(iter); /* Print score to output file. */ ajFmtPrintF(outfile, "XX ALIGNMENT SCORE %.3f\nXX\n", temp_scop->Score); /* Could also write alignment - later modification. */ if(!ajScopWrite(outfile, temp_scop)) ajFatal("Could not write output file %S\n", outfile); } ajListIterDel(&iter); temp_scop = NULL; } /* Memoryt management. */ ajFileClose(&dcfin); ajFileClose(&ssin); ajMatrixfDel(&matrix); ajFileClose(&out_ss); ajFileClose(&out_se); ajFileClose(&logf); while(ajListPop(scop_list, (void *) &temp_scop)) ajScopDel(&temp_scop); ajListFree(&scop_list); ajStrDel(&msg); ajStrDel(&line); ajStrDel(&qse); ajStrDel(&qss); ajSeqDel(&q3se); ajSeqDel(&q3ss); ajExit(); return 0; }
int main(int argc, char **argv) { AjPFile outf = NULL; AjPSeq sequence = NULL; AjPStr substr = NULL; AjPStr seqstr = NULL; AjPStr revstr = NULL; AjPStr p1; AjPStr p2; PPrimer eric = NULL; PPrimer fred = NULL; PPrimer f; PPrimer r; PPair pair; AjPList forlist = NULL; AjPList revlist = NULL; AjPList pairlist = NULL; AjBool targetrange; AjBool isDNA = ajTrue; AjBool dolist = ajFalse; ajint primerlen = 0; ajint minprimerlen = 0; ajint maxprimerlen = 0; ajint minprodlen = 0; ajint maxprodlen = 0; ajint prodlen = 0; ajint seqlen = 0; ajint stepping_value = 1; ajint targetstart = 0; ajint targetend = 0; ajint limit = 0; ajint limit2 = 0; ajint lastpos = 0; ajint startpos = 0; ajint endpos = 0; ajint begin; ajint end; ajint v1; ajint v2; ajint overlap; float minpmGCcont = 0.; float maxpmGCcont = 0.; float minprodGCcont = 0.; float maxprodGCcont = 0.; float prodTm; float prodGC; ajint i; ajint j; ajint neric=0; ajint nfred=0; ajint npair=0; float minprimerTm = 0.0; float maxprimerTm = 0.0; float saltconc = 0.0; float dnaconc = 0.0; embInit ("prima", argc, argv); substr = ajStrNew(); forlist = ajListNew(); revlist = ajListNew(); pairlist = ajListNew(); p1 = ajStrNew(); p2 = ajStrNew(); sequence = ajAcdGetSeq("sequence"); outf = ajAcdGetOutfile("outfile"); minprimerlen = ajAcdGetInt("minprimerlen"); maxprimerlen = ajAcdGetInt("maxprimerlen"); minpmGCcont = ajAcdGetFloat("minpmGCcont"); maxpmGCcont = ajAcdGetFloat("maxpmGCcont"); minprimerTm = ajAcdGetFloat("mintmprimer"); maxprimerTm = ajAcdGetFloat("maxtmprimer"); minprodlen = ajAcdGetInt("minplen"); maxprodlen = ajAcdGetInt("maxplen"); minprodGCcont = ajAcdGetFloat("minpgccont"); maxprodGCcont = ajAcdGetFloat("maxpgccont"); saltconc = ajAcdGetFloat("saltconc"); dnaconc = ajAcdGetFloat("dnaconc"); targetrange = ajAcdGetToggle("targetrange"); targetstart = ajAcdGetInt("targetstart"); targetend = ajAcdGetInt("targetend"); overlap = ajAcdGetInt("overlap"); dolist = ajAcdGetBoolean("list"); seqstr = ajSeqGetSeqCopyS(sequence); ajStrFmtUpper(&seqstr); begin = ajSeqGetBegin(sequence); end = ajSeqGetEnd(sequence); seqlen = end-begin+1; ajStrAssignSubC(&substr,ajStrGetPtr(seqstr),begin-1,end-1); revstr = ajStrNewC(ajStrGetPtr(substr)); ajSeqstrReverse(&revstr); AJCNEW0(entropy, seqlen); AJCNEW0(enthalpy, seqlen); AJCNEW0(energy, seqlen); /* Initialise Tm calculation arrays */ ajMeltTempSave(ajStrGetPtr(substr),0,seqlen,saltconc,dnaconc,1, &entropy, &enthalpy, &energy); ajFmtPrintF(outf, "\n\nINPUT SUMMARY\n"); ajFmtPrintF(outf, "*************\n\n"); if(targetrange) ajFmtPrintF (outf, "Prima of %s from positions %d to %d bps\n", ajSeqGetNameC(sequence),targetstart, targetend); else ajFmtPrintF(outf, "Prima of %s\n", ajSeqGetNameC(sequence)); ajFmtPrintF(outf, "PRIMER CONSTRAINTS:\n"); ajFmtPrintF (outf, "PRIMA DOES NOT ALLOW PRIMER SEQUENCE AMBIGUITY OR "); ajFmtPrintF(outf,"DUPLICATE PRIMER ENDPOINTS\n"); ajFmtPrintF(outf, "Primer size range is %d-%d\n",minprimerlen,maxprimerlen); ajFmtPrintF(outf, "Primer GC content range is %.2f-%.2f\n",minpmGCcont, maxpmGCcont); ajFmtPrintF(outf,"Primer melting Temp range is %.2f - %.2f C\n", minprimerTm, maxprimerTm); ajFmtPrintF (outf, "PRODUCT CONSTRAINTS:\n"); ajFmtPrintF(outf,"Product GC content range is %.2f-%.2f\n", minprodGCcont, maxprodGCcont); ajFmtPrintF(outf, "Salt concentration is %.2f (mM)\n", saltconc); ajFmtPrintF(outf, "DNA concentration is %.2f (nM)\n", dnaconc); if(targetrange) ajFmtPrintF(outf, "Targeted range to amplify is from %d to %d\n", targetstart,targetend); else { ajFmtPrintF(outf,"Considering all suitable Primer pairs with "); ajFmtPrintF(outf,"Product length ranges %d to %d\n\n\n", minprodlen, maxprodlen); } ajFmtPrintF(outf, "\n\nPRIMER/PRODUCT PAIR CALCULATIONS & OUTPUT\n"); ajFmtPrintF(outf, "*****************************************\n\n"); if(seqlen-minprimerlen < 0) ajFatal("Sequence too short"); if(targetrange) { ajStrAssignSubC(&p1,ajStrGetPtr(substr),targetstart-begin,targetend-begin); prodGC = ajMeltGC(substr,seqlen); prodTm = ajMeltTempProd(prodGC,saltconc,seqlen); if(prodGC<minprodGCcont || prodGC>maxprodGCcont) { ajFmtPrintF(outf, "Product GC content [%.2f] outside acceptable range\n", prodGC); embExitBad(); return 0; } prima_testtarget(substr, revstr, targetstart-begin, targetend-begin, minprimerlen, maxprimerlen, seqlen, minprimerTm, maxprimerTm, minpmGCcont, maxpmGCcont, minprodGCcont, maxprodGCcont, saltconc, dnaconc, pairlist, &npair); } if(!targetrange) { limit = seqlen-minprimerlen-minprodlen+1; lastpos = seqlen-minprodlen; limit2 = maxprodlen-minprodlen; /* Outer loop selects all possible product start points */ for(i=minprimerlen; i<limit; ++i) { startpos = i; ajDebug("Position in sequence %d\n",startpos); endpos = i+minprodlen-1; /* Inner loop selects all possible product lengths */ for(j=0; j<limit2; ++j, ++endpos) { if(endpos>lastpos) break; v1 = endpos-startpos+1; ajStrAssignSubC(&p1,ajStrGetPtr(substr),startpos,endpos); prodGC = ajMeltGC(p1,v1); prodTm = ajMeltTempProd(prodGC,saltconc,v1); if(prodGC<minprodGCcont || prodGC>maxprodGCcont) continue; /* Only accept primers with acceptable Tm and GC */ neric = 0; nfred = 0; prima_testproduct(substr, startpos, endpos, primerlen, minprimerlen, maxprimerlen,minpmGCcont, maxpmGCcont, minprimerTm, maxprimerTm, minprodlen, maxprodlen, prodTm, prodGC, seqlen, &eric,&fred,forlist,revlist,&neric,&nfred, stepping_value, saltconc,dnaconc, isDNA, begin); if(!neric) continue; /* Now reject those primers with self-complementarity */ prima_reject_self(forlist,revlist,&neric,&nfred); if(!neric) continue; /* Reject any primers that could bind elsewhere in the sequence */ prima_test_multi(forlist,revlist,&neric,&nfred,substr,revstr, seqlen); /* Now select the least complementary pair (if any) */ prima_best_primer(forlist, revlist, &neric, &nfred); if(!neric) continue; AJNEW(pair); ajListPop(forlist,(void **)&f); ajListPop(revlist,(void **)&r); pair->f = f; pair->r = r; ++npair; ajListPush(pairlist,(void *)pair); } } } if(!targetrange) { /* Get rid of primer pairs nearby the top scoring ones */ prima_TwoSortscorepos(&pairlist); prima_prune_nearby(pairlist, &npair, maxprimerlen-1); ajListSort(pairlist,prima_PosCompare); prima_check_overlap(pairlist,&npair,overlap); } if(npair) { if(!targetrange) ajFmtPrintF(outf,"%d pairs found\n\n",npair); else ajFmtPrintF(outf, "Closest primer pair to specified product is:\n\n"); if((maxprimerlen<26 && seqlen<999999 && !dolist)) ajFmtPrintF(outf,"\n\t\tForward\t\t\t\t\tReverse\n\n"); } for(i=0;i<npair;++i) { if(!targetrange) ajFmtPrintF(outf,"[%d]\n",i+1); ajListPop(pairlist,(void **)&pair); prodlen = pair->r->start - (pair->f->start + pair->f->primerlen); if((maxprimerlen<26 && seqlen<999999 && !dolist)) { v1 = pair->f->start; v2 = v1 + pair->f->primerlen -1; ajStrAssignSubS(&p1,substr,v1,v2); ajFmtPrintF(outf,"%6d %-25.25s %d\t", v1+begin, ajStrGetPtr(p1), v2+begin); v1 = pair->r->start; v2 = v1 + pair->r->primerlen -1; ajStrAssignSubS(&p2,substr,v1,v2); ajSeqstrReverse(&p2); ajFmtPrintF(outf, "%6d %-25.25s %d\n", v1+begin, ajStrGetPtr(p2), v2+begin); ajFmtPrintF(outf," Tm %.2f C (GC %.2f%%)\t\t ", pair->f->primerTm,pair->f->primGCcont*100.); ajFmtPrintF(outf,"Tm %.2f C (GC %.2f%%)\n", pair->r->primerTm,pair->r->primGCcont*100.); ajFmtPrintF(outf," Length: %-32dLength: %d\n", pair->f->primerlen,pair->r->primerlen); ajFmtPrintF(outf," Tma: %.2f C\t\t\t", ajAnneal(pair->f->primerTm,pair->f->prodTm)); ajFmtPrintF(outf," Tma: %.2f C\n\n\n", ajAnneal(pair->r->primerTm,pair->f->prodTm)); ajFmtPrintF(outf," Product GC: %.2f%%\n", pair->f->prodGC * 100.0); ajFmtPrintF(outf," Product Tm: %.2f C\n", pair->f->prodTm); ajFmtPrintF(outf," Length: %d\n\n\n",prodlen); } else { ajFmtPrintF(outf," Product from %d to %d\n",pair->f->start+ pair->f->primerlen+begin,pair->r->start-1+begin); ajFmtPrintF(outf," Tm: %.2f C GC: %.2f%%\n", pair->f->prodTm,pair->f->prodGC*(float)100.); ajFmtPrintF(outf," Length: %d\n\n\n",prodlen); v1 = pair->f->start; v2 = v1 + pair->f->primerlen -1; ajStrAssignSubS(&p1,substr,v1,v2); ajFmtPrintF(outf," Forward: 5' %s 3'\n",ajStrGetPtr(p1)); ajFmtPrintF(outf," Start: %d\n",v1+begin); ajFmtPrintF(outf," End: %d\n",v2+begin); ajFmtPrintF(outf," Tm: %.2f C\n", pair->f->primerTm); ajFmtPrintF(outf," GC: %.2f%%\n", pair->f->primGCcont*(float)100.); ajFmtPrintF(outf," Len: %d\n", pair->f->primerlen); ajFmtPrintF(outf," Tma: %.2f C\n\n\n", ajAnneal(pair->f->primerTm,pair->f->prodTm)); v1 = pair->r->start; v2 = v1 + pair->r->primerlen -1; ajStrAssignSubS(&p2,substr,v1,v2); ajSeqstrReverse(&p2); ajStrAssignSubS(&p1,substr,v1,v2); ajFmtPrintF(outf," Reverse: 5' %s 3'\n",ajStrGetPtr(p1)); ajFmtPrintF(outf," Start: %d\n",v1+begin); ajFmtPrintF(outf," End: %d\n",v2+begin); ajFmtPrintF(outf," Tm: %.2f C\n", pair->r->primerTm); ajFmtPrintF(outf," GC: %.2f%%\n", pair->r->primGCcont*(float)100.); ajFmtPrintF(outf," Len: %d\n", pair->r->primerlen); ajFmtPrintF(outf," Tma: %.2f C\n\n\n", ajAnneal(pair->r->primerTm,pair->f->prodTm)); } prima_PrimerDel(&pair->f); prima_PrimerDel(&pair->r); AJFREE(pair); } ajStrDel(&seqstr); ajStrDel(&revstr); ajStrDel(&substr); ajStrDel(&p1); ajStrDel(&p2); ajListFree(&forlist); ajListFree(&revlist); ajListFree(&pairlist); ajFileClose(&outf); ajSeqDel(&sequence); AJFREE(entropy); AJFREE(enthalpy); AJFREE(energy); embExit(); return 0; }
void embPropCalcFragments(const char *s, ajint n, AjPList *l, AjPList *pa, AjBool unfavoured, AjBool overlap, AjBool allpartials, ajint *ncomp, ajint *npart, AjPStr *rname, AjBool nterm, AjBool cterm, AjBool dorag, EmbPPropMolwt const *mwdata, AjBool mono) { static const char *PROPENZReagent[]= { "Trypsin","Lys-C","Arg-C","Asp-N","V8-bicarb","V8-phosph", "Chymotrypsin","CNBr" }; static const char *PROPENZSite[]= { "KR","K","R","D","E","DE","FYWLM","M" }; static const char *PROPENZAminoCarboxyl[]= { "CC","C","C","N","C","CC","CCCCC","C" }; static const char *PROPENZUnfavoured[]= { "KRIFLP","P","P","","KREP","P","P","" }; ajint i; ajint j; ajint lim; ajint len; AjPList t; EmbPPropFrag fr; ajint *begsa = NULL; ajint *endsa = NULL; double molwt; double *molwtsa = NULL; AjBool *afrag = NULL; ajint mark; ajint bwp; ajint ewp; ajint *ival; ajint defcnt; ajint it; ajint st = 0; ajint mt = 0; ajint et = 0; ajStrAssignC(rname,PROPENZReagent[n]); defcnt = 0; len = (ajint) strlen(s); t = ajListNew(); /* Temporary list */ /* First get all potential cut points */ for(i=0;i<len;++i) { if(!strchr(PROPENZSite[n],s[i])) continue; if(len==i+1) continue; if(strchr(PROPENZUnfavoured[n],s[i+1]) && !unfavoured) continue; AJNEW0(ival); *ival = i; ajListPushAppend(t,(void *)ival); ++defcnt; } if(defcnt) { AJCNEW(begsa,(defcnt+1)); AJCNEW(endsa,(defcnt+1)); AJCNEW(molwtsa,(defcnt+1)); AJCNEW(afrag,(defcnt+1)); } for(i=0;i<defcnt;++i) /* Pop them into a temporary array */ { ajListPop(t,(void **)&ival); endsa[i] = *ival; AJFREE(ival); } mark = 0; for(i=0;i<defcnt;++i) /* Work out true starts, ends and molwts */ { bwp = mark; ewp = endsa[i]; if(strchr(PROPENZAminoCarboxyl[n],'N')) --ewp; molwt=embPropCalcMolwt(s,bwp,ewp,mwdata,mono); if(n==PROPENZCNBR) molwt -= (17.045 + 31.095); begsa[i] = mark; endsa[i] = ewp; molwtsa[i] = molwt; afrag[i] = ajFalse; mark = ewp+1; } if(defcnt) /* Special treatment for last fragment */ { molwt = embPropCalcMolwt(s,mark,len-1,mwdata,mono); if(n==PROPENZCNBR) molwt -= (17.045 + 31.095); begsa[i] = mark; endsa[i] = len-1; molwtsa[i] = molwt; afrag[i] = ajFalse; ++defcnt; } /* Push the hits */ for(i=0;i<defcnt;++i) { if(dorag) { st = begsa[i]; et = endsa[i]; for(it=st+RAG_MINPEPLEN-1; it < et; ++it) { AJNEW0(fr); fr->start = st; fr->end = it; fr->molwt = embPropCalcMolwt(s,st,it,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } AJNEW0(fr); fr->start = begsa[i]; fr->end = endsa[i]; fr->molwt = molwtsa[i]; fr->isfrag = afrag[i]; ajListPush(*l,(void *) fr); if(dorag && nterm) for(it=st+1; it < et-RAG_MINPEPLEN+2; ++it) { AJNEW0(fr); fr->start = it; fr->end = et; fr->molwt = embPropCalcMolwt(s,it,et,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } if(!dorag) ajListSort(*l, &propFragCompare); *ncomp = defcnt; /* Now deal with overlaps */ *npart = 0; lim = defcnt -1; if(overlap && !allpartials) { for(i=0;i<lim;++i) { if(dorag) { st = begsa[i]; mt = endsa[i]; et = endsa[i+1]; if(cterm) for(it=mt+1; it < et; ++it) { AJNEW0(fr); fr->start = st; fr->end = it; fr->molwt = embPropCalcMolwt(s,st,it,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } AJNEW0(fr); fr->isfrag = ajTrue; fr->molwt = embPropCalcMolwt(s,begsa[i],endsa[i+1],mwdata,mono); if(n==PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->start = begsa[i]; fr->end = endsa[i+1]; ajListPush(*pa,(void *)fr); ++(*npart); if(dorag && nterm) for(it=st+1; it<mt; ++it) { AJNEW0(fr); fr->start = it; fr->end = et; fr->molwt = embPropCalcMolwt(s,it,et,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } if(*npart) /* Remove complete sequence */ { --(*npart); ajListPop(*pa,(void **)&fr); } if(!dorag) ajListSort(*pa, &propFragCompare); } if(allpartials) { lim = defcnt; for(i=0;i<lim;++i) for(j=i+1;j<lim;++j) { AJNEW0(fr); fr->isfrag = ajTrue; fr->molwt = embPropCalcMolwt(s,begsa[i],endsa[j],mwdata,mono); if(n==PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->start = begsa[i]; fr->end = endsa[j]; ajListPush(*pa,(void *)fr); ++(*npart); } if(*npart) /* Remove complete sequence */ { --(*npart); ajListPop(*pa,(void **)&fr); } if(!dorag) ajListSort(*pa, &propFragCompare); } if(defcnt) { AJFREE(molwtsa); AJFREE(endsa); AJFREE(begsa); AJFREE(afrag); } ajListFree(&t); return; }
/* @prog domainreso *********************************************************** ** ** Removes low resolution domains from a DCF file (domain ** classification file). ** ******************************************************************************/ int main(ajint argc, char **argv) { AjPList cpdb_path = NULL; /* Location of coordinate files for input */ AjPStr cpdb_name = NULL; /* Name of coordinate file */ AjPStr temp = NULL; /* temp string */ AjPStr temp2 = NULL; /* temp string */ AjPList entry = NULL; /* List of pdb codes with resolution */ /* ABOVE the threshold */ AjPStr *entryarr = NULL; /* entry as an array */ AjPFile fptr_cpdb = NULL; /* Pointer to current coordinate file */ AjPFile dcfin = NULL; /* DCF input file */ AjPFile dcfout = NULL; /* DCF output file */ AjPPdb pdb = NULL; /* Pdb object pointer */ AjPDomain domain = NULL; /* Domain structure */ float threshold = 0.0; /* Resolution threshold */ ajint num = 0; /* number of nodes in list */ ajint type = 0; /* Type of domain (ajSCOP or ajCATH) in the DCF file */ /* Read data from acd */ embInitPV("domainreso",argc,argv,"DOMAINATRIX",VERSION); cpdb_path = ajAcdGetDirlist("cpdbpath"); threshold = ajAcdGetFloat("threshold"); dcfin = ajAcdGetInfile("dcfinfile"); dcfout = ajAcdGetOutfile("dcfoutfile"); /* Allocate strings etc. */ cpdb_name = ajStrNew(); temp = ajStrNew(); /* Create list . */ entry = ajListNew(); /* Create list of files in CPDB directory. */ /* Determine number of nodes on list */ num = ajListGetLength(cpdb_path); /* domainreso reads a directory of clean coordinate files file, creates a list of the files, then reads every list entry and extracts the resolution of the structure. If the value is less than a threshold (user defined) then the domain identifier is pushed onto a list. The DCF file (domain classification file) is then read and domain identifiers compared to those on the list, if found then the domain structure data is written the new DCF file. */ type = ajDomainDCFType(dcfin); /* Start of main application loop */ /* Produce list of pdb codes with resolution */ /* ABOVE the threshold. */ while(ajListPop(cpdb_path,(void **)&temp)) { /* Open coordinate file. */ if((fptr_cpdb=ajFileNewInNameS(temp))==NULL) { ajWarn("Could not open cpdb file"); ajStrDel(&temp); continue; } ajFmtPrint("%S\n", temp); fflush(stdout); /* Read coordinate data file. */ pdb = ajPdbReadFirstModelNew(fptr_cpdb); /* Check if resolution is above threshold. */ if(pdb->Reso > threshold) { /* assign ID to list. */ temp2=ajStrNew(); ajStrAssignS(&temp2, pdb->Pdb); ajListPush(entry, (AjPStr) temp2); } /* Close coordinate file and tidy up*/ ajPdbDel(&pdb); ajFileClose(&fptr_cpdb); ajStrDel(&temp); } num = ajListGetLength(entry); /* Sort the list of pdb codes & convert to an array. */ ajListSort(entry, domainreso_StrComp); ajListToarray(entry, (void ***)&entryarr); /* Read DCF file and compare IDs to those in list if not present then write domain structure data to output. . */ while((domain=(ajDomainReadCNew(dcfin, "*", type)))) { /* DOMAIN id not found in the list of domains with resolution above the threshold, so include it in the output file. */ if((domainreso_StrBinSearchDomain(ajDomainGetId(domain), entryarr, num))==-1) ajDomainWrite(dcfout, domain); /* Delete domain structure. */ ajDomainDel(&domain); } /* Tidy up. */ ajStrDel(&temp2); ajStrDel(&cpdb_name); ajFileClose(&dcfout); ajFileClose(&dcfin); ajListFree(&cpdb_path); ajListFree(&entry); AJFREE(entryarr); /* Return. */ ajExit(); return 0; }