/* @funcstatic acdrelations_readtypefile **************************************
**
** Reads a '|'-delimited file into a PKtype object, creating one PKtypedat
** record per non-comment line.
**
** For every line that does not start with '#', the first three
** '|'-separated tokens are copied into the ktype, acdtype and edam fields
** of a new PKtypedat. The ktype value is additionally passed through
** ajStrRemoveSetC(.., "_") and ajStrRemoveWhite.
**
** @param [u] inf [AjPFile] Input file stream
** @param [w] T   [PKtype*] Object to fill. (*T)->dat receives the array of
**                          records and (*T)->n the value returned by
**                          ajListToarray. *T is dereferenced without being
**                          allocated here.
**
** @return [void]
** @@
******************************************************************************/
static void acdrelations_readtypefile(AjPFile inf, PKtype *T)
{
    AjPStr rdline      = NULL;
    AjPList records    = NULL;
    PKtypedat record   = NULL;
    const AjPStr field = NULL;

    if(!T)
        ajFatal("Null arg error 1 in acdrelations_readtypefile");

    if(!inf)
        ajFatal("Null arg error 3 in acdrelations_readtypefile");

    rdline  = ajStrNew();
    records = ajListNew();

    while(ajReadline(inf, &rdline))
    {
        /* Only non-comment lines produce a record */
        if(!ajStrPrefixC(rdline, "#"))
        {
            record = ajKtypedatNew();

            /* Token 1: value of the knowntype: attribute */
            field = ajStrParseC(rdline, "|");
            ajStrAssignS(&record->ktype, field);
            ajStrRemoveSetC(&record->ktype, "_");
            ajStrRemoveWhite(&record->ktype);

            /* Token 2: ACD datatype */
            field = ajStrParseC(NULL, "|");
            ajStrAssignS(&record->acdtype, field);

            /* Token 3: EDAM relations: value */
            field = ajStrParseC(NULL, "|");
            ajStrAssignS(&record->edam, field);

            ajListPushAppend(records, record);
        }
    }

    /* Hand the records over to the PKtype object as an array */
    (*T)->n = ajListToarray(records, (void***) &((*T)->dat));

    /* The list nodes are released; the records now live in (*T)->dat */
    ajListFree(&records);
    ajStrDel(&rdline);
}
/* @funcstatic sigscanlig_WriteFastaHit *************************************** ** ** Write a Hit from a Hitlist object to an output file in embl-like format ** (see documentation for the DOMAINATRIX "seqsearch" application). ** Text for Class, Fold, Superfamily and Family is only written if the text ** is available. ** ** @param [u] outf [AjPFile] Output file stream ** @param [r] hits [const AjPList] List of hit objects. ** @param [r] n [ajint] Number of hit to generate. ** @param [r] DOSEQ [AjBool] True if sequence is to be printed. ** ** @return [AjBool] True on success ** @@ ******************************************************************************/ AjBool sigscanlig_WriteFastaHit(AjPFile outf, AjPList hits, ajint n, AjBool DOSEQ) { /* AjBool sigscanlig_WriteFastaHit(AjPFile outf, AjPList siglist, AjPList hits, ajint n, AjBool DOSEQ) */ EmbPHit hit = NULL; EmbPSignature sig = NULL; /* EmbPSignature *sigarr = NULL; */ EmbPHit *hitarr = NULL; ajint sizarr=0; /* if(!outf || !siglist || !hits) return ajFalse; */ if(!outf || !hits) return ajFalse; /* sizarr = ajListToarray(siglist, (void ***) &sigarr); if(sizarr != ajListToarray(hits, (void ***) &hitarr)) ajFatal("Arrays are different sizes"); */ sizarr = ajListToarray(hits, (void ***) &hitarr); if(n>=sizarr) ajFatal("Requested hit out of range in sigscanlig_WriteFastaHit"); hit = hitarr[n]; sig = hitarr[n]->Sig; /* sig = sigarr[n]; */ ajFmtPrintF(outf, "> "); if(MAJSTRGETLEN(hit->Acc)) ajFmtPrintF(outf, "%S^", hit->Acc); else ajFmtPrintF(outf, ".^"); if(MAJSTRGETLEN(hit->Spr)) ajFmtPrintF(outf, "%S^", hit->Spr); else ajFmtPrintF(outf, ".^"); ajFmtPrintF(outf, "%d^%d^", hit->Start, hit->End); ajFmtPrintF(outf, "LIGAND^"); if(MAJSTRGETLEN(sig->Id)) ajFmtPrintF(outf, "%S^", sig->Id); else ajFmtPrintF(outf, ".^"); if(MAJSTRGETLEN(sig->Domid)) ajFmtPrintF(outf, "%S^", sig->Domid); else ajFmtPrintF(outf, ".^"); if(MAJSTRGETLEN(sig->Ligid)) ajFmtPrintF(outf, "%S^", sig->Ligid); else ajFmtPrintF(outf, ".^"); 
ajFmtPrintF(outf,"%d^", sig->sn); ajFmtPrintF(outf,"%d^", sig->ns); ajFmtPrintF(outf,"%d^", sig->pn); ajFmtPrintF(outf,"%d^", sig->np); if(sig->Typesig == aj1D) ajFmtPrintF(outf,"1D"); else if(sig->Typesig == aj3D) ajFmtPrintF(outf,"3D"); else ajFatal("Signature type unknown in sigscanlig_WriteFasta"); if(sig->np) ajFmtPrintF(outf,"P^"); else ajFmtPrintF(outf,"F^"); ajFmtPrintF(outf, "%.2f^", hit->Score); ajFmtPrintF(outf, "%.3e^", hit->Pval); ajFmtPrintF(outf, "%.3e", hit->Eval); if(DOSEQ) { ajFmtPrintF(outf, "\n"); ajFmtPrintF(outf, "%S\n", hit->Seq); } /* AJFREE(sigarr); */ AJFREE(hitarr); return ajTrue; }
/* @funcstatic sigscanlig_WriteFasta ******************************************
**
** Writes every hit in a list of hits to an output file.
**
** Each hit is written as a header line starting with "> " and made of
** '^'-delimited fields: accession, Spr, start, end, the literal "LIGAND",
** signature Id, Domid and Ligid, the signature counters sn, ns, pn and np,
** the signature type ("1D" or "3D") immediately followed by "P" or "F"
** (chosen on sig->np), then score, P-value and E-value. The header line is
** followed by the hit sequence on its own line.
** Empty string fields are written as '.'.
**
** The signature for each hit is taken from the hit's own Sig member.
**
** @param [u] outf [AjPFile] Output file stream
** @param [r] hits [AjPList] List of hit objects.
**
** @return [AjBool] True on success, false if outf or hits is NULL
** @@
******************************************************************************/
AjBool sigscanlig_WriteFasta(AjPFile outf, AjPList hits)
{
    ajint x           = 0;
    EmbPHit hit       = NULL;
    EmbPSignature sig = NULL;
    EmbPHit *hitarr   = NULL;
    ajint sizarr      = 0;

    if(!outf || !hits)
        return ajFalse;

    sizarr = ajListToarray(hits, (void ***) &hitarr);

    for(x=0; x<sizarr; x++)
    {
        hit = hitarr[x];
        sig = hit->Sig;

        ajFmtPrintF(outf, "> ");

        if(MAJSTRGETLEN(hit->Acc))
            ajFmtPrintF(outf, "%S^", hit->Acc);
        else
            ajFmtPrintF(outf, ".^");

        if(MAJSTRGETLEN(hit->Spr))
            ajFmtPrintF(outf, "%S^", hit->Spr);
        else
            ajFmtPrintF(outf, ".^");

        ajFmtPrintF(outf, "%d^%d^", hit->Start, hit->End);

        ajFmtPrintF(outf, "LIGAND^");

        if(MAJSTRGETLEN(sig->Id))
            ajFmtPrintF(outf, "%S^", sig->Id);
        else
            ajFmtPrintF(outf, ".^");

        if(MAJSTRGETLEN(sig->Domid))
            ajFmtPrintF(outf, "%S^", sig->Domid);
        else
            ajFmtPrintF(outf, ".^");

        if(MAJSTRGETLEN(sig->Ligid))
            ajFmtPrintF(outf, "%S^", sig->Ligid);
        else
            ajFmtPrintF(outf, ".^");

        ajFmtPrintF(outf,"%d^", sig->sn);
        ajFmtPrintF(outf,"%d^", sig->ns);
        ajFmtPrintF(outf,"%d^", sig->pn);
        ajFmtPrintF(outf,"%d^", sig->np);

        /* The type and the P/F flag share one field, e.g. "1DP^" */
        if(sig->Typesig == aj1D)
            ajFmtPrintF(outf,"1D");
        else if(sig->Typesig == aj3D)
            ajFmtPrintF(outf,"3D");
        else
            ajFatal("Signature type unknown in sigscanlig_WriteFasta");

        if(sig->np)
            ajFmtPrintF(outf,"P^");
        else
            ajFmtPrintF(outf,"F^");

        ajFmtPrintF(outf, "%.2f^", hit->Score);
        ajFmtPrintF(outf, "%.3e^", hit->Pval);
        ajFmtPrintF(outf, "%.3e", hit->Eval);
        ajFmtPrintF(outf, "\n");
        ajFmtPrintF(outf, "%S\n", hit->Seq);
    }

    /* Only the pointer array is freed; the hits still belong to the list */
    AJFREE(hitarr);

    return ajTrue;
}
/* @prog dbiblast *************************************************************
**
** Entry point of the "dbiblast" application (the name passed to embInit).
**
** Reads its options through the ajAcdGet* calls, opens each matching input
** file with dbiblast_blastopenlib, collects the entries and their fields,
** then writes the division file, the "entrynam.idx" index and one index
** per requested field, and finally releases everything it allocated.
**
** Two processing modes are selected by the "systemsort" option: with it,
** entries are written to sort files (embDbiSort* calls); without it,
** entries are kept in in-memory lists (embDbiMem* calls).
**
** @param [r] argc [int]    Argument count, passed to embInit
** @param [r] argv [char**] Argument vector, passed to embInit
**
** @return [int] Always 0 when the end of the function is reached
******************************************************************************/
int main(int argc, char **argv)
{
    AjPList idlist;
    AjPList* fieldList = NULL;
    AjBool systemsort;
    AjBool cleanup;
    ajint blastv = 0;
    char dbtype = '\0';
    ajuint maxindex;
    ajuint maxidlen = 0;
    ajuint maxlen;
    AjPStr version = NULL;
    AjPStr seqtype = NULL;
    AjPFile elistfile = NULL;
    AjPFile* alistfile = NULL;
    AjPStr dbname = NULL;
    AjPStr release = NULL;
    AjPStr datestr = NULL;
    AjPStr sortopt = NULL;
    void **entryIds = NULL;
    AjBool usesrc = AJTRUE;
    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr curfilename = NULL;
    AjPStr idformat = NULL;
    EmbPEntry entry;
    PBlastDb db = NULL;
    ajuint idCount = 0;
    ajuint idDone;
    AjPList listTestFiles = NULL;
    void ** testFiles = NULL;
    ajuint nfiles;
    ajuint ifile;
    ajuint jfile;
    ajuint filesize;
    short recsize;
    ajuint maxfilelen = 20;
    char date[4] = { 0,0,0,0 };
    AjPStr tmpfname = NULL;
    AjPStr* fields = NULL;
    AjPFile entFile = NULL;
    AjPStr* divfiles = NULL;
    ajint* maxFieldLen = NULL;
    ajuint ifield = 0;
    ajuint nfields = 0;
    AjPFile logfile = NULL;
    ajuint* countField = NULL;
    ajuint* fieldTot = NULL;
    ajuint idCountFile = 0;
    ajuint i = 0;

    embInit("dbiblast", argc, argv);

    /* The id format is fixed to "NCBI"; all other settings come from ACD */
    idformat = ajStrNewC("NCBI");
    fields = ajAcdGetList("fields");
    directory = ajAcdGetDirectoryName("directory");
    indexdir = ajAcdGetOutdirName("indexoutdir");
    filename = ajAcdGetString("filenames");
    exclude = ajAcdGetString("exclude");
    dbname = ajAcdGetString("dbname");
    release = ajAcdGetString("release");
    datestr = ajAcdGetString("date");
    systemsort = ajAcdGetBoolean("systemsort");
    cleanup = ajAcdGetBoolean("cleanup");
    sortopt = ajAcdGetString("sortoptions");
    maxindex = ajAcdGetInt("maxindex");
    version = ajAcdGetListSingle("blastversion");
    seqtype = ajAcdGetListSingle("seqtype");
    usesrc = ajAcdGetBoolean("sourcefile");
    logfile = ajAcdGetOutfile("outfile");

    while(fields[nfields]) /* array ends with a NULL */
        nfields++;

    /* Per-field bookkeeping is only allocated when fields were requested */
    if(nfields)
    {
        AJCNEW(maxFieldLen, nfields);
        AJCNEW0(countField, nfields);
        AJCNEW0(fieldTot, nfields);

        /* Start each field length at -maxindex; the sign is undone when
        ** maxlen is computed for the field index files below */
        for(ifield=0; ifield < nfields; ifield++)
            maxFieldLen[ifield] = (ajint) maxindex * -1;

        /* One sort file per field, or one in-memory list per field */
        if(systemsort)
            AJCNEW(alistfile, nfields);
        else
        {
            AJCNEW(fieldList, nfields);

            for(ifield=0; ifield < nfields; ifield++)
                fieldList[ifield] = ajListNew();
        }
    }

    /* A date of "00/00/00" is replaced by today's date */
    if(ajStrMatchC(datestr, "00/00/00"))
        ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex"));

    ajStrRemoveWhite(&dbname); /* used for temp filenames */
    embDbiDateSet(datestr, date);
    idlist = ajListNew();

    /* readReverse is a file-scope flag: set when the host is not
    ** big-endian */
    if(ajUtilGetBigendian())
        readReverse = ajFalse;
    else
        readReverse = ajTrue;

    ajStrToInt(version, &blastv);
    dbtype = ajStrGetCharFirst(seqtype);

    ajDebug("reading '%S/%S'\n", directory, filename);
    ajDebug("writing '%S/'\n", indexdir);

    /* Build the sorted array of candidate input files */
    listTestFiles = embDbiFileListExc(directory, filename, exclude);
    ajListSort(listTestFiles, ajStrVcmp);
    nfiles = ajListToarray(listTestFiles, &testFiles);

    if(!nfiles)
        ajDie("No input files in '%S' matched filename '%S'",
              directory, filename);

    embDbiLogHeader(logfile, dbname, release, datestr, indexdir, maxindex);
    embDbiLogFields(logfile, fields, nfields);
    embDbiLogSource(logfile, directory, filename, exclude,
                    (AjPStr*) testFiles, nfiles);
    embDbiLogCmdline(logfile);

    AJCNEW0(divfiles, nfiles);

    /*
    ** process each input file, one at a time
    **
    ** ifile walks the candidate files; jfile counts only the files that
    ** were successfully opened and is the index used for divfiles
    */
    jfile = 0;

    for(ifile=0; ifile < nfiles; ifile++)
    {
        curfilename = (AjPStr) testFiles[ifile];

        if(!dbiblast_blastopenlib(curfilename, usesrc, blastv, dbtype, &db))
            continue; /* could be the wrong file type with "*.*" */

        ajDebug("processing filename '%S' ...\n", curfilename);
        ajDebug("processing file '%S' ...\n", db->TFile->Name);

        /* Record the file name without its path, and track the longest */
        ajStrAssignS(&divfiles[jfile], db->TFile->Name);
        ajFilenameTrimPath(&divfiles[jfile]);

        if(ajStrGetLen(divfiles[jfile]) >= maxfilelen)
            maxfilelen = ajStrGetLen(divfiles[jfile]) + 1;

        if(systemsort) /* elistfile for entries, alist for fields */
            elistfile = embDbiSortOpen(alistfile, jfile,
                                       dbname, fields, nfields);

        /* Per-file counters restart for every input file */
        idCountFile = 0;

        for(i=0;i<nfields;i++)
            countField[i] = 0;

        while((entry=dbiblast_nextblastentry(db, jfile, idformat,
                                             systemsort, fields,
                                             maxFieldLen, &maxidlen,
                                             countField, elistfile,
                                             alistfile)))
        {
            idCountFile++;

            if(!systemsort) /* save the entry data in lists */
            {
                embDbiMemEntry(idlist, fieldList, nfields, entry, jfile);
            }
        }

        idCount += idCountFile;

        if(systemsort)
        {
            embDbiSortClose(&elistfile, alistfile, nfields);
            /* lost the entry, so can't free it :-) */
        }

        embDbiLogFile(logfile, curfilename, idCountFile,
                      fields, countField, nfields);
        dbiblast_dbfree(&db);
        jfile++;
    }

    /* From here on nfiles is the number of files actually processed */
    nfiles = jfile;

    /*
    ** write the division.lkp file
    */
    embDbiWriteDivision(indexdir, dbname, release, date,
                        maxfilelen, nfiles, divfiles, NULL);

    /*
    ** Write the entryname.idx index
    */
    ajStrAssignC(&tmpfname, "entrynam.idx");
    entFile = ajFileNewOutNamePathS(tmpfname, indexdir);

    /* NOTE(review): 300 looks like a fixed header size and 10 like the
    ** fixed part of a record - confirm against the embDbi index format */
    recsize = maxidlen+10;
    filesize = 300 + (idCount*(ajint)recsize);
    embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date);

    if(systemsort)
        idDone = embDbiSortWriteEntry(entFile, maxidlen,
                                      dbname, nfiles, cleanup, sortopt);
    else /* save entries in entryIds array */
    {
        idDone = embDbiMemWriteEntry(entFile, maxidlen,
                                     idlist, &entryIds);

        if(idDone != idCount)
            ajFatal("Duplicates not allowed for in-memory processing");
    }

    /* Rewrite the header sizes using the number of ids actually written */
    embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone);
    ajFileClose(&entFile);

    /*
    ** Write the fields index files
    */
    for(ifield=0; ifield < nfields; ifield++)
    {
        /* A non-zero maxindex fixes the length; otherwise use the absolute
        ** value of the length recorded for this field */
        if(maxindex)
            maxlen = maxindex;
        else
        {
            if(maxFieldLen[ifield] >= 0)
                maxlen = maxFieldLen[ifield];
            else
                maxlen = - maxFieldLen[ifield];
        }

        if(systemsort)
            fieldTot[ifield] = embDbiSortWriteFields(dbname, release,
                                                     date, indexdir,
                                                     fields[ifield], maxlen,
                                                     nfiles, idCount,
                                                     cleanup, sortopt);
        else
            fieldTot[ifield] = embDbiMemWriteFields(dbname, release,
                                                    date, indexdir,
                                                    fields[ifield], maxlen,
                                                    fieldList[ifield],
                                                    entryIds);
    }

    embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot,
                   nfields, nfiles, idDone, idCount);

    if(systemsort)
        embDbiRmEntryFile(dbname, cleanup);

    /* Clean-up of everything allocated by main */
    ajListMap(idlist, embDbiEntryDelMap, NULL);
    ajListFree(&idlist);
    AJFREE(entryIds);

    ajStrDelarray(&fields);

    for(i=0;i<nfields;i++)
    {
        if(systemsort)
        {
            ajFileClose(&alistfile[i]);
        }
        else
        {
            ajListMap(fieldList[i], embDbiFieldDelMap, NULL);
            ajListFree(&fieldList[i]);
        }
    }

    AJFREE(alistfile);
    AJFREE(fieldList);
    ajStrDel(&version);
    ajStrDel(&seqtype);
    ajFileClose(&elistfile);

    for(i=0;i<nfiles;i++)
    {
        ajStrDel(&divfiles[i]);
    }

    AJFREE(countField);
    AJFREE(fieldTot);

    ajStrDel(&dbname);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&sortopt);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&idformat);
    ajStrDel(&tmpfname);
    AJFREE(maxFieldLen);

    ajFileClose(&logfile);
    ajListstrFreeData(&listTestFiles);

    /* The names below are not locals of main; they are file-scope objects
    ** defined elsewhere in this file and released here at exit */
    ajStrDel(&t);
    ajStrDel(&id);
    ajStrDel(&acc);
    ajStrDel(&hline);
    ajStrDel(&tmpdes);
    ajStrDel(&tmpfd);
    ajStrDel(&tmpgi);
    ajStrDel(&tmpdb);
    ajStrDel(&tmpac);
    ajStrDel(&tmpsv);
    ajRegFree(&wrdexp);
    embDbiEntryDel(&dbiblastEntry);

    if(fdl)
    {
        for(i=0; i < nfields; i++)
            ajListFree(&fdl[i]);

        AJFREE(fdl);
    }

    /* NOTE(review): this repeats the divfiles loop a few lines above; the
    ** second pass looks redundant - confirm ajStrDel is safe on strings
    ** that were already deleted before relying on it */
    for(i=0;i<nfiles;i++)
    {
        ajStrDel(&divfiles[i]);
    }

    AJFREE(divfiles);
    AJFREE(testFiles);

    embExit();

    return 0;
}
AjPMatrixf ajMatrixfNewFile(const AjPStr filename) { AjPMatrixf ret = NULL; AjPStr *orderstring = NULL; AjPStr buffer = NULL; AjPStr firststring = NULL; AjPStr reststring = NULL; const AjPStr tok = NULL; ajint len = 0; ajint i = 0; ajint l = 0; ajint k = 0; ajint cols = 0; ajint rows = 0; const char *ptr = NULL; AjPFile file = NULL; AjBool first = ajTrue; float **matrix = NULL; float *templine = NULL; float minval = -1.0; AjPList rlabel_list = NULL; AjPStr *rlabel_arr = NULL; #ifndef WIN32 static const char *delimstr = " :\t\n"; #else static const char *delimstr = " :\t\n\r"; #endif rlabel_list = ajListNew(); firststring = ajStrNew(); reststring = ajStrNew(); file = ajDatafileNewInNameS(filename); if(!file) { ajStrDel(&firststring); ajStrDel(&reststring); return NULL; } /* Read row labels */ while(ajReadline(file,&buffer)) { ptr = ajStrGetPtr(buffer); #ifndef WIN32 if(*ptr != '#' && *ptr != '\n') #else if(*ptr != '#' && *ptr != '\n' && *ptr != '\r') #endif { if(first) first = ajFalse; else { ajFmtScanC(ptr, "%S", &firststring); ajListPushAppend(rlabel_list, firststring); firststring = ajStrNew(); } } } first = ajTrue; ajStrDel(&firststring); rows = ajListToarray(rlabel_list, (void ***) &rlabel_arr); ajFileSeek(file, 0, 0); while(ajReadline(file,&buffer)) { ajStrRemoveWhiteExcess(&buffer); ptr = ajStrGetPtr(buffer); if(*ptr && *ptr != '#') { if(first) { cols = ajStrParseCountC(buffer,delimstr); AJCNEW0(orderstring, cols); for(i=0; i<cols; i++) orderstring[i] = ajStrNew(); tok = ajStrParseC(buffer, " :\t\n"); ajStrAssignS(&orderstring[l++], tok); while((tok = ajStrParseC(NULL, " :\t\n"))) ajStrAssignS(&orderstring[l++], tok); first = ajFalse; ret = ajMatrixfNewAsym(orderstring, cols, rlabel_arr, rows, filename); matrix = ret->Matrixf; } else { ajFmtScanC(ptr, "%S", &firststring); /* JISON 19/7/4 k = ajSeqcvtGetCodeK(ret->Cvt, ajStrGetCharFirst(firststring)); */ k = ajSeqcvtGetCodeS(ret->Cvt, firststring); len = MAJSTRGETLEN(firststring); ajStrAssignSubC(&reststring, 
ptr, len, -1); /* ** Must discard the first string (label) and use ** reststring otherwise ajArrFloatLine would fail (it ** cannot convert a string to a float) ** ** Use cols,1,cols in below because although 2nd and ** subsequent lines have one more string in them (the ** residue label in the 1st column) we've discarded that ** from the string that's passed */ templine = ajArrFloatLine(reststring,delimstr,1,cols); for(i=0; i<cols; i++) { if(templine[i] < minval) minval = templine[i]; /* JISON 19/7/4 matrix[k][ajSeqcvtGetCodeK(ret->Cvt, ajStrGetCharFirst(orderstring[i]))] = templine[i]; */ matrix[k][ajSeqcvtGetCodeAsymS(ret->Cvt, orderstring[i])] = templine[i]; } AJFREE(templine); } } } ajDebug("fill rest with minimum value %d\n", minval); ajFileClose(&file); ajStrDel(&buffer); for(i=0; i<cols; i++) ajStrDel(&orderstring[i]); AJFREE(orderstring); ajDebug("read matrix file %S\n", filename); ajStrDel(&firststring); ajStrDel(&reststring); for(i=0; i<rows; i++) ajStrDel(&rlabel_arr[i]); AJFREE(rlabel_arr); ajListFree(&rlabel_list); return ret; }
AjPMatrix ajMatrixNewFile(const AjPStr filename) { AjPMatrix ret = NULL; AjPStr buffer = NULL; const AjPStr tok = NULL; AjPStr firststring = NULL; AjPStr *orderstring = NULL; AjPFile file = NULL; AjBool first = ajTrue; const char *ptr = NULL; ajint **matrix = NULL; ajint minval = -1; ajint i = 0; ajint l = 0; ajint k = 0; ajint cols = 0; ajint rows = 0; ajint *templine = NULL; AjPList rlabel_list = NULL; AjPStr *rlabel_arr = NULL; #ifndef WIN32 static const char *delimstr = " :\t\n"; #else static const char *delimstr = " :\t\n\r"; #endif rlabel_list = ajListNew(); firststring = ajStrNew(); file = ajDatafileNewInNameS(filename); if(!file) { ajStrDel(&firststring); ajListFree(&rlabel_list); return NULL; } /* Read row labels */ while(ajReadline(file,&buffer)) { ptr = ajStrGetPtr(buffer); #ifndef WIN32 if(*ptr != '#' && *ptr != '\n') #else if(*ptr != '#' && *ptr != '\n' && *ptr != '\r') #endif { if(first) first = ajFalse; else { ajFmtScanC(ptr, "%S", &firststring); ajListPushAppend(rlabel_list, firststring); firststring = ajStrNew(); } } } first = ajTrue; ajStrDel(&firststring); rows = ajListToarray(rlabel_list, (void ***) &rlabel_arr); ajFileSeek(file, 0, 0); while(ajReadline(file,&buffer)) { ajStrRemoveWhiteExcess(&buffer); ptr = ajStrGetPtr(buffer); if(*ptr && *ptr != '#') { if(first) { cols = ajStrParseCountC(buffer,delimstr); AJCNEW0(orderstring, cols); for(i=0; i<cols; i++) orderstring[i] = ajStrNew(); tok = ajStrParseC(buffer, " :\t\n"); ajStrAssignS(&orderstring[l++], tok); while((tok = ajStrParseC(NULL, " :\t\n"))) ajStrAssignS(&orderstring[l++], tok); first = ajFalse; ret = ajMatrixNewAsym(orderstring, cols, rlabel_arr, rows, filename); matrix = ret->Matrix; } else { ajFmtScanC(ptr, "%S", &firststring); /* JISON 19/7/4 k = ajSeqcvtGetCodeK(ret->Cvt, ajStrGetCharFirst(firststring)); */ k = ajSeqcvtGetCodeS(ret->Cvt, firststring); /* ** cols+1 is used below because 2nd and subsequent lines have ** one more string in them (the residue label) */ templine = 
ajArrIntLine(buffer,delimstr,2,cols+1); for(i=0; i<cols; i++) { if(templine[i] < minval) minval = templine[i]; /* JISON 19/7/4 matrix[k][ajSeqcvtGetCodeK(ret->Cvt, ajStrGetCharFirst(orderstring[i]))] = templine[i]; */ matrix[k][ajSeqcvtGetCodeAsymS(ret->Cvt, orderstring[i])] = templine[i]; } AJFREE(templine); } } } ajDebug("fill rest with minimum value %d\n", minval); ajFileClose(&file); ajStrDel(&buffer); for(i=0; i<cols; i++) ajStrDel(&orderstring[i]); AJFREE(orderstring); ajDebug("read matrix file %S\n", filename); ajStrDel(&firststring); for(i=0; i<rows; i++) ajStrDel(&rlabel_arr[i]); AJFREE(rlabel_arr); ajListFree(&rlabel_list); return ret; }
int main(int argc, char **argv) { AjPList idlist; AjPList* fieldList = NULL; AjBool systemsort; AjBool cleanup; ajuint maxindex; ajuint maxidlen = 0; ajuint maxlen; AjPFile elistfile = NULL; AjPFile* alistfile = NULL; AjPStr dbname = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjPStr sortopt = NULL; void **entryIds = NULL; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr curfilename = NULL; AjPFile libr=NULL; AjPStr idformat = NULL; EmbPEntry entry; ajuint idtype = 0; ajuint idCount = 0; ajuint idDone; AjPList listInputFiles = NULL; void ** inputFiles = NULL; ajuint nfiles; ajuint ifile; ajuint filesize; short recsize; ajuint maxfilelen = 20; char date[4] = { 0,0,0,0 }; AjPStr tmpfname = NULL; AjPStr* fields = NULL; AjPFile entFile = NULL; AjPStr* divfiles = NULL; AjPRegexp regIdExp = NULL; ajint* maxFieldLen = NULL; ajuint ifield = 0; ajuint nfields = 0; AjPFile logfile = NULL; ajuint* countField = NULL; ajuint* fieldTot = NULL; ajuint idCountFile = 0; ajuint i; embInit("dbifasta", argc, argv); idformat = ajAcdGetListSingle("idformat"); fields = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); systemsort = ajAcdGetBoolean("systemsort"); cleanup = ajAcdGetBoolean("cleanup"); sortopt = ajAcdGetString("sortoptions"); maxindex = ajAcdGetInt("maxindex"); logfile = ajAcdGetOutfile("outfile"); while(fields[nfields]) /* array ends with a NULL */ nfields++; if(nfields) { AJCNEW(maxFieldLen, nfields); AJCNEW0(countField, nfields); AJCNEW0(fieldTot, nfields); for(ifield=0; ifield < nfields; ifield++) maxFieldLen[ifield] = (ajint)maxindex * -1; if(systemsort) AJCNEW(alistfile, nfields); else { AJCNEW(fieldList, nfields); for(ifield=0; ifield < nfields; ifield++) fieldList[ifield] = ajListNew(); 
} } if(ajStrMatchC(datestr, "00/00/00")) ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex")); ajStrRemoveWhite(&dbname); /* used for temp filenames */ embDbiDateSet(datestr, date); idlist = ajListNew(); regIdExp = dbifasta_getExpr(idformat, &idtype); ajDebug("reading '%S/%S'\n", directory, filename); ajDebug("writing '%S/'\n", indexdir); listInputFiles = embDbiFileListExc(directory, filename, exclude); ajListSort(listInputFiles, &ajStrVcmp); nfiles = (ajuint) ajListToarray(listInputFiles, &inputFiles); if(!nfiles) ajDie("No input files in '%S' matched filename '%S'", directory, filename); embDbiLogHeader(logfile, dbname, release, datestr, indexdir, maxindex); embDbiLogFields(logfile, fields, nfields); embDbiLogSource(logfile, directory, filename, exclude, (AjPStr*) inputFiles, nfiles); embDbiLogCmdline(logfile); AJCNEW0(divfiles, nfiles); /* ** process each input file, one at a time */ for(ifile=0; ifile < nfiles; ifile++) { ajStrAssignS(&curfilename,(AjPStr) inputFiles[ifile]); embDbiFlatOpenlib(curfilename, &libr); ajFilenameTrimPath(&curfilename); if(ajStrGetLen(curfilename) >= maxfilelen) maxfilelen = ajStrGetLen(curfilename) + 1; ajDebug("processing filename '%S' ...\n", curfilename); ajDebug("processing file '%F' ...\n", libr); ajStrAssignS(&divfiles[ifile], curfilename); if(systemsort) /* elistfile for entries, alist for fields */ elistfile = embDbiSortOpen(alistfile, ifile, dbname, fields, nfields); idCountFile = 0; for(i=0;i<nfields;i++) countField[i] = 0; while((entry=dbifasta_NextFlatEntry(libr, ifile, regIdExp, idtype, systemsort, fields, maxFieldLen, &maxidlen, countField, elistfile, alistfile))) { idCountFile++; if(!systemsort) /* save the entry data in lists */ embDbiMemEntry(idlist, fieldList, nfields, entry, ifile); entry = NULL; } idCount += idCountFile; if(systemsort) { embDbiSortClose(&elistfile, alistfile, nfields); AJFREE(entry); } else { embDbiEntryDel(&dbifastaGEntry); } embDbiLogFile(logfile, curfilename, idCountFile, fields, 
countField, nfields); } /* write the division.lkp file */ embDbiWriteDivision(indexdir, dbname, release, date, maxfilelen, nfiles, divfiles, NULL); /* Write the entryname.idx index */ ajStrAssignC(&tmpfname, "entrynam.idx"); entFile = ajFileNewOutNamePathS(tmpfname, indexdir); recsize = maxidlen+10; filesize = 300 + (idCount*(ajint)recsize); embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date); if(systemsort) idDone = embDbiSortWriteEntry(entFile, maxidlen, dbname, nfiles, cleanup, sortopt); else /* save entries in entryIds array */ { idDone = embDbiMemWriteEntry(entFile, maxidlen, idlist, &entryIds); if(idDone != idCount) ajFatal("Duplicates not allowed for in-memory processing"); } embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone); ajFileClose(&entFile); /* Write the fields index files */ for(ifield=0; ifield < nfields; ifield++) { if(maxindex) maxlen = maxindex; else { if(maxFieldLen[ifield] >= 0) maxlen = maxFieldLen[ifield]; else maxlen = - maxFieldLen[ifield]; } if(systemsort) fieldTot[ifield] = embDbiSortWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, nfiles, idCount, cleanup, sortopt); else fieldTot[ifield] = embDbiMemWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, fieldList[ifield], entryIds); } embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot, nfields, nfiles, idDone, idCount); if(systemsort) embDbiRmEntryFile(dbname, cleanup); ajStrDel(&idformat); ajStrDelarray(&fields); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&dbname); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&sortopt); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&tmpfname); ajFileClose(&libr); ajFileClose(&logfile); for(i=0;i<nfields;i++) { if(systemsort) { ajFileClose(&alistfile[i]); } else { ajListMap(fieldList[i], &embDbiFieldDelMap, NULL); ajListFree(&fieldList[i]); } } AJFREE(alistfile); AJFREE(fieldList); AJFREE(maxFieldLen); AJFREE(countField); AJFREE(fieldTot); 
for(i=0;i<nfiles;i++) { ajStrDel(&divfiles[i]); } AJFREE(divfiles); AJFREE(inputFiles); embDbiEntryDel(&dbifastaGEntry); ajStrDel(&dbifastaGRline); ajStrDel(&dbifastaGTmpId); if(dbifastaGFdl) { for(i=0; i < nfields; i++) ajListFree(&dbifastaGFdl[i]); AJFREE(dbifastaGFdl); } ajListMap(idlist, &embDbiEntryDelMap, NULL); ajListFree(&idlist); ajListstrFreeData(&listInputFiles); AJFREE(entryIds); ajRegFree(&dbifastaGIdexp); ajRegFree(&dbifastaGWrdexp); ajRegFree(®IdExp); ajStrDel(&dbifastaGTmpAc); ajStrDel(&dbifastaGTmpSv); ajStrDel(&dbifastaGTmpGi); ajStrDel(&dbifastaGTmpDb); ajStrDel(&dbifastaGTmpDes); ajStrDel(&dbifastaGTmpFd); ajStrDel(&curfilename); embExit(); return 0; }
/* @funcstatic assemoutWriteNextBam *******************************************
**
** Writes the next part of an assembly to a BAM output file.
**
** When assem->Hasdata is false a header is written first: an @HD line, one
** @SQ line per contig whose name is not "*" (followed by an @CO line for
** each of the contig's tags), and any read group lines returned by
** assemSAMGetReadgroupHeaderlines. The compressed output handle created
** for the header is stored in outfile->OutData. After the header the
** function returns straight away unless assem->BamHeader is set.
**
** Otherwise (or after the header) every read in assem->Reads is written
** with assemoutWriteBamAlignment, using the handle in outfile->OutData.
**
** @param [u] outfile [AjPOutfile]     Output file object
** @param [r] assem   [const AjPAssem] Assembly object
**
** @return [AjBool] False if there is no output file or no assembly,
**                  true otherwise
** @@
******************************************************************************/
static AjBool assemoutWriteNextBam(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile outf             = ajOutfileGetFile(outfile);
    AjPSeqBamHeader bamhead  = NULL;
    AjPSeqBamBgzf bgzf       = NULL;
    AjPSeqBam bam            = NULL;
    AjPAssemContig* contigs  = NULL;
    AjPAssemContig contig    = NULL;
    AjPAssemRead aread       = NULL;
    AjPAssemTag atag         = NULL;
    AjIList iter             = NULL;
    AjPStr text              = NULL;
    const AjPStr rgtext      = NULL;
    ajulong ncontigs         = 0UL;
    ajint ic                 = 0;

    if(!outf || !assem)
        return ajFalse;

    if(!assem->Hasdata)
    {
        /* Prefer the ordered contig list; fall back to the contig table */
        if(!ajListGetLength(assem->ContigsOrder))
            ncontigs = ajTableToarrayValues(assem->Contigs,
                                            (void***)&contigs);
        else
            ncontigs = ajListToarray(assem->ContigsOrder,
                                     (void***)&contigs);

        ajFmtPrintS(&text, "@HD\tVN:1.3\tSO:%s\n",
                    ajAssemGetSortorderC(assem));

        bamhead = ajSeqBamHeaderNewN((ajuint) ncontigs);

        bgzf = ajSeqBamBgzfNew(ajFileGetFileptr(outf), "w");
        outfile->OutData = bgzf;

        /*
        ** The contig array is NULL-terminated. Contigs named "*" are
        ** skipped, which leaves their slot in the header untouched.
        */
        for(ic = 0; contigs[ic]; ic++)
        {
            contig = contigs[ic];

            if(ajStrMatchC(contig->Name, "*"))
                continue;

            bamhead->target_name[ic] = strdup(ajStrGetPtr(contig->Name));
            bamhead->target_len[ic]  = contig->Length;

            ajFmtPrintAppS(&text, "@SQ\tSN:%S\tLN:%d",
                           contig->Name, contig->Length);

            if(contig->URI)
                ajFmtPrintAppS(&text, "\tUR:%S", contig->URI);

            if(contig->MD5)
                ajFmtPrintAppS(&text, "\tM5:%S", contig->MD5);

            if(contig->Species)
                ajFmtPrintAppS(&text, "\tSP:%S", contig->Species);

            ajFmtPrintAppS(&text, "\n");

            /* One comment line per tag attached to this contig */
            iter = ajListIterNewread(contig->Tags);

            while(!ajListIterDone(iter))
            {
                atag = ajListIterGet(iter);
                ajFmtPrintAppS(&text, "@CO\t%S %u %u %S\n",
                               atag->Name, atag->x1, atag->y1,
                               atag->Comment);
            }

            ajListIterDel(&iter);
        }

        rgtext = assemSAMGetReadgroupHeaderlines(assem);

        if(rgtext)
            ajStrAppendS(&text, rgtext);

        ajSeqBamHeaderSetTextC(bamhead, ajStrGetPtr(text));
        ajSeqBamHeaderWrite(bgzf, bamhead);

        ajSeqBamHeaderDel(&bamhead);
        ajStrDel(&text);
        AJFREE(contigs);

        if(!assem->BamHeader)
            return ajTrue;
    }

    /* data */

    bgzf = outfile->OutData;

    /* A single alignment record is reused for every read */
    AJNEW0(bam);
    bam->m_data = 10;
    AJCNEW0(bam->data, bam->m_data);

    iter = ajListIterNewread(assem->Reads);

    while(!ajListIterDone(iter))
    {
        aread = ajListIterGet(iter);
        assemoutWriteBamAlignment(bgzf, aread, bam);
    }

    ajListIterDel(&iter);

    AJFREE(bam->data);
    AJFREE(bam);

    /* The compressed output handle is deliberately left open here */

    return ajTrue;
}
/* @funcstatic assemoutWriteNextSam *******************************************
**
** Writes the next part of an assembly to a SAM output file.
**
** When assem->Hasdata is false a header is written first: an @HD line, an
** @PG program record built from the command line, one @SQ line per contig
** whose name is not "*" (followed by an @CO line for each of the contig's
** tags), and any read group lines returned by
** assemSAMGetReadgroupHeaderlines. After the header the function returns
** straight away unless assem->BamHeader is set.
**
** Otherwise (or after the header) every read in assem->Reads is written
** with assemoutWriteSamAlignment, which is given the contig array and its
** size.
**
** @param [u] outfile [AjPOutfile]     Output file object
** @param [r] assem   [const AjPAssem] Assembly object
**
** @return [AjBool] False if there is no output file or no assembly,
**                  true otherwise
** @@
******************************************************************************/
static AjBool assemoutWriteNextSam(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile outf            = ajOutfileGetFile(outfile);
    AjPAssemContig* contigs = NULL;
    AjPAssemContig contig   = NULL;
    AjPAssemRead aread      = NULL;
    AjPAssemTag atag        = NULL;
    AjIList iter            = NULL;
    AjPStr cmdline          = NULL;
    const AjPStr rgtext     = NULL;
    ajint ndebug            = 0;
    ajulong ic              = 0UL;

    if(!outf)
        return ajFalse;

    if(!assem)
        return ajFalse;

    /* The count reported here is always zero; it is never updated */
    ajDebug("assemoutWriteSam: # of contigs = %d\n", ndebug);

    if(!assem->Hasdata)
    {
        ajFmtPrintF(outf, "@HD\tVN:1.3\tSO:%s\n",
                    ajAssemGetSortorderC(assem));

        /* Program record: the command line with newlines made spaces */
        cmdline = ajStrNewS(ajUtilGetCmdline());
        ajStrExchangeKK(&cmdline, '\n', ' ');
        ajFmtPrintF(outf, "@PG\tID:%S\tVN:%S\tCL:%S\n",
                    ajUtilGetProgram(), ajNamValueVersion(), cmdline);
        ajStrDel(&cmdline);

        /* Prefer the ordered contig list; fall back to the contig table */
        if(!ajListGetLength(assem->ContigsOrder))
            ajTableToarrayValues(assem->Contigs, (void***)&contigs);
        else
            ajListToarray(assem->ContigsOrder, (void***)&contigs);

        /* The contig array is NULL-terminated */
        for(ic = 0UL; contigs[ic]; ic++)
        {
            contig = contigs[ic];

            if(ajStrMatchC(contig->Name, "*"))
                continue;

            ajFmtPrintF(outf, "@SQ\tSN:%S\tLN:%d",
                        contig->Name, contig->Length);

            if(contig->URI)
                ajFmtPrintF(outf, "\tUR:%S", contig->URI);

            if(contig->MD5)
                ajFmtPrintF(outf, "\tM5:%S", contig->MD5);

            if(contig->Species)
                ajFmtPrintF(outf, "\tSP:%S", contig->Species);

            ajFmtPrintF(outf, "\n");

            /* One comment line per tag attached to this contig */
            iter = ajListIterNewread(contig->Tags);

            while(!ajListIterDone(iter))
            {
                atag = ajListIterGet(iter);
                ajFmtPrintF(outf, "@CO\t%S %u %u %S\n",
                            atag->Name, atag->x1, atag->y1,
                            atag->Comment);
            }

            ajListIterDel(&iter);
        }

        rgtext = assemSAMGetReadgroupHeaderlines(assem);

        if(rgtext)
            ajFmtPrintF(outf,"%S", rgtext);

        AJFREE(contigs);

        if(!assem->BamHeader)
            return ajTrue;
    }

    /* data */

    iter = ajListIterNewread(assem->Reads);

    /* The contig array is rebuilt; this time its size is kept as well */
    if(!ajListGetLength(assem->ContigsOrder))
        ic = ajTableToarrayValues(assem->Contigs, (void***)&contigs);
    else
        ic = ajListToarray(assem->ContigsOrder, (void***)&contigs);

    while(!ajListIterDone(iter))
    {
        aread = ajListIterGet(iter);
        assemoutWriteSamAlignment(outf, aread, contigs, (ajuint) ic);
    }

    ajListIterDel(&iter);
    AJFREE(contigs);

    return ajTrue;
}
/* @funcstatic acdrelations_readdatfile ***************************************
**
** Reads a '|'-delimited file into a PEdam object, creating one PEdamdat
** record per non-comment line.
**
** For every line that does not start with '#', the first token becomes
** the record's acdtype (after ajStrRemoveWhite), the second token its edam
** value, and the third token is split again on ';' into individual
** strings which are stored, after ajStrRemoveWhite, in the record's
** acdattr array (with their count in n).
**
** @param [u] inf [AjPFile] Input file stream
** @param [w] P   [PEdam*]  Object to fill. (*P)->dat receives the array of
**                          records and (*P)->n the value returned by
**                          ajListToarray. *P is dereferenced without being
**                          allocated here.
**
** @return [void]
** @@
******************************************************************************/
static void acdrelations_readdatfile(AjPFile inf, PEdam *P)
{
    AjPStr rdline        = NULL;
    AjPStr typestr       = NULL;
    AjPStr edamstr       = NULL;
    AjPStr attr          = NULL;
    AjPList attrs        = NULL;
    AjPList records      = NULL;
    PEdamdat record      = NULL;
    const AjPStr block   = NULL;
    const AjPStr piece   = NULL;

    if(!P)
        ajFatal("Null arg error 1 in acdrelations_readdatfile");

    if(!inf)
        ajFatal("Null arg error 3 in acdrelations_readdatfile");

    rdline  = ajStrNew();
    typestr = ajStrNew();
    edamstr = ajStrNew();
    records = ajListNew();

    while(ajReadline(inf, &rdline))
    {
        /* Only non-comment lines produce a record */
        if(!ajStrPrefixC(rdline, "#"))
        {
            /* Tokens 1 and 2: ACD datatype and EDAM relations: value */
            ajStrAssignS(&typestr, ajStrParseC(rdline, "|"));
            ajStrAssignS(&edamstr, ajStrParseC(NULL, "|"));

            /* Token 3: the block of attribute:value strings */
            block = ajStrParseC(NULL, "|");

            attrs = ajListstrNew();

            /* Split the block on ';', one cleaned copy per piece */
            for(piece = ajStrParseC(block, ";");
                piece;
                piece = ajStrParseC(NULL, ";"))
            {
                attr = ajStrNew();
                ajStrAssignS(&attr, piece);
                ajStrRemoveWhite(&attr);
                ajListstrPushAppend(attrs, attr);
            }

            /* Fill the record and queue it */
            record = ajEdamdatNew();
            ajStrRemoveWhite(&typestr);
            ajStrAssignS(&record->acdtype, typestr);
            ajStrAssignS(&record->edam, edamstr);
            record->n = ajListstrToarray(attrs, &record->acdattr);
            ajListPushAppend(records, record);

            /* The strings are kept in record->acdattr; drop the list */
            ajListstrFree(&attrs);
        }
    }

    /* Hand the records over to the PEdam object as an array */
    (*P)->n = ajListToarray(records, (void***) &((*P)->dat));

    ajListFree(&records);
    ajStrDel(&edamstr);
    ajStrDel(&typestr);
    ajStrDel(&rdline);
}
/* @funcstatic pdbplus_sort ***************************************************
**
** For each chain of a Pdb object, scans the residues in order, groups
** consecutive residues that share the same eStrideType into elements and
** passes every element of at least tS residues to pdbplus_writeElement.
** Residues of type 'C', 'B', 'b', 'T' or '.' never belong to an element.
** A jump of more than one in residue Idx also ends the current element.
**
** The residues are then scanned a second time to count elements by their
** eStrideNum: types 'H', 'G' and 'I' are counted as helices, any other
** numbered element as a strand. The counts are stored in the chain.
**
** @param [u] pdb [AjPPdb] Pdb object whose chains are processed
** @param [r] tS [int] Minimum element size (in residues) to be written
**
** @return [void]
** @@
******************************************************************************/

static void pdbplus_sort(AjPPdb pdb, int tS)
{
    AjPResidue *arr = NULL;      /* Residues of the current chain          */
    ajint chn     = 0;           /* Chain index                            */
    ajint n       = 0;           /* Residue index within arr               */
    ajint siz     = 0;           /* Number of residues in arr              */
    ajint first   = 0;           /* Index of first residue of the element  */
    ajint len     = 0;           /* Residues counted in current element    */
    ajint eNum    = 0;           /* Sequential element number              */
    ajint seen    = 0;           /* Elements counted so far (second pass)  */
    ajint prevIdx = 0;           /* Idx of the previously visited residue  */
    char  etype   = ' ';         /* Type of the current element            */
    AjBool inElem  = ajFalse;    /* True while an element is open          */
    AjBool outside = ajFalse;    /* True if residue cannot be in an element*/
    int numHelices = 0;
    int numStrands = 0;

    for(chn = 0; chn < pdb->Nchn; chn++)
    {
        /* Residue list of this chain as an array; siz is its length */
        siz = ajListToarray((AjPList)pdb->Chains[chn]->Residues,
                            (void ***)&arr);

        /* First pass: find elements and write those that are big enough.
        ** prevIdx is refreshed after every residue, including skipped ones.
        */
        for(eNum = 1, inElem = ajFalse, n = 0;
            n < siz;
            prevIdx = arr[n]->Idx, n++)
        {
            switch(arr[n]->eStrideType)
            {
                case 'C':
                case 'B':
                case 'b':
                case 'T':
                case '.':
                    outside = ajTrue;
                    break;

                default:
                    outside = ajFalse;
                    break;
            }

            if(outside)
            {
                /* A non-element residue closes any open element */
                if(inElem)
                {
                    if(len >= tS)
                    {
                        pdbplus_writeElement(first, n - 1, eNum, arr);
                        eNum++;
                    }

                    inElem = ajFalse;
                    len    = 0;
                }

                continue;
            }

            if(!inElem)
            {
                /* This residue opens a new element */
                first  = n;
                etype  = arr[n]->eStrideType;
                inElem = ajTrue;
                len    = 1;

                continue;
            }

            if(arr[n]->eStrideType != etype)
            {
                /* Type changed: close the old element, open the next one.
                ** inElem stays true. */
                if(len >= tS)
                {
                    pdbplus_writeElement(first, n - 1, eNum, arr);
                    eNum++;
                }

                first = n;
                len   = 1;
                etype = arr[n]->eStrideType;

                continue;
            }

            /* Same type as the open element: it only grows when the
            ** residue number follows directly on the previous one. */
            if(arr[n]->Idx == prevIdx + 1)
                len++;

            if(arr[n]->Idx > prevIdx + 1)
            {
                /* Jump in residue numbering: the element ends at the
                ** previous residue and a new one of the same type starts
                ** here. inElem and etype are unchanged. */
                if(len >= tS)
                {
                    pdbplus_writeElement(first, n - 1, eNum, arr);
                    eNum++;
                }

                len   = 1;
                first = n;

                continue;
            }

            /* Last residue of the chain: flush the open element */
            if((n == (siz - 1)) && (len >= tS))
                pdbplus_writeElement(first, n, eNum, arr);
        }

        /* Second pass: count each element number once, in increasing
        ** order. Residues with eStrideNum of 0 are ignored. */
        numHelices = 0;
        numStrands = 0;

        for(n = 0, seen = 0; n < siz; n++)
        {
            if(arr[n]->eStrideNum && (arr[n]->eStrideNum > seen))
            {
                switch(arr[n]->eStrideType)
                {
                    case 'H':
                    case 'G':
                    case 'I':
                        numHelices++;
                        break;

                    default:
                        numStrands++;
                        break;
                }

                seen++;
            }
        }

        pdb->Chains[chn]->numHelices = numHelices;
        pdb->Chains[chn]->numStrands = numStrands;

        AJFREE(arr);
    }
}
/* @prog domainreso ***********************************************************
**
** Removes low resolution domains from a DCF file (domain
** classification file).
**
** The program pops each file name from the "cpdbpath" directory list,
** reads the first model of the coordinate file and, when its resolution
** value (Reso) is greater than the "threshold" value, stores its PDB code.
** The stored codes are sorted. The input DCF file is then read domain by
** domain, and a domain is written to the output DCF file only if
** domainreso_StrBinSearchDomain returns -1 for its identifier, i.e. it
** was not found among the stored codes.
**
******************************************************************************/

int main(ajint argc, char **argv)
{
    AjPList   cpdb_path = NULL;  /* Location of coordinate files for input */
    AjPStr    temp      = NULL;  /* Name of current coordinate file        */
    AjPStr    temp2     = NULL;  /* Copy of a PDB code for the entry list  */
    AjPList   entry     = NULL;  /* List of pdb codes with resolution
                                    ABOVE the threshold                    */
    AjPStr   *entryarr  = NULL;  /* entry as an array                      */
    AjPFile   fptr_cpdb = NULL;  /* Pointer to current coordinate file     */
    AjPFile   dcfin     = NULL;  /* DCF input file                         */
    AjPFile   dcfout    = NULL;  /* DCF output file                        */
    AjPPdb    pdb       = NULL;  /* Pdb object pointer                     */
    AjPDomain domain    = NULL;  /* Domain structure                       */
    float     threshold = 0.0;   /* Resolution threshold                   */
    ajint     num       = 0;     /* Number of codes in entry               */
    ajint     type      = 0;     /* Type of domain (ajSCOP or ajCATH)
                                    in the DCF file                        */
    ajint     i         = 0;     /* Loop counter                           */


    /* Read data from acd */
    embInitPV("domainreso", argc, argv, "DOMAINATRIX", VERSION);

    cpdb_path = ajAcdGetDirlist("cpdbpath");
    threshold = ajAcdGetFloat("threshold");
    dcfin     = ajAcdGetInfile("dcfinfile");
    dcfout    = ajAcdGetOutfile("dcfoutfile");

    /* No string is allocated for temp here: ajListPop supplies it, so an
    ** allocation would be overwritten and leaked. */
    entry = ajListNew();

    type = ajDomainDCFType(dcfin);


    /* Produce list of pdb codes with resolution ABOVE the threshold. */
    while(ajListPop(cpdb_path, (void **)&temp))
    {
        /* Open coordinate file. */
        if((fptr_cpdb = ajFileNewInNameS(temp)) == NULL)
        {
            ajWarn("Could not open cpdb file");
            ajStrDel(&temp);
            continue;
        }

        ajFmtPrint("%S\n", temp);
        fflush(stdout);

        /* Read coordinate data file. The result is checked before use
        ** so that a file that cannot be parsed is skipped instead of
        ** being dereferenced as NULL. */
        pdb = ajPdbReadFirstModelNew(fptr_cpdb);

        if(!pdb)
        {
            ajWarn("Could not read cpdb file");
            ajFileClose(&fptr_cpdb);
            ajStrDel(&temp);
            continue;
        }

        /* Check if resolution is above threshold. */
        if(pdb->Reso > threshold)
        {
            /* The list takes ownership of the copied code. */
            temp2 = ajStrNew();
            ajStrAssignS(&temp2, pdb->Pdb);
            ajListPush(entry, (AjPStr) temp2);
        }

        /* Close coordinate file and tidy up */
        ajPdbDel(&pdb);
        ajFileClose(&fptr_cpdb);
        ajStrDel(&temp);
    }

    num = ajListGetLength(entry);


    /* Sort the list of pdb codes & convert to an array. */
    ajListSort(entry, domainreso_StrComp);
    ajListToarray(entry, (void ***)&entryarr);


    /* Read DCF file and compare IDs to those in the list. */
    while((domain = (ajDomainReadCNew(dcfin, "*", type))))
    {
        /* DOMAIN id not found in the list of domains with resolution
        ** above the threshold, so include it in the output file. */
        if((domainreso_StrBinSearchDomain(ajDomainGetId(domain),
                                          entryarr, num)) == -1)
            ajDomainWrite(dcfout, domain);

        /* Delete domain structure. */
        ajDomainDel(&domain);
    }


    /* Tidy up. Every code string stored in the list is released through
    ** the array; temp2 is only an alias of the last one pushed, so it is
    ** not deleted separately. */
    for(i = 0; i < num; i++)
        ajStrDel(&entryarr[i]);

    AJFREE(entryarr);
    ajListFree(&entry);
    ajListFree(&cpdb_path);
    ajFileClose(&dcfout);
    ajFileClose(&dcfin);


    /* Return. */
    ajExit();
    return 0;
}