/* @funcstatic domainalign_writeid ******************************************** ** ** House-keeping function. ** ** @param [r] domain [AjPDomain] Domain. ** @param [r] noden [ajint] Node number. ** @param [r] daf [AjPDirout] Domain alignment files. ** @param [r] super [AjPDirout] Structural superimposition files. ** @param [r] align [AjPStr *] Align. ** @param [r] alignc [AjPStr *] Alignc. ** ** @return [void] ** ** @@ ****************************************************************************/ static void domainalign_writeid(AjPDomain domain, ajint noden, AjPDirout daf, AjPDirout super, AjPStr *align, AjPStr *alignc) { AjPStr temp=NULL; temp=ajStrNew(); if(noden==1) ajStrFromInt(&temp, domain->Scop->Sunid_Class); else if (noden==2) ajStrFromInt(&temp, domain->Scop->Sunid_Fold); else if (noden==3) ajStrFromInt(&temp, domain->Scop->Sunid_Superfamily); else if (noden==4) ajStrFromInt(&temp, domain->Scop->Sunid_Family); else if (noden==5) ajStrFromInt(&temp, domain->Cath->Class_Id ); else if (noden==6) ajStrFromInt(&temp, domain->Cath->Arch_Id); else if (noden==7) ajStrFromInt(&temp, domain->Cath->Topology_Id); else if (noden==8) ajStrFromInt(&temp, domain->Cath->Superfamily_Id); else if (noden==9) ajStrFromInt(&temp, domain->Cath->Family_Id); ajStrAssignS(align, temp); ajStrInsertS(align, 0, ajDiroutGetPath(daf)); ajStrAppendC(align, "."); ajStrAppendS(align, ajDiroutGetExt(daf)); ajStrAssignS(alignc, temp); ajStrInsertS(alignc, 0, ajDiroutGetPath(super)); ajStrAppendC(alignc, "."); ajStrAppendS(alignc, ajDiroutGetExt(super)); ajDebug("daf file '%S' super file '%S'\n", *align, *alignc); ajStrDel(&temp); return; }
static void trimest_tolower(AjPStr *strng, ajint start, ajint end) { AjPStr substr; substr = ajStrNew(); /* extract the region and lowercase */ ajStrAppendSubS(&substr, *strng, start, end); ajStrFmtLower(&substr); /* remove and replace the lowercased region */ ajStrCutRange(strng, start, end); ajStrInsertS(strng, start, substr); ajStrDel(&substr); return; }
static void extractfeat_GetRegionPad(const AjPSeq seq, AjPStr *featstr, ajint start, ajint end, AjBool sense, AjBool beginning) { ajint tmp; ajint pad; AjPStr result; ajDebug("In extractfeat_GetRegionPad start=%d, end=%d\n", start, end); result = ajStrNew(); if(start > end) return; if(start < 0) { pad = -start; if(ajSeqIsNuc(seq)) ajStrAppendCountK(&result, 'N', pad); else ajStrAppendCountK(&result, 'X', pad); start = 0; } if(end > (ajint) ajSeqGetLen(seq)-1) tmp = ajSeqGetLen(seq)-1; else tmp = end; if(start <= (ajint) ajSeqGetLen(seq) && tmp >= 0) { ajDebug("Get subsequence %d-%d\n", start, tmp); ajStrAppendSubS(&result, ajSeqGetSeqS(seq), start, tmp); ajDebug("result=%S\n", result); } if(end > (ajint) ajSeqGetLen(seq)-1) { pad = end - ajSeqGetLen(seq)+1; if(ajSeqIsNuc(seq)) ajStrAppendCountK(&result, 'N', pad); else ajStrAppendCountK(&result, 'X', pad); ajDebug("result=%S\n", result); } /* if feature was in reverse sense, then get reverse complement */ if(!sense) { ajDebug("get reverse sense of subsequence\n"); ajSeqstrReverse(&result); ajDebug("result=%S\n", result); } if(beginning) { ajDebug("Prepend to featstr: %S\n", result); ajStrInsertS(featstr, 0, result); } else { ajDebug("Append to featstr: %S\n", result); ajStrAppendS(featstr, result); } ajDebug("featstr=%S\n", *featstr); ajStrDel(&result); return; }
int main(int argc, char **argv) { AjPSeqout outseq = NULL; AjPList list = NULL; AjPSeq seq = NULL; AjPStr insert = NULL; AjPStr seqstr = NULL; AjPStr* seqr = NULL; AjPFile data = NULL; ajint start = 0; ajint length = 0; ajint amount = 0; ajint scmax = 0; ajint extra = 0; embInit("makeprotseq", argc, argv); data = ajAcdGetInfile("pepstatsfile"); insert = ajAcdGetString("insert"); start = ajAcdGetInt("start"); length = ajAcdGetInt("length"); amount = ajAcdGetInt("amount"); outseq = ajAcdGetSeqoutall("outseq"); list = ajListstrNew(); /* this is checked by acd if(amount <=0 || length <= 0) ajFatal("Amount or length is 0 or less. " "Unable to create any sequences"); */ /* if insert, make sure sequence is large enough */ if(ajStrGetLen(insert)) { length -= ajStrGetLen(insert); /* start= start <= 1 ? 0 : --start; */ /* checked in acd */ start--; if(length <= 0) ajFatal("Sequence smaller than inserted part. " "Unable to create sequences."); } /* make the list of AjPStr to be used in sequence creation */ if(data) { ajDebug("Distribution datafile '%s' given checking type\n", ajFileGetPrintnameC(data)); seqstr = ajStrNew(); ajReadlineTrim(data,&seqstr); if(ajStrFindC(seqstr,"PEPSTATS") == 0) { makeprotseq_parse_pepstats(&list,data); } else { ajWarn("Not pepstats file. Making completely random sequences."); makeprotseq_default_chars(&list); } ajStrDel(&seqstr); ajFileClose(&data); } else makeprotseq_default_chars(&list); /* if insert, make sure type is correct */ /* typecheking code is not working, uncomment and test after it is if(ajStrGetLen(insert)) { seqstr = ajStrNew(); if(prot) ajStrAssignC(&seqstr,"pureprotein"); if(!ajSeqTypeCheckS(&insert,seqstr)) ajFatal("Insert not the same sequence type as sequence itself."); ajStrDel(&seqstr); } */ /* array allows fast creation of a sequences */ scmax = (ajuint) ajListstrToarray(list,&seqr); if(!scmax) ajFatal("No strings in list. No characters to make the sequence."); ajDebug("Distribution array done.\nscmax '%d', extra '%d', first '%S'\n", scmax,extra,seqr[0]); ajRandomSeed(); while(amount-- > 0) { seqstr = makeprotseq_random_sequence(seqr,scmax,length); if(ajStrGetLen(insert)) ajStrInsertS(&seqstr,start,insert); ajStrFmtLower(&seqstr); seq = ajSeqNew(); ajSeqAssignSeqS(seq, seqstr); ajSeqSetProt(seq); ajSeqoutWriteSeq(outseq, seq); ajSeqDel(&seq); ajStrDel(&seqstr); } ajSeqoutClose(outseq); ajSeqoutDel(&outseq); ajListstrFreeData(&list); ajStrDel(&insert); AJFREE(seqr); embExit(); return 0; }
/* @prog seqnr ************************************************************** ** ** Removes redundancy from DHF files (domain hits files) or other files of ** sequences. ** ****************************************************************************/ int main(int argc, char **argv) { /* Variable declarations */ AjPList in = NULL; /* Names of domain hits files (input). */ AjPStr inname = NULL; /* Full name of the current DHF file. */ AjPFile inf = NULL; /* Current DHF file. */ EmbPHitlist infhits = NULL; /* Hitlist from DHF file */ AjBool dosing = ajFalse; /* Filter using singlet sequences. */ AjPDir singlets = NULL; /* Singlets (input). */ AjBool dosets = ajFalse; /* Filter using sets of sequences. */ AjPDir insets = NULL; /* Sets (input). */ AjPStr mode = NULL; /* Mode of operation */ ajint moden = 0; /* Mode 1: single threshold for redundancy removal, 2: lower and upper thresholds for redundancy removal. */ float thresh = 0.0; /* Threshold for non-redundancy. */ float threshlow = 0.0; /* Threshold (lower limit). */ float threshup = 0.0; /* Threshold (upper limit). */ AjPMatrixf matrix = NULL; /* Substitution matrix. */ float gapopen = 0.0; /* Gap insertion penalty. */ float gapextend = 0.0; /* Gap extension penalty. */ AjPDirout out = NULL; /* Domain hits files (output). */ AjPFile outf = NULL; /* Current DHF file (output). */ AjBool dored = ajFalse; /* True if redundant hits are output. */ AjPDirout outred = NULL; /* DHF files for redundant hits (output).*/ AjPFile redf = NULL; /* Current DHF file redundancy (output). */ AjPStr outname = NULL; /* Name of output file (re-used). */ AjPFile logf = NULL; /* Log file pointer. */ AjBool ok = ajFalse; /* Housekeeping. */ AjPSeqset seqset = NULL; /* Seqset (re-used). */ AjPSeqin seqin = NULL; /* Seqin (re-used). */ AjPList seq_list = NULL; /* Main list for redundancy removal. */ EmbPDmxNrseq seq_tmp = NULL; /* Temp. pointer for making seq_list. */ ajint seq_siz = 0; /* Size of seq_list. */ AjPUint keep = NULL; /* 1: Sequence in seq_list was classed as non-redundant, 0: redundant. */ AjPUint nokeep = NULL; /* Inversion of keep array. */ ajint nseqnr = 0; /* No. non-redundant seqs. in seq_list. */ AjPStr filtername= NULL; /* Name of filter file (re-used). */ AjPFile filterf = NULL; /* Current filter file. */ EmbPHitlist hitlist = NULL; /* Hitlist from input file (re-used). */ AjPScopalg scopalg = NULL; /* Scopalg from input file. */ ajint x = 0; /* Housekeeping. */ /* Read data from acd. */ embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION); in = ajAcdGetDirlist("dhfinpath"); dosing = ajAcdGetToggle("dosing"); singlets = ajAcdGetDirectory("singletsdir"); dosets = ajAcdGetToggle("dosets"); insets = ajAcdGetDirectory("insetsdir"); mode = ajAcdGetListSingle("mode"); thresh = ajAcdGetFloat("thresh"); threshlow = ajAcdGetFloat("threshlow"); threshup = ajAcdGetFloat("threshup"); matrix = ajAcdGetMatrixf("matrix"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); out = ajAcdGetOutdir("dhfoutdir"); dored = ajAcdGetToggle("dored"); outred = ajAcdGetOutdir("redoutdir"); logf = ajAcdGetOutfile("logfile"); /* Housekeeping. */ filtername = ajStrNew(); outname = ajStrNew(); if(!(ajStrToInt(mode, &moden))) ajFatal("Could not parse ACD node option"); /* Process each DHF (input) in turn. */ while(ajListPop(in,(void **)&inname)) { ajFmtPrint("Processing %S\n", inname); ajFmtPrintF(logf, "//\n%S\n", inname); seq_list = ajListNew(); keep = ajUintNew(); nokeep = ajUintNew(); /**********************************/ /* Open DHF file */ /**********************************/ if((inf = ajFileNewInNameS(inname)) == NULL) ajFatal("Could not open DHF file %S", inname); /* Read DHF file. */ ok = ajFalse; if(!(infhits = embHitlistReadFasta(inf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqsearch_psialigned"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(infhits->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&inf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty input file %S\n", inname); ajFmtPrintF(logf, "Empty input file %S\n", inname); if(infhits) embHitlistDel(&infhits); if(seqset) ajSeqsetDel(&seqset); if(seqin) ajSeqinDel(&seqin); continue; } /* 1. Create list of sequences from the main input directory.. */ if(infhits) { for(x=0; x<infhits->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } /**********************************/ /* Open singlets filter file */ /**********************************/ if(dosing) { /* Open singlets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(singlets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(singlets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DHF file %S", filtername); ajFmtPrint("Could not open singlets filter file %S", filtername); } else { /* Read DHF file. */ ok = ajFalse; if(!(hitlist = embHitlistReadFasta(filterf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(hitlist->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty singlets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty singlets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files). */ if(hitlist) { for(x=0; x<hitlist->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,hitlist->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,hitlist->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } embHitlistDel(&hitlist); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /**********************************/ /* Open sets filter file */ /**********************************/ if(dosets) { /* Open sets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(insets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(insets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DAF file %S", filtername); ajFmtPrint("Could not open sets filter file %S", filtername); } else { /* Read DAF file. */ ok = ajFalse; if(!(ajDmxScopalgRead(filterf, &scopalg))) { ajWarn("ajDmxScopalgRead call failed in seqnr"); ajFmtPrintF(logf, "ajDmxScopalgRead call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(scopalg->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty sets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty sets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files).. */ if(scopalg) { for(x=0; x<scopalg->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]); ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]); /* Remove gap char's & whitespace. */ ajStrRemoveGap(&seq_tmp->Seq->Seq); ajListPushAppend(seq_list,seq_tmp); } ajDmxScopalgDel(&scopalg); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /* 4. Identify redundant domains.. */ if(moden == 1) { if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, thresh, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } else { if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, threshlow, threshup, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } seq_siz = ajListGetLength(seq_list); for(x=0; x<seq_siz; x++) if(ajUintGet(keep, x) == 1) ajUintPut(&nokeep, x, 0); else ajUintPut(&nokeep, x, 1); /* Create output files. */ ajStrAssignS(&outname, inname); ajFilenameTrimPathExt(&outname); outf = ajFileNewOutNameDirS(outname, out); if(dored) redf = ajFileNewOutNameDirS(outname, outred); /* 5. Write non-redundant domains to main output directory. 6. If specified, write redundant domains to output directory. */ embHitlistWriteSubsetFasta(outf, infhits, keep); if(dored) embHitlistWriteSubsetFasta(redf, infhits, nokeep); embHitlistDel(&infhits); ajFileClose(&outf); ajFileClose(&redf); ajStrDel(&inname); while(ajListPop(seq_list, (void **) &seq_tmp)) { ajSeqDel(&seq_tmp->Seq); AJFREE(seq_tmp); } ajListFree(&seq_list); ajUintDel(&keep); ajUintDel(&nokeep); } /* Tidy up. */ ajListFree(&in); if(singlets) ajDirDel(&singlets); if(insets) ajDirDel(&insets); ajDiroutDel(&out); if(outred) ajDiroutDel(&outred); ajFileClose(&logf); ajMatrixfDel(&matrix); ajStrDel(&filtername); ajStrDel(&outname); ajStrDel(&mode); embExit(); return 0; }