Esempio n. 1
0
static void prima_TwoSortscorepos(AjPList *pairlist)
{
    PPair tmp = NULL;
    AjPList intlist = NULL;
    AjPList filist  = NULL;
    PPair save  = NULL;
    float   score = 0.0;


    ajListSort(*pairlist,prima_Compare);
    intlist = ajListNew();
    filist  = ajListNew();

    score = (float) -1.0;

    while(ajListPop(*pairlist,(void **)&tmp))
    {
	if(tmp->f->score == score)
	{
	    ajListPush(intlist,(void *)tmp);
	    continue;
	}

	save = tmp;
	ajListSort(intlist,prima_PosCompare);
	score = tmp->f->score;
	prima_RevSort(&intlist);

	while(ajListPop(intlist,(void **)&tmp))
	    ajListPushAppend(filist,(void *)tmp);
	ajListPush(intlist,(void *)save);
    }

    ajListSort(intlist,prima_PosCompare);
    prima_RevSort(&intlist);

    while(ajListPop(intlist,(void **)&tmp))
	ajListPushAppend(filist,(void *)tmp);

    ajListFree(&intlist);
    ajListFree(pairlist);

    *pairlist = filist;
    return;
}
Esempio n. 2
0
static void prima_RevSort(AjPList *alist)
{
    PPair tmp = NULL;
    AjPList intlist = NULL;
    AjPList filist  = NULL;
    PPair save = NULL;
    ajint pos = -1;


    intlist = ajListNew();
    filist  = ajListNew();

    pos = -1;

    while(ajListPop(*alist,(void **)&tmp))
    {
	if(tmp->f->start+tmp->f->primerlen == pos)
	{
	    ajListPush(intlist,(void *)tmp);
	    continue;
	}

	save = tmp;
	ajListSort(intlist,prima_PosEndCompare);
	pos = tmp->f->start+tmp->f->primerlen;
	while(ajListPop(intlist,(void **)&tmp))
	    ajListPushAppend(filist,(void *)tmp);
	ajListPush(intlist,(void *)save);
    }

    ajListSort(intlist,prima_PosEndCompare);
    while(ajListPop(intlist,(void **)&tmp))
	ajListPushAppend(filist,(void *)tmp);

    ajListFree(&intlist);
    ajListFree(alist);

    *alist = filist;

    return;
}
Esempio n. 3
0
static void silent_fmt_hits(AjPList hits, AjPFeattable feat,
			    AjBool silent, AjBool rev)
{
    PSilent res;
    AjPFeature sf = NULL;
    AjPStr tmpFeatStr = NULL;

    ajListSort(hits,silent_basecompare);

    while(ajListPop(hits,(void **)&res))
    {
	if (rev)
	    sf = ajFeatNewIIRev(feat,
				res->match, res->match+ajStrGetLen(res->site)-1);
	else
	    sf = ajFeatNewII(feat,
			     res->match, res->match+ajStrGetLen(res->site)-1);

	if (silent)
	{
	    ajFmtPrintS(&tmpFeatStr, "*silent Yes");
	    ajFeatTagAdd (sf, NULL, tmpFeatStr);
	}
	ajFmtPrintS(&tmpFeatStr, "*enzyme %S", res->code);
	ajFeatTagAdd (sf, NULL, tmpFeatStr);
	ajFmtPrintS(&tmpFeatStr, "*rspattern %S", res->site);
	ajFeatTagAdd (sf, NULL, tmpFeatStr);
	ajFmtPrintS(&tmpFeatStr, "*baseposn %d", res->base);
	ajFeatTagAdd (sf, NULL, tmpFeatStr);
	ajFmtPrintS(&tmpFeatStr, "*aa %S.%S", res->seqaa, res->reaa);
	ajFeatTagAdd (sf, NULL, tmpFeatStr);
	ajFmtPrintS(&tmpFeatStr, "*mutation %c->%c", res->obase,res->nbase);
	ajFeatTagAdd (sf, NULL, tmpFeatStr);
	
       ajStrDel(&res->code);
       ajStrDel(&res->site);
       ajStrDel(&res->seqaa);
       ajStrDel(&res->reaa);
       AJFREE(res);
    }

    ajStrDel(&tmpFeatStr);
    return;
}
Esempio n. 4
0
static void remap_NoCutList(AjPFile outfile, const AjPTable hittable,
			    AjBool html, const AjPStr enzymes, AjBool blunt,
			    AjBool sticky, ajuint sitelen, AjBool commercial,
			    AjBool ambiguity, AjBool limit,
			    const AjPTable retable)
{

    /* for iterating over hittable */
    PValue value;
    void **keyarray = NULL;			/* array for table */
    void **valarray = NULL;			/* array for table */
    ajint i;

    /* list of enzymes that cut */
    AjPList cutlist;
    AjIList citer;			/* iterator for cutlist */
    AjPStr cutname = NULL;
    AjBool found;

    /* for parsing value->iso string */
    AjPStrTok tok;
    char tokens[] = " ,";
    AjPStr code = NULL;
    const char *p;

    /* for reading in enzymes names */
    AjPFile enzfile = NULL;
    AjPStr *ea;
    ajint ne;				/* number of enzymes */
    AjBool isall = ajTrue;

    /* list of enzymes that don't cut */
    AjPList nocutlist;
    AjIList niter;			/* iterator for nocutlist */
    AjPStr nocutname = NULL;

    /* count of rejected enzymes not matching criteria */
    ajint rejected_count = 0;

    EmbPPatRestrict enz;

    /* for renaming preferred isoschizomers */
    AjPList newlist;

     /*
     **
     ** Make a list of enzymes('cutlist') that hit
     ** including the isoschizomer names
     **
     */
    ajDebug("Make a list of all enzymes that cut\n");


    cutlist   = ajListstrNew();
    nocutlist = ajListstrNew();


    ajTableToarrayKeysValues(hittable, &keyarray, &valarray);
    for(i = 0; keyarray[i]; i++)
    {
        value = (PValue) valarray[i];
        cutname = ajStrNew();
        ajStrAssignRef(&cutname, keyarray[i]);
        ajListstrPushAppend(cutlist, cutname);

        /* Add to cutlist all isoschizomers of enzymes that cut */
        ajDebug("Add to cutlist all isoschizomers of enzymes that cut\n");

        /* start token to parse isoschizomers names */
        tok = ajStrTokenNewC(value->iso,  tokens);
        while(ajStrTokenNextParseC(&tok, tokens, &code))
        {
            cutname = ajStrNew();
            ajStrAssignS(&cutname, code);
            ajListstrPushAppend(cutlist, cutname);
        }
        ajStrTokenDel(&tok);
    }
    ajStrDel(&code);
    AJFREE(keyarray);
    AJFREE(valarray);



     /*
     ** Read in list of enzymes ('nocutlist') - either all or
     ** the input enzyme list.
     ** Exclude those that don't match the selection criteria - count these.
     */

    ajDebug("Read in a list of all input enzyme names\n");

    ne = 0;
    if(!enzymes)
	isall = ajTrue;
    else
    {
	/* get input list of enzymes into ea[] */
	ne = ajArrCommaList(enzymes, &ea);
	if(ajStrMatchCaseC(ea[0], "all"))
	    isall = ajTrue;
	else
	{
	    isall = ajFalse;
	    for(i=0; i<ne; ++i)
		ajStrRemoveWhite(&ea[i]);
	}
    }

    enzfile = ajDatafileNewInNameC(ENZDATA);

    /* push all enzyme names without the required criteria onto nocutlist */

    enz = embPatRestrictNew();
    while(!ajFileIsEof(enzfile))
    {
        if(!embPatRestrictReadEntry(enz, enzfile))
	    continue;

         /* 
	 ** If user entered explicit enzyme list, then check to see if
	 ** this is one of that explicit list 
	 */
        if(!isall)
	{
            found = AJFALSE;
            for(i=0; i<ne; ++i)
                if(ajStrMatchCaseS(ea[i], enz->cod))
		{
		    found = AJTRUE;
                    break;
                }

	    if(!found)			/* not in the explicit list */
		continue;

	    ajDebug("RE %S is in the input explicit list of REs\n", enz->cod);
	}

	/* ignore ncuts==0 as they are unknown */
	if(!enz->ncuts)
	{
	    /* number of cut positions */
            ajDebug("RE %S has an unknown number of cut positions\n",
		    enz->cod);
	    continue;
	}
        ajDebug("RE %S has a known number of cut sites\n", enz->cod);

	if(enz->len < sitelen)
	{
	    /* recognition site length */
            ajDebug("RE %S does not have a long enough recognition site\n", 
		    enz->cod);
	    rejected_count++;
	    continue;
	}

	if(!blunt && enz->blunt)
	{
	    /* blunt/sticky */
            ajDebug("RE %S is blunt\n", enz->cod);
	    rejected_count++;
	    continue;
	}

	if(!sticky && !enz->blunt)
	{
	    /* blunt/sticky */
            ajDebug("RE %S is sticky\n", enz->cod);
	    rejected_count++;
	    continue;
	}

	/* commercially available enzymes have uppercase patterns */
	p = ajStrGetPtr(enz->pat);

         /* 
	 ** The -commercial qualifier is only used if we are searching
	 ** through 'all' of the REBASE database - if we have specified an
	 ** explicit list of enzymes then they are searched for whether or
	 ** not they are commercially available
	 */
	if((*p >= 'a' && *p <= 'z') && commercial && isall)
	{
            ajDebug("RE %S is not commercial\n", enz->cod);
	    rejected_count++;
	    continue;
        }

	if(!ambiguity && remap_Ambiguous(enz->pat)) {
	    ajDebug("RE %S is ambiguous\n", enz->cod);
	    rejected_count++;
	    continue;	
	}

        ajDebug("RE %S matches all required criteria\n", enz->cod);

        code = ajStrNew();
	ajStrAssignS(&code, enz->cod);
	ajListstrPushAppend(nocutlist, code);
    }
    embPatRestrictDel(&enz);
    ajFileClose(&enzfile);


    for(i=0; i<ne; ++i)
	if(ea[i])
	    ajStrDel(&ea[i]);

    if(ne)
	AJFREE(ea);

    /*
     ** Change names of enzymes in the non-cutter list
     ** to that of preferred (prototype) 
     ** enzyme name so that the isoschizomers of cutters
     ** will be removed from the 
     ** non-cutter list in the next bit.
     ** Remove duplicate prototype names.
     */
    if(limit)
    {
        newlist = ajListstrNew();
        remap_RenamePreferred(nocutlist, retable, newlist);
        ajListstrFreeData(&nocutlist);
        nocutlist = newlist;
        ajListSortUnique(nocutlist, remap_cmpcase, remap_strdel);
    }


     /*
     ** Iterate through the list of input enzymes removing those that are in
     ** the cutlist.
     */

    ajDebug("Remove from the nocutlist all enzymes and isoschizomers "
	    "that cut\n");

     /*
     **  This steps down both lists at the same time, comparing names and
     **  iterating to the next name in whichever list whose name compares
     **  alphabetically before the other.  Where a match is found, the
     **  nocutlist item is deleted.
     */

    ajListSort(nocutlist, remap_cmpcase);
    ajListSort(cutlist, remap_cmpcase);

    citer = ajListIterNewread(cutlist);
    niter = ajListIterNew(nocutlist);

    /*
       while((cutname = (AjPStr)ajListIterGet(citer)) != NULL)
       ajDebug("dbg cutname = %S\n", cutname);
       */

    nocutname = (AjPStr)ajListIterGet(niter);
    cutname   = (AjPStr)ajListIterGet(citer);

    ajDebug("initial cutname, nocutname: '%S' '%S'\n", cutname, nocutname);

    while(nocutname != NULL && cutname != NULL)
    {
	i = ajStrCmpCaseS(cutname, nocutname);
	ajDebug("compare cutname, nocutname: %S %S ", cutname, nocutname);
	ajDebug("ajStrCmpCase=%d\n", i);
	if(i == 0)
	{
	    /* match - so remove from nocutlist */
	    ajDebug("ajListstrRemove %S\n", nocutname);
	    ajListstrIterRemove(niter);
	    nocutname = (AjPStr)ajListIterGet(niter);
	     /* 
	     ** Don't increment the cutname list pointer here
	     ** - there may be more than one entry in the nocutname
	     ** list with the same name because we have converted 
	     ** isoschizomers to their preferred name
	     */
	    /* cutname = (AjPStr)ajListIterGet(citer); */
	}
	else if(i == -1)
	    /* cutlist name sorts before nocutlist name */
	    cutname = (AjPStr)ajListIterGet(citer);
	else if(i == 1)
	    /* nocutlist name sorts before cutlist name */
	    nocutname = (AjPStr)ajListIterGet(niter);
    }

    ajListIterDel(&citer);
    ajListIterDel(&niter);
    ajListstrFreeData(&cutlist);


     /* Print the resulting list of those that do not cut*/

    ajDebug("Print out the list\n");

    /* print the title */
    if(html)
	ajFmtPrintF(outfile, "<H2>");
    ajFmtPrintF(outfile, "\n\n# Enzymes that do not cut\n\n");

    if(html)
	ajFmtPrintF(outfile, "</H2>\n");

    if(html)
	ajFmtPrintF(outfile, "<PRE>");

    /*  ajListSort(nocutlist, ajStrVcmp);*/
    niter = ajListIterNewread(nocutlist);
    i = 0;
    while((nocutname = (AjPStr)ajListIterGet(niter)) != NULL)
    {
	ajFmtPrintF(outfile, "%-10S", nocutname);
	/* new line after every 7 names printed */
	if(i++ == 7)
	{
	    ajFmtPrintF(outfile, "\n");
	    i = 0;
	}
    }
    ajListIterDel(&niter);


    /* end the output */
    ajFmtPrintF(outfile, "\n");
    if(html) {ajFmtPrintF(outfile, "</PRE>\n");}



     /*
     ** Print the count of rejected enzymes
     ** N.B. This is the count of ALL rejected enzymes including all
     ** isoschizomers
     */

    if(html)
        ajFmtPrintF(outfile, "<H2>");
    ajFmtPrintF(outfile,
		"\n\n# No. of cutting enzymes which do not match the\n"
		"# SITELEN, BLUNT, STICKY, COMMERCIAL, AMBIGUOUS criteria\n\n");
    if(html)
	ajFmtPrintF(outfile, "</H2>\n");
    ajFmtPrintF(outfile, "%d\n", rejected_count);

    ajDebug("Tidy up\n");
    ajListstrFreeData(&nocutlist);
    ajListstrFreeData(&cutlist);

    return;
}
Esempio n. 5
0
/* @funcstatic sigscanlig_score_ligands_site *********************************
**
** Writes score data for ligands (Lighit objects) from individual hits to 
** signatures (Hit objects).  Site score method is used. List of hit objects
** must be sorted by ligand type and site number.
** 
** @param [r] hits    [const AjPList] List of hit objects.
** @return [AjBool] True on success
** @@
******************************************************************************/
AjPList sigscanlig_score_ligands_site(AjPList hits)

{ 
    AjPList ret         = NULL;
    AjIList iter        = NULL;   /* Iterator. */
    EmbPHit  hit         = NULL;   
    AjPStr  prev_ligand = NULL;
    SigPLighit lighit    = NULL;


    ajint  prevsn      = -1;        /* Previous site number */


    float  score        = 0.0;    /* Score for current ligand */
    ajint  nhits        = 0;      /* No. of hits (patches) for current
				     ligand. */
    ajint  nsites       = 0;      /* No. of sites for current ligand */

    float  score_site   = 0.0;    /* Score for this site. */
    ajint  patch_cnt    = 0;      /* No. of patches in current site. */
    
    

    ret = ajListNew();
    prev_ligand = ajStrNew();
  
    iter = ajListIterNew(hits);


    /* Hits are already sorted by ligid & site number */
    while((hit = (EmbPHit) ajListIterGet(iter)))
    {
	/* ajFmtPrint("Current hit: %S (ligid: %S, sn: %d) score: %f\n", 
		   hit->Acc, hit->Sig->Ligid, hit->Sig->sn, hit->Score); */
	
	/* New site */
	if(hit->Sig->sn != prevsn)
	{
	    if(patch_cnt)
		score_site /= patch_cnt;
	    score += score_site;

	    patch_cnt  = 0;
	    score_site = 0;
	}

	/* New ligand */
	if((!ajStrMatchS(hit->Sig->Ligid, prev_ligand)))
	{
	    if(nsites)
		score /= nsites;
	    
	    if(lighit)
	    {
		/* lighit->ns = snarr_siz; */
		lighit->ns = nsites;
		lighit->np = nhits; 
		lighit->score =  score;

		ajStrAssignS(&lighit->ligid, prev_ligand);
		ajListPushAppend(ret, lighit);

	    }
	    
	    lighit = sigscanlig_LighitNew();

	    nsites = 0;
	    nhits=0;
	    prevsn = -1;
	    score = 0;
	}

	
	/* Increment count of sites and hits/patches (for current
           ligand) and patches (for current site) */
	if(hit->Sig->sn != prevsn)
	    nsites++;
	score_site += hit->Score;

	nhits++;
	patch_cnt++;
	
	ajStrAssignS(&prev_ligand, hit->Sig->Ligid);
	prevsn = hit->Sig->sn;
    }
    
    /* Process last hit */
    if(patch_cnt)
	score_site /= patch_cnt;
    score += score_site;
    
    if(nsites)
	score /= nsites;
	    
    if(lighit)
    {
	/* lighit->ns = snarr_siz; */
	lighit->ns = nsites;
	lighit->np = nhits; 
	lighit->score = score;
	ajStrAssignS(&lighit->ligid, prev_ligand);
	ajListPushAppend(ret, lighit);
    }
	

    ajListSort(ret, sigscanlig_MatchinvScore);
    

    ajListIterDel(&iter);
    ajStrDel(&prev_ligand);

    return ret;
}
Esempio n. 6
0
AjPList sigscanlig_score_ligands_patch(AjPList hits)
{ 
    AjPList ret         = NULL;
    AjIList iter        = NULL;   /* Iterator. */
    EmbPHit  hit         = NULL;   
    AjPStr  prev_ligand = NULL;
    SigPLighit lighit    = NULL;
    float  score        = 0.0;
    ajint  nhits      = 0;      /* No. of hits (patches) for current ligand. */
    ajint  nsites     = 0;      /* No. of sites for current ligand. */


    ajint  prevsn      = -1;        /* Previous site number */    

    

    ret = ajListNew();
    prev_ligand = ajStrNew();

    iter = ajListIterNew(hits);

    while((hit = (EmbPHit) ajListIterGet(iter)))
    {
	/* New ligand */
	if((!ajStrMatchS(hit->Sig->Ligid, prev_ligand)))
	{
	    if(nhits)
		score /= nhits;
	    
	    if(lighit)
	    {
		lighit->ns = nsites; 
		lighit->np = nhits; 
		lighit->score =  score;
		ajStrAssignS(&lighit->ligid, prev_ligand);
		ajListPushAppend(ret, lighit);

	    }
	    
	    
	    lighit = sigscanlig_LighitNew();

	    nsites = 0;
	    nhits=0;
	    prevsn = -1;
	    score = 0;
	}
	
	
	/* Increment count of sites and hits/patches (for current ligand)
	   and patches (for current site) */
	if(hit->Sig->sn != prevsn)
	    nsites++;
	score+= hit->Score;
	
	nhits++;

	ajStrAssignS(&prev_ligand, hit->Sig->Ligid);
	prevsn = hit->Sig->sn;
    }
    
    /* Process last hit */
    if(nhits)
	score /= nhits;
	    
    if(lighit)
    {
	lighit->ns = nsites;
	lighit->np = nhits; 
	lighit->score = score;
	ajStrAssignS(&lighit->ligid, prev_ligand);
	ajListPushAppend(ret, lighit);
    }
	
    ajListSort(ret, sigscanlig_MatchinvScore);
    

    ajListIterDel(&iter);
    ajStrDel(&prev_ligand);

    return ret;
}
Esempio n. 7
0
int main(int argc, char **argv)
{
    AjPList      sigin   = NULL;   /* Signature input file names.            */
    AjPStr       signame = NULL;   /* Name of signature file.                */
    AjPFile      sigf    = NULL;   /* Signature input file.                  */
    EmbPSignature sig    = NULL;   /* Signature.                             */
    AjPList      siglist = NULL;   /* List of signatures.                    */
    AjIList      sigiter = NULL;   /* Iterator for siglist.                  */
    AjBool       sigok  = ajFalse; /* True if signature processed ok.        */
    
    EmbPHit      hit = NULL;      /* Hit to store signature-sequence match.  */
    AjPList      hits = NULL;     /* List of hits */


    AjPList      ligands = NULL;     /* List of top-scoring ligands. */

    AjPSeqall    database=NULL;   /* Protein sequences to match signature 
				     against.                                */
    AjPSeq       seq = NULL;      /* Current sequence.                       */
    AjPMatrixf   sub  =NULL;      /* Residue substitution matrix.            */
    float        gapo =0.0;       /* Gap insertion penalty.                  */
    float        gape =0.0;       /* Gap extension penalty.                  */

    AjPStr        nterm=NULL;     /* Holds N-terminal matching options from 
				     acd.                                    */
    ajint         ntermi=0;        /* N-terminal option as int. */

    AjPFile      hitsf =NULL;     /* Hits output file.                       
				     sequence matches.                       */
    AjPDirout    hitsdir=NULL;    /* Directory of hits files (output).       */

    AjPFile      alignf =NULL;    /* Alignment output file.                  */
    AjPDirout    aligndir=NULL;   /* Directory of alignment files (output).  */

    
    AjPFile    resultsf =NULL;    /* Results file (output).  */
    AjPDirout  resultsdir=NULL;   /* Directory of results files (output).  */

    AjPStr  mode         = NULL;  /* Mode, 1: Patch score mode, 2:
				     Site score mode.  */
    ajint   modei        = 0;     /* Selected mode as integer.  */

    SigPLighit lighit   = NULL;

    embInitPV("sigscanlig", argc, argv, "SIGNATURE",VERSION);
    

    /* GET VALUES FROM ACD */
    sigin      = ajAcdGetDirlist("siginfilesdir");
    database   = ajAcdGetSeqall("dbseqall");
    sub        = ajAcdGetMatrixf("sub");
    gapo       = ajAcdGetFloat("gapo");
    gape       = ajAcdGetFloat("gape");
    nterm      = ajAcdGetListSingle("nterm");
    hitsdir    = ajAcdGetOutdir("hitsoutdir");
    aligndir   = ajAcdGetOutdir("alignoutdir"); 
    resultsdir = ajAcdGetOutdir("resultsoutdir"); 
    mode        = ajAcdGetListSingle("mode");



    /*Assign N-terminal matching option etc. */
    ajFmtScanS(nterm, "%d", &ntermi);
    modei       = (ajint) ajStrGetCharFirst(mode)-48;



    /* READ & PROCESS SIGNATURES */
    siglist = ajListNew();
    while(ajListPop(sigin, (void **) &signame))
    {
	/* Read signature files, compile signatures and populate list. */
	sigok = ajFalse;
	if((sigf = ajFileNewInNameS(signame)))
	    if((sig = embSignatureReadNew(sigf)))
		if(embSignatureCompile(&sig, gapo, gape, sub))
		{
		    sigok=ajTrue;
		    ajListPushAppend(siglist, sig);
		    /*
		    ajFmtPrint("Id: %S\nDomid: %S\nLigid: %S\nns: %d\n"
                               "sn: %d\nnp: %d\npn: %d\nminpatch: %d\n"
                               "maxgap: %d\n", 
			       sig->Id, sig->Domid, sig->Ligid, sig->ns,
                               sig->sn, sig->np, sig->pn, sig->minpatch,
                               sig->maxgap); */
		    

		}
	if(!sigok)
	{
	    ajWarn("Could not process %S", signame);
	    embSignatureDel(&sig);
	    ajFileClose(&sigf);
	    ajStrDel(&signame);
	    continue;
	}

	ajFileClose(&sigf);
	ajStrDel(&signame);
    }
    ajListFree(&sigin);

    
    
    /* ALIGN EACH QUERY SEQUENCE TO LIST OF SIGNATURE */
    while(ajSeqallNext(database, &seq))
    {
	/* Do sequence-signature alignment and save results */
	hits = ajListNew();
	sigiter = ajListIterNew(siglist);
	
	while((sig = (EmbPSignature) ajListIterGet(sigiter)))
	{
	    if(embSignatureAlignSeq(sig, seq, &hit, ntermi))
	    {
		hit->Sig = sig;
		
		ajListPushAppend(hits, hit);
		hit=NULL; /* To force reallocation by embSignatureAlignSeq */
	    }
	    /* There has to be a hit for each signature for correct
	       generation of the LHF by sigscanlig_WriteFasta. So push
	       an empty hit if necessary.  'hit'=NULL forces
	       reallocation by embSignatureAlignSeq. */
	    /*
	       else
	       {
		hit = embHitNew();
		ajListPushAppend(hits, hit);
		hit=NULL; 
		}
		*/
	}
	
	ajListIterDel(&sigiter);
	

	/* Rank-order the list of hits by score */
	ajListSort(hits, embMatchinvScore);

	
	/* Write ligand hits & alignment files (output)  */	
	hitsf    = ajFileNewOutNameDirS(ajSeqGetNameS(seq), hitsdir);
	alignf   = ajFileNewOutNameDirS(ajSeqGetNameS(seq), aligndir);
	resultsf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), resultsdir);
	

	
	/* if((!sigscanlig_WriteFasta(hitsf, siglist, hits)))
	    ajFatal("Bad args to sigscanlig_WriteFasta"); */

	if((!sigscanlig_WriteFasta(hitsf, hits)))
	    ajFatal("Bad args to sigscanlig_WriteFasta");


    	if((!sigscanlig_SignatureAlignWriteBlock(alignf, hits)))
	    ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock");

    	/* if((!sigscanlig_SignatureAlignWriteBlock(alignf, siglist, hits)))
	    ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock"); */


	/* Sort list of hits by ligand type and site number.
	   Process list of ligands and print out. */
	ajListSortTwo(hits, embMatchLigid, embMatchSN);


	if(modei==1)
	    ligands = sigscanlig_score_ligands_patch(hits);
	else if(modei==2)
	    ligands = sigscanlig_score_ligands_site(hits);
	else 
	    ajFatal("Unrecognised mode");
	

	sigscanlig_WriteResults(ligands, resultsf);	
	

    	ajFileClose(&hitsf);
	ajFileClose(&alignf);
	ajFileClose(&resultsf);


	/* Memory management */
	while(ajListPop(hits, (void **) &hit))
	    embHitDel(&hit);
	ajListFree(&hits);

        while(ajListPop(ligands, (void **) &lighit))
            sigscanlig_LigHitDel(&lighit);
        ajListFree(&ligands);
    }	
    

    /* MEMORY MANAGEMENT */
    while(ajListPop(siglist, (void **) &sig))
	embSignatureDel(&sig);
    ajListFree(&siglist);

    ajSeqallDel(&database);
    ajMatrixfDel(&sub);
	
    ajStrDel(&nterm);    
    ajDiroutDel(&hitsdir);
    ajDiroutDel(&aligndir);
    ajDiroutDel(&resultsdir);
    ajStrDel(&mode);


    embExit();

    return 0;    
}
Esempio n. 8
0
int main(int argc, char **argv)
{

    AjPList idlist;
    AjPList* fieldList = NULL;

    AjBool systemsort;
    AjBool cleanup;

    ajint blastv = 0;
    char dbtype  = '\0';

    ajuint maxindex;
    ajuint maxidlen = 0;
    ajuint maxlen;

    AjPStr version = NULL;
    AjPStr seqtype = NULL;

    AjPFile elistfile  = NULL;
    AjPFile* alistfile = NULL;

    AjPStr dbname   = NULL;
    AjPStr release  = NULL;
    AjPStr datestr  = NULL;
    AjPStr sortopt  = NULL;
    void **entryIds = NULL;

    AjBool usesrc = AJTRUE;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr curfilename = NULL;

    AjPStr idformat = NULL;

    EmbPEntry entry;

    PBlastDb db = NULL;

    ajuint idCount = 0;
    ajuint idDone;
    AjPList listTestFiles = NULL;
    void ** testFiles = NULL;
    ajuint nfiles;
    ajuint ifile;
    ajuint jfile;

    ajuint filesize;
    short recsize;
    ajuint maxfilelen = 20;
    char date[4] =
    {
	0,0,0,0
    };

    AjPStr tmpfname = NULL;
    AjPStr* fields  = NULL;

    AjPFile entFile = NULL;

    AjPStr* divfiles   = NULL;
    ajint* maxFieldLen = NULL;

    ajuint ifield  = 0;
    ajuint nfields = 0;

    AjPFile logfile = NULL;
    ajuint* countField = NULL;
    ajuint* fieldTot = NULL;
    ajuint idCountFile = 0;
    ajuint i = 0;

    embInit("dbiblast", argc, argv);

    idformat = ajStrNewC("NCBI");

    fields     = ajAcdGetList("fields");
    directory  = ajAcdGetDirectoryName("directory");
    indexdir   = ajAcdGetOutdirName("indexoutdir");
    filename   = ajAcdGetString("filenames");
    exclude    = ajAcdGetString("exclude");
    dbname     = ajAcdGetString("dbname");
    release    = ajAcdGetString("release");
    datestr    = ajAcdGetString("date");
    systemsort = ajAcdGetBoolean("systemsort");
    cleanup    = ajAcdGetBoolean("cleanup");
    sortopt    = ajAcdGetString("sortoptions");
    maxindex   = ajAcdGetInt("maxindex");
    version    = ajAcdGetListSingle("blastversion");
    seqtype    = ajAcdGetListSingle("seqtype");
    usesrc     = ajAcdGetBoolean("sourcefile");
    logfile    = ajAcdGetOutfile("outfile");

    while(fields[nfields])		/* array ends with a NULL */
	nfields++;

    if(nfields)
    {
	AJCNEW(maxFieldLen, nfields);
	AJCNEW0(countField, nfields);
	AJCNEW0(fieldTot, nfields);
	for(ifield=0; ifield < nfields; ifield++)
	    maxFieldLen[ifield] = (ajint) maxindex * -1;

	if(systemsort)
	    AJCNEW(alistfile, nfields);
	else
	{
	    AJCNEW(fieldList, nfields);
	    for(ifield=0; ifield < nfields; ifield++)
		fieldList[ifield] = ajListNew();
	}
    }
    
    if(ajStrMatchC(datestr, "00/00/00"))
	ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex"));

    ajStrRemoveWhite(&dbname);		/* used for temp filenames */
    embDbiDateSet(datestr, date);
    idlist = ajListNew();
    
    if(ajUtilGetBigendian())
	readReverse = ajFalse;
    else
	readReverse = ajTrue;
    
    ajStrToInt(version, &blastv);
    dbtype = ajStrGetCharFirst(seqtype);
    
    ajDebug("reading '%S/%S'\n", directory, filename);
    ajDebug("writing '%S/'\n", indexdir);
    
    listTestFiles = embDbiFileListExc(directory, filename, exclude);
    ajListSort(listTestFiles, ajStrVcmp);
    nfiles = ajListToarray(listTestFiles, &testFiles);
    if(!nfiles)
        ajDie("No input files in '%S' matched filename '%S'",
              directory, filename);
    
    embDbiLogHeader(logfile, dbname, release, datestr,
		     indexdir, maxindex);

    embDbiLogFields(logfile, fields, nfields);
    embDbiLogSource(logfile, directory, filename, exclude,
		    (AjPStr*) testFiles, nfiles);
    embDbiLogCmdline(logfile);

    AJCNEW0(divfiles, nfiles);
    
    /*
    ** process each input file, one at a time
    */
    
    jfile = 0;
    for(ifile=0; ifile < nfiles; ifile++)
    {
	curfilename = (AjPStr) testFiles[ifile];
	if(!dbiblast_blastopenlib(curfilename,
				  usesrc, blastv, dbtype, &db))
	    continue;	 /* could be the wrong file type with "*.*" */

	ajDebug("processing filename '%S' ...\n", curfilename);
	ajDebug("processing file '%S' ...\n", db->TFile->Name);


	ajStrAssignS(&divfiles[jfile], db->TFile->Name);
	ajFilenameTrimPath(&divfiles[jfile]);
	if(ajStrGetLen(divfiles[jfile]) >= maxfilelen)
	    maxfilelen = ajStrGetLen(divfiles[jfile]) + 1;

	if(systemsort)	 /* elistfile for entries, alist for fields */
	    elistfile = embDbiSortOpen(alistfile, jfile,
				       dbname, fields, nfields);

	idCountFile = 0;
	for(i=0;i<nfields;i++)
	    countField[i] = 0;
	while((entry=dbiblast_nextblastentry(db, jfile,
					     idformat, systemsort,
					     fields,
					     maxFieldLen,
					     &maxidlen, countField,
					     elistfile, alistfile)))
	{
	    idCountFile++;
	    if(!systemsort)	    /* save the entry data in lists */
	    {
		embDbiMemEntry(idlist, fieldList, nfields, entry, jfile);
	    }
	}
	idCount += idCountFile;
	if(systemsort)
	{
	    embDbiSortClose(&elistfile, alistfile, nfields);
	    /* lost the entry, so can't free it :-) */
	}

	embDbiLogFile(logfile, curfilename, idCountFile, fields,
		      countField, nfields);
	dbiblast_dbfree(&db);
	jfile++;
    }
    nfiles = jfile;
    
    /*
    ** write the division.lkp file
    */
    
    embDbiWriteDivision(indexdir, dbname, release, date,
			maxfilelen, nfiles, divfiles, NULL);
    
    /*
    ** Write the entryname.idx index
    */
    
    ajStrAssignC(&tmpfname, "entrynam.idx");
    entFile = ajFileNewOutNamePathS(tmpfname, indexdir);
    
    recsize = maxidlen+10;
    filesize = 300 + (idCount*(ajint)recsize);
    embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date);
    
    if(systemsort)
        idDone = embDbiSortWriteEntry(entFile, maxidlen,
				      dbname, nfiles, cleanup, sortopt);
    else			  /* save entries in entryIds array */
    {
        idDone = embDbiMemWriteEntry(entFile, maxidlen,
				     idlist, &entryIds);
	if(idDone != idCount)
	    ajFatal("Duplicates not allowed for in-memory processing");
    }
    
    embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone);
    ajFileClose(&entFile);
    
    /*
    ** Write the fields index files
    */
    
    for(ifield=0; ifield < nfields; ifield++)
    {

        if(maxindex)
	    maxlen = maxindex;
	else
	{
	    if(maxFieldLen[ifield] >= 0)
		maxlen = maxFieldLen[ifield];
	    else
		maxlen = - maxFieldLen[ifield];
	}

        if(systemsort)
	    fieldTot[ifield] = embDbiSortWriteFields(dbname, release,
						     date, indexdir,
						     fields[ifield], maxlen,
						     nfiles, idCount,
						     cleanup, sortopt);
	else
	    fieldTot[ifield] = embDbiMemWriteFields(dbname, release,
						    date, indexdir,
						    fields[ifield], maxlen,
						    fieldList[ifield],
						    entryIds);
    }
    
    embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot,
		   nfields, nfiles, idDone, idCount);

    if(systemsort)
	embDbiRmEntryFile(dbname, cleanup);
    
    ajListMap(idlist, embDbiEntryDelMap, NULL);
    ajListFree(&idlist);
    AJFREE(entryIds);

    ajStrDelarray(&fields);

    for(i=0;i<nfields;i++)
    {
	if(systemsort)
	{
	    ajFileClose(&alistfile[i]);
	}
	else
	{
	    ajListMap(fieldList[i], embDbiFieldDelMap, NULL);
	    ajListFree(&fieldList[i]);
	}
    }
    AJFREE(alistfile);
    AJFREE(fieldList);
    ajStrDel(&version);
    ajStrDel(&seqtype);
    ajFileClose(&elistfile);
    for(i=0;i<nfiles;i++)
    {
	ajStrDel(&divfiles[i]);
    }
    AJFREE(countField);
    AJFREE(fieldTot);

    ajStrDel(&dbname);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&sortopt);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&idformat);
    ajStrDel(&tmpfname);

    AJFREE(maxFieldLen);

    ajFileClose(&logfile);

    ajListstrFreeData(&listTestFiles);

    ajStrDel(&t);
    ajStrDel(&id);
    ajStrDel(&acc);
    ajStrDel(&hline);
    ajStrDel(&tmpdes);
    ajStrDel(&tmpfd);
    ajStrDel(&tmpgi);
    ajStrDel(&tmpdb);
    ajStrDel(&tmpac);
    ajStrDel(&tmpsv);
    ajRegFree(&wrdexp);

    embDbiEntryDel(&dbiblastEntry);

    if(fdl)
    {
        for(i=0; i < nfields; i++)
            ajListFree(&fdl[i]);
        AJFREE(fdl);
    }

    for(i=0;i<nfiles;i++)
    {
        ajStrDel(&divfiles[i]);
    }
    AJFREE(divfiles);
    AJFREE(testFiles);

    embExit();

    return 0;
}
Esempio n. 9
0
int main(int argc, char **argv)
{
    AjPList idlist;
    AjPList* fieldList = NULL;

    AjBool systemsort;
    AjBool cleanup;

    ajuint maxindex;
    ajuint maxidlen = 0;
    ajuint maxlen;

    AjPFile elistfile  = NULL;
    AjPFile* alistfile = NULL;

    AjPStr dbname   = NULL;
    AjPStr release  = NULL;
    AjPStr datestr  = NULL;
    AjPStr sortopt  = NULL;
    void **entryIds = NULL;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr curfilename = NULL;

    AjPFile libr=NULL;
    AjPStr idformat = NULL;

    EmbPEntry entry;

    ajuint idtype  = 0;

    ajuint idCount = 0;
    ajuint idDone;
    AjPList listInputFiles = NULL;
    void ** inputFiles = NULL;
    ajuint nfiles;
    ajuint ifile;

    ajuint filesize;
    short recsize;
    ajuint maxfilelen = 20;
    char date[4] =
    {
	0,0,0,0
    };

    AjPStr tmpfname = NULL;
    AjPStr* fields  = NULL;

    AjPFile entFile  = NULL;

    AjPStr* divfiles   = NULL;
    AjPRegexp regIdExp      = NULL;
    ajint* maxFieldLen = NULL;

    ajuint ifield  = 0;
    ajuint nfields = 0;

    AjPFile logfile = NULL;
    ajuint* countField = NULL;
    ajuint* fieldTot = NULL;
    ajuint idCountFile = 0;
    ajuint i;

    embInit("dbifasta", argc, argv);

    idformat   = ajAcdGetListSingle("idformat");
    fields     = ajAcdGetList("fields");
    directory  = ajAcdGetDirectoryName("directory");
    indexdir   = ajAcdGetOutdirName("indexoutdir");
    filename   = ajAcdGetString("filenames");
    exclude    = ajAcdGetString("exclude");
    dbname     = ajAcdGetString("dbname");
    release    = ajAcdGetString("release");
    datestr    = ajAcdGetString("date");
    systemsort = ajAcdGetBoolean("systemsort");
    cleanup    = ajAcdGetBoolean("cleanup");
    sortopt    = ajAcdGetString("sortoptions");
    maxindex   = ajAcdGetInt("maxindex");
    logfile    = ajAcdGetOutfile("outfile");

    while(fields[nfields])		/* array ends with a NULL */
	nfields++;

    if(nfields)
    {
	AJCNEW(maxFieldLen, nfields);
	AJCNEW0(countField, nfields);
	AJCNEW0(fieldTot, nfields);
	for(ifield=0; ifield < nfields; ifield++)
	    maxFieldLen[ifield] = (ajint)maxindex * -1;

	if(systemsort)
	    AJCNEW(alistfile, nfields);
	else
	{
	    AJCNEW(fieldList, nfields);
	    for(ifield=0; ifield < nfields; ifield++)
		fieldList[ifield] = ajListNew();
	}
    }

    if(ajStrMatchC(datestr, "00/00/00"))
	ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex"));

    ajStrRemoveWhite(&dbname);		/* used for temp filenames */
    embDbiDateSet(datestr, date);
    idlist = ajListNew();

    regIdExp = dbifasta_getExpr(idformat, &idtype);

    ajDebug("reading '%S/%S'\n", directory, filename);
    ajDebug("writing '%S/'\n", indexdir);

    listInputFiles = embDbiFileListExc(directory, filename, exclude);
    ajListSort(listInputFiles, &ajStrVcmp);
    nfiles = (ajuint) ajListToarray(listInputFiles, &inputFiles);
    if(!nfiles)
        ajDie("No input files in '%S' matched filename '%S'",
              directory, filename);

    embDbiLogHeader(logfile, dbname, release, datestr,
		     indexdir, maxindex);

    embDbiLogFields(logfile, fields, nfields);
    embDbiLogSource(logfile, directory, filename, exclude,
		    (AjPStr*) inputFiles, nfiles);
    embDbiLogCmdline(logfile);

    AJCNEW0(divfiles, nfiles);

    /*
    ** process each input file, one at a time
    */

    for(ifile=0; ifile < nfiles; ifile++)
    {
	ajStrAssignS(&curfilename,(AjPStr) inputFiles[ifile]);
	embDbiFlatOpenlib(curfilename, &libr);
	ajFilenameTrimPath(&curfilename);
	if(ajStrGetLen(curfilename) >= maxfilelen)
	    maxfilelen = ajStrGetLen(curfilename) + 1;

	ajDebug("processing filename '%S' ...\n", curfilename);
	ajDebug("processing file '%F' ...\n", libr);
	ajStrAssignS(&divfiles[ifile], curfilename);

	if(systemsort)	 /* elistfile for entries, alist for fields */
	    elistfile = embDbiSortOpen(alistfile, ifile,
				       dbname, fields, nfields);

	idCountFile = 0;
	for(i=0;i<nfields;i++)
	    countField[i] = 0;
	while((entry=dbifasta_NextFlatEntry(libr, ifile,
					    regIdExp, idtype,
					    systemsort, fields, 
					    maxFieldLen, &maxidlen,
					    countField, elistfile,
					    alistfile)))
	{
	    idCountFile++;
	    if(!systemsort)	    /* save the entry data in lists */
		embDbiMemEntry(idlist, fieldList, nfields,
			       entry, ifile);
		entry = NULL;
	}
	idCount += idCountFile;
	if(systemsort)
	{
	    embDbiSortClose(&elistfile, alistfile, nfields);
	    AJFREE(entry);
	}
	else
	{
	    embDbiEntryDel(&dbifastaGEntry);
	}
	embDbiLogFile(logfile, curfilename, idCountFile, fields,
		      countField, nfields);
    }

    /*  write the division.lkp file */
    embDbiWriteDivision(indexdir, dbname, release, date,
			maxfilelen, nfiles, divfiles, NULL);

    /* Write the entryname.idx index */
    ajStrAssignC(&tmpfname, "entrynam.idx");
    entFile = ajFileNewOutNamePathS(tmpfname, indexdir);

    recsize = maxidlen+10;
    filesize = 300 + (idCount*(ajint)recsize);
    embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date);

    if(systemsort)
        idDone = embDbiSortWriteEntry(entFile, maxidlen,
				      dbname, nfiles, cleanup, sortopt);
    else			  /* save entries in entryIds array */
    {
        idDone = embDbiMemWriteEntry(entFile, maxidlen,
				     idlist, &entryIds);
	if(idDone != idCount)
	    ajFatal("Duplicates not allowed for in-memory processing");
    }

    embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone);
    ajFileClose(&entFile);

    /* Write the fields index files */
    for(ifield=0; ifield < nfields; ifield++)
    {
        if(maxindex)
	    maxlen = maxindex;
	else
	{
	    if(maxFieldLen[ifield] >= 0)
		maxlen = maxFieldLen[ifield];
	    else
		maxlen = - maxFieldLen[ifield];
	}

        if(systemsort)
	    fieldTot[ifield] = embDbiSortWriteFields(dbname, release,
						     date, indexdir,
						     fields[ifield], maxlen,
						     nfiles, idCount,
						     cleanup, sortopt);
	else
	    fieldTot[ifield] = embDbiMemWriteFields(dbname, release,
						    date, indexdir,
						    fields[ifield], maxlen,
						    fieldList[ifield],
						    entryIds);
    }

    embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot,
		   nfields, nfiles, idDone, idCount);

    if(systemsort)
	embDbiRmEntryFile(dbname, cleanup);

    ajStrDel(&idformat);
    ajStrDelarray(&fields);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&dbname);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&sortopt);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&tmpfname);
    ajFileClose(&libr);
    ajFileClose(&logfile);

    for(i=0;i<nfields;i++)
    {
	if(systemsort)
	{
	    ajFileClose(&alistfile[i]);
	}
	else
	{
	    ajListMap(fieldList[i], &embDbiFieldDelMap, NULL);
	    ajListFree(&fieldList[i]);
	}
    }

    AJFREE(alistfile);
    AJFREE(fieldList);
    AJFREE(maxFieldLen);
    AJFREE(countField);
    AJFREE(fieldTot);

    for(i=0;i<nfiles;i++)
    {
	ajStrDel(&divfiles[i]);
    }

    AJFREE(divfiles);
    AJFREE(inputFiles);

    embDbiEntryDel(&dbifastaGEntry);

    ajStrDel(&dbifastaGRline);
    ajStrDel(&dbifastaGTmpId);

    if(dbifastaGFdl)
    {
	for(i=0; i < nfields; i++)
	    ajListFree(&dbifastaGFdl[i]);
	AJFREE(dbifastaGFdl);
    }

    ajListMap(idlist, &embDbiEntryDelMap, NULL);
    ajListFree(&idlist);
    ajListstrFreeData(&listInputFiles);
    AJFREE(entryIds);
    ajRegFree(&dbifastaGIdexp);
    ajRegFree(&dbifastaGWrdexp);
    ajRegFree(&regIdExp);

    ajStrDel(&dbifastaGTmpAc);
    ajStrDel(&dbifastaGTmpSv);
    ajStrDel(&dbifastaGTmpGi);
    ajStrDel(&dbifastaGTmpDb);
    ajStrDel(&dbifastaGTmpDes);
    ajStrDel(&dbifastaGTmpFd);
    ajStrDel(&curfilename);

    embExit();

    return 0;
}
Esempio n. 10
0
/* @prog ssematch ***********************************************************
**
** Searches a DCF file (domain classification file) for secondary structure 
** matches.
**
****************************************************************************/
int main(int argc, char **argv)
{
    /* Variables declarations */
    AjPFile dcfin        = NULL; /* Domain classification file */
    AjPFile ssin         = NULL; /* Secondary structure input file*/
    AjPMatrixf matrix    = NULL; /* Substitution matrix */
    AjPFile out_ss       = NULL; /* For ss top matches*/
    AjPFile out_se       = NULL; /* For se top matches*/
    AjPFile outfile      = NULL; /* Output file*/
    AjPFile logf         = NULL; /* Log file */
    float gapopen_sss    = 0.0;  /* Gap insertion penalty */
    float gapopen_sse    = 0.0;
    float gapopen        = 0.0;
    float gapextend_sss  = 0.0;  /* Gap extension penalty */
    float gapextend_sse  = 0.0;
    float gapextend      = 0.0;
    ajint max_hits       = 0;    /* number of top alignments to display*/
    ajint mode           = 0;
    ajint x              = 0;

    AjPScop temp_scop    = NULL; /* scop object pointer*/

    AjPList  scop_list   = NULL; /* list of scop objects for entire domain
				    classification file */
     
    AjIList       iter   = NULL;
    AjPStr        msg    = NULL; /* Pointer to String used for messages */
    AjPStr        line   = NULL;
   
    AjPStr    qse        = NULL; /* query secondary structure elements*/
    AjPStr    qss        = NULL; /* query secondary structure (by residue)*/
    AjPSeq    q3se       = NULL; /* query secondary structure elements, 3-letter 
				    code*/
    AjPSeq    q3ss       = NULL; /* query secondary structure 
				    (by residue), 3-letter code*/
    AjPSeq    query      = NULL;
    




    /* Read data from acd */
    embInitPV("ssematch",argc,argv,"DOMAINATRIX",VERSION);
    dcfin       = ajAcdGetInfile("dcfinfile");
    ssin       = ajAcdGetInfile("ssinfile");
    max_hits      = ajAcdGetInt("maxhits");
    matrix        = ajAcdGetMatrixf("datafile");
    gapopen_sss   = ajAcdGetFloat("rgapopen");
    gapextend_sss = ajAcdGetFloat("rgapextend");
    gapopen_sse   = ajAcdGetFloat("egapopen");
    gapextend_sse = ajAcdGetFloat("egapextend");
    out_ss        = ajAcdGetOutfile("outssfile");
    out_se        = ajAcdGetOutfile("outsefile");
    logf       = ajAcdGetOutfile("logfile");






    /* Create list of scop objects for entire input domain classification file. */
    scop_list  = ajListNew();
    while((temp_scop = (ajScopReadCNew(dcfin, "*"))))
        ajListPushAppend(scop_list,temp_scop);


    /* Error handing if domain classification file was empty. */
    if(!(ajListGetLength(scop_list)))
    {
      
        ajWarn("Empty list from scop input file\n");
	ajFileClose(&dcfin);
	ajFileClose(&ssin);
	ajMatrixfDel(&matrix);
       	ajFileClose(&out_ss);
	ajFileClose(&out_se);
       	ajFileClose(&logf);
	while(ajListPop(scop_list, (void *) &temp_scop))
	    ajScopDel(&temp_scop);
	ajListFree(&scop_list);
        ajListIterDel(&iter);    
	
	ajExit();
	return 1;
    }

    /* Error handling in case of empty query file. */
    if(ssin == NULL)   
    {
        ajWarn("Empty secondary structure query file\n");
	ajFileClose(&dcfin);
	ajFileClose(&ssin);
	ajMatrixfDel(&matrix);
       	ajFileClose(&out_ss);
	ajFileClose(&out_se);
       	ajFileClose(&logf);
	while(ajListPop(scop_list, (void *) &temp_scop))
	    ajScopDel(&temp_scop);
	ajListFree(&scop_list);
        ajListIterDel(&iter);    
	
	ajExit();
	return 1; 
    }      
    
    /* Assign sequences in query file to sequence objects. */
    qse = ajStrNew();
    qss = ajStrNew();
    
    while(ajReadlineTrim(ssin,&line))
    {
        /* SE string */
        if(ajStrPrefixC(line,"SE"))
	{
	    ajFmtScanS(line, "%*s %S", &qse);
            /* Convert this string to 3-letter code & then convert to AjPSeq 
	       object. */
            q3se = ssematch_convertbases(qse);
        }
        /* SS string */
	else if(ajStrPrefixC(line,"SS"))
	{
            while((ajReadlineTrim(ssin,&line)) && !ajStrPrefixC(line,"XX"))
                ajStrAppendS(&qss,line);
            ajStrRemoveWhite(&qss);

            /* Convert this string to 3-letter code & then to AjPSeq object. */
            q3ss = ssematch_convertbases(qss);
      	}
    }



    /* For se & then for ss, modes 0 & 1. */
    for(mode = 0; mode <= 1; mode++)
    {
        /* Assign arguments for alignment function. */
        if (mode == 0) 
        {
            query = q3se;
            gapopen = gapopen_sse; 
            gapextend = gapextend_sse;
            outfile =  out_se;
        } 
        else if(mode == 1)
        {
            query = q3ss;  
            gapopen = gapopen_sss; 
            gapextend = gapextend_sss;
            outfile =  out_ss;   
        }


        /* Iterate through list of scop objects & calculate alignment scores. */
        iter=ajListIterNew(scop_list);
        while((temp_scop=(AjPScop)ajListIterGet(iter)))
        {
            /* The function extracts the se (mode 0) or ss (mode 1) subject 
	       sequences from the scop object, performs a Needleman-Wunsch 
	       global alignment with the query sequence & allocates the score 
	       to the Score element of the scop object*/ 

	    if(!(ssematch_NWScore(temp_scop , query, mode, matrix, gapopen, gapextend)))

	    {
                ajFmtPrintF(logf, "%-15s\n", "ALIGNMENT");
                ajFmtPrintF(logf, "Could not align sequence in scop domain %S\n ", 
			    temp_scop->Entry); 
	        ajFmtPrintS(&msg, "Could not align sequence in scop domain %S\n ", 
			    temp_scop->Entry);
	        ajWarn(ajStrGetPtr(msg));
                continue;
	    }
	}


        ajListIterDel(&iter);
      	temp_scop    = NULL;


        /* Sort list of Scop objects by Score */
        ajListSort(scop_list, ssematch_CompScoreInv);
	
	
        iter=ajListIterNew(scop_list);
        /* Write top-scoring hits to outfile. */
        for(x=0; x < max_hits; x++ )
	{
            temp_scop=(AjPScop)ajListIterGet(iter);
 
	    /* Print score to output file. */
	    ajFmtPrintF(outfile, "XX   ALIGNMENT SCORE %.3f\nXX\n", temp_scop->Score);
	    
	    /* Could also write alignment - later modification. */
	    if(!ajScopWrite(outfile, temp_scop))
		ajFatal("Could not write output file %S\n", outfile);

        
        }

	ajListIterDel(&iter); 
     	temp_scop    = NULL;
    }


      
    /* Memoryt management. */
    ajFileClose(&dcfin);
    ajFileClose(&ssin);
    ajMatrixfDel(&matrix);
    ajFileClose(&out_ss);
    ajFileClose(&out_se);
    ajFileClose(&logf);
    while(ajListPop(scop_list, (void *) &temp_scop))
	ajScopDel(&temp_scop);
    ajListFree(&scop_list);
    ajStrDel(&msg);
    ajStrDel(&line);
    ajStrDel(&qse); 
    ajStrDel(&qss);
    ajSeqDel(&q3se);
    ajSeqDel(&q3ss);
    

    ajExit();
    return 0;
}
Esempio n. 11
0
int main(int argc, char **argv)
{
    AjPFile outf = NULL;

    AjPSeq sequence = NULL;
    AjPStr substr   = NULL;
    AjPStr seqstr = NULL;
    AjPStr revstr = NULL;

    AjPStr p1;
    AjPStr p2;

    PPrimer eric = NULL;
    PPrimer fred = NULL;

    PPrimer f;
    PPrimer r;

    PPair pair;

    AjPList forlist  = NULL;
    AjPList revlist  = NULL;
    AjPList pairlist = NULL;

    AjBool targetrange;
    AjBool isDNA  = ajTrue;
    AjBool dolist = ajFalse;

    ajint primerlen    = 0;
    ajint minprimerlen = 0;
    ajint maxprimerlen = 0;
    ajint minprodlen   = 0;
    ajint maxprodlen   = 0;
    ajint prodlen      = 0;

    ajint seqlen = 0;
    ajint stepping_value = 1;

    ajint targetstart = 0;
    ajint targetend   = 0;

    ajint limit    = 0;
    ajint limit2   = 0;
    ajint lastpos  = 0;
    ajint startpos = 0;
    ajint endpos   = 0;

    ajint begin;
    ajint end;
    ajint v1;
    ajint v2;

    ajint overlap;

    float minpmGCcont   = 0.;
    float maxpmGCcont   = 0.;
    float minprodGCcont = 0.;
    float maxprodGCcont = 0.;
    float prodTm;
    float prodGC;

    ajint i;
    ajint j;

    ajint neric=0;
    ajint nfred=0;
    ajint npair=0;

    float minprimerTm = 0.0;
    float maxprimerTm = 0.0;

    float saltconc = 0.0;
    float dnaconc  = 0.0;

    embInit ("prima", argc, argv);

    substr = ajStrNew();

    forlist  = ajListNew();
    revlist  = ajListNew();
    pairlist = ajListNew();

    p1 = ajStrNew();
    p2 = ajStrNew();


    sequence = ajAcdGetSeq("sequence");
    outf     = ajAcdGetOutfile("outfile");

    minprimerlen = ajAcdGetInt("minprimerlen");
    maxprimerlen = ajAcdGetInt("maxprimerlen");
    minpmGCcont  = ajAcdGetFloat("minpmGCcont");
    maxpmGCcont  = ajAcdGetFloat("maxpmGCcont");
    minprimerTm  = ajAcdGetFloat("mintmprimer");
    maxprimerTm  = ajAcdGetFloat("maxtmprimer");

    minprodlen    = ajAcdGetInt("minplen");
    maxprodlen    = ajAcdGetInt("maxplen");
    minprodGCcont = ajAcdGetFloat("minpgccont");
    maxprodGCcont = ajAcdGetFloat("maxpgccont");

    saltconc = ajAcdGetFloat("saltconc");
    dnaconc  = ajAcdGetFloat("dnaconc");

    targetrange = ajAcdGetToggle("targetrange");
    targetstart = ajAcdGetInt("targetstart");
    targetend   = ajAcdGetInt("targetend");

    overlap = ajAcdGetInt("overlap");
    dolist  = ajAcdGetBoolean("list");

    seqstr = ajSeqGetSeqCopyS(sequence);
    ajStrFmtUpper(&seqstr);

    begin  = ajSeqGetBegin(sequence);
    end    = ajSeqGetEnd(sequence);
    seqlen = end-begin+1;

    ajStrAssignSubC(&substr,ajStrGetPtr(seqstr),begin-1,end-1);
    revstr = ajStrNewC(ajStrGetPtr(substr));
    ajSeqstrReverse(&revstr);

    AJCNEW0(entropy, seqlen);
    AJCNEW0(enthalpy, seqlen);
    AJCNEW0(energy, seqlen);

    /* Initialise Tm calculation arrays */
    ajMeltTempSave(ajStrGetPtr(substr),0,seqlen,saltconc,dnaconc,1,
	  &entropy, &enthalpy, &energy);


    ajFmtPrintF(outf, "\n\nINPUT SUMMARY\n");
    ajFmtPrintF(outf, "*************\n\n");

    if(targetrange)
	ajFmtPrintF
	    (outf, "Prima of %s from positions %d to %d bps\n",
	     ajSeqGetNameC(sequence),targetstart, targetend);
    else
	ajFmtPrintF(outf, "Prima of %s\n", ajSeqGetNameC(sequence));

    ajFmtPrintF(outf, "PRIMER CONSTRAINTS:\n");
    ajFmtPrintF
	(outf, "PRIMA DOES NOT ALLOW PRIMER SEQUENCE AMBIGUITY OR ");
    ajFmtPrintF(outf,"DUPLICATE PRIMER ENDPOINTS\n");
    ajFmtPrintF(outf,
		"Primer size range is %d-%d\n",minprimerlen,maxprimerlen);
    ajFmtPrintF(outf,
		"Primer GC content range is %.2f-%.2f\n",minpmGCcont,
		maxpmGCcont);
    ajFmtPrintF(outf,"Primer melting Temp range is %.2f - %.2f C\n",
		minprimerTm, maxprimerTm);

    ajFmtPrintF (outf, "PRODUCT CONSTRAINTS:\n");

    ajFmtPrintF(outf,"Product GC content range is %.2f-%.2f\n",
		minprodGCcont, maxprodGCcont);

    ajFmtPrintF(outf, "Salt concentration is %.2f (mM)\n", saltconc);
    ajFmtPrintF(outf, "DNA concentration is %.2f (nM)\n", dnaconc);



    if(targetrange)
	ajFmtPrintF(outf, "Targeted range to amplify is from %d to %d\n",
		    targetstart,targetend);
    else
    {
	ajFmtPrintF(outf,"Considering all suitable Primer pairs with ");
	ajFmtPrintF(outf,"Product length ranges %d to %d\n\n\n", minprodlen,
		    maxprodlen);
    }


    ajFmtPrintF(outf, "\n\nPRIMER/PRODUCT PAIR CALCULATIONS & OUTPUT\n");
    ajFmtPrintF(outf, "*****************************************\n\n");


    if(seqlen-minprimerlen < 0)
	ajFatal("Sequence too short");

    if(targetrange)
    {
	ajStrAssignSubC(&p1,ajStrGetPtr(substr),targetstart-begin,targetend-begin);

	prodGC = ajMeltGC(substr,seqlen);
	prodTm = ajMeltTempProd(prodGC,saltconc,seqlen);

	if(prodGC<minprodGCcont || prodGC>maxprodGCcont)
	{
	    ajFmtPrintF(outf,
			"Product GC content [%.2f] outside acceptable range\n",
			prodGC);
	    embExitBad();
	    return 0;
	}

	prima_testtarget(substr, revstr, targetstart-begin, targetend-begin,
			 minprimerlen, maxprimerlen,
			 seqlen, minprimerTm, maxprimerTm, minpmGCcont,
			 maxpmGCcont, minprodGCcont, maxprodGCcont, saltconc,
			 dnaconc, pairlist, &npair);
    }



    if(!targetrange)
    {

    limit   = seqlen-minprimerlen-minprodlen+1;
    lastpos = seqlen-minprodlen;
    limit2  = maxprodlen-minprodlen;

    /* Outer loop selects all possible product start points */
    for(i=minprimerlen; i<limit; ++i)
    {
	startpos = i;
	ajDebug("Position in sequence %d\n",startpos);
	endpos = i+minprodlen-1;
	/* Inner loop selects all possible product lengths  */
	for(j=0; j<limit2; ++j, ++endpos)
	{
	    if(endpos>lastpos)
		break;

	    v1 = endpos-startpos+1;
	    ajStrAssignSubC(&p1,ajStrGetPtr(substr),startpos,endpos);
	    prodGC = ajMeltGC(p1,v1);
	    prodTm = ajMeltTempProd(prodGC,saltconc,v1);

	    if(prodGC<minprodGCcont || prodGC>maxprodGCcont)
		continue;

	    /* Only accept primers with acceptable Tm and GC */
	    neric = 0;
	    nfred = 0;
	    prima_testproduct(substr, startpos, endpos, primerlen,
			      minprimerlen, maxprimerlen,minpmGCcont,
			      maxpmGCcont, minprimerTm, maxprimerTm,
			      minprodlen, maxprodlen, prodTm, prodGC, seqlen,
			      &eric,&fred,forlist,revlist,&neric,&nfred,
			      stepping_value, saltconc,dnaconc, isDNA, begin);
	    if(!neric)
		continue;



	    /* Now reject those primers with self-complementarity */

	    prima_reject_self(forlist,revlist,&neric,&nfred);
	    if(!neric)
		continue;

	    /* Reject any primers that could bind elsewhere in the
               sequence */
	    prima_test_multi(forlist,revlist,&neric,&nfred,substr,revstr,
			     seqlen);



	    /* Now select the least complementary pair (if any) */
	    prima_best_primer(forlist, revlist, &neric, &nfred);
	    if(!neric)
		continue;

	    AJNEW(pair);
	    ajListPop(forlist,(void **)&f);
	    ajListPop(revlist,(void **)&r);
	    pair->f = f;
	    pair->r = r;
	    ++npair;
	    ajListPush(pairlist,(void *)pair);
	}
     }

  }


    if(!targetrange)
    {
	/* Get rid of primer pairs nearby the top scoring ones */
	prima_TwoSortscorepos(&pairlist);
	prima_prune_nearby(pairlist, &npair, maxprimerlen-1);
	ajListSort(pairlist,prima_PosCompare);
	prima_check_overlap(pairlist,&npair,overlap);
    }



    if(npair)
    {
	if(!targetrange)
	    ajFmtPrintF(outf,"%d pairs found\n\n",npair);
	else
	    ajFmtPrintF(outf,
			"Closest primer pair to specified product is:\n\n");

	if((maxprimerlen<26 && seqlen<999999 && !dolist))
	    ajFmtPrintF(outf,"\n\t\tForward\t\t\t\t\tReverse\n\n");
    }



    for(i=0;i<npair;++i)
    {
	if(!targetrange)
	    ajFmtPrintF(outf,"[%d]\n",i+1);

	ajListPop(pairlist,(void **)&pair);


	prodlen = pair->r->start - (pair->f->start + pair->f->primerlen);

	if((maxprimerlen<26 && seqlen<999999 && !dolist))
	{
	    v1 = pair->f->start;
	    v2 = v1 + pair->f->primerlen -1;

	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"%6d %-25.25s %d\t", v1+begin, ajStrGetPtr(p1),
			v2+begin);


	    v1 = pair->r->start;
	    v2 = v1 + pair->r->primerlen -1;
	    ajStrAssignSubS(&p2,substr,v1,v2);
	    ajSeqstrReverse(&p2);
	    ajFmtPrintF(outf,
			"%6d %-25.25s %d\n", v1+begin, ajStrGetPtr(p2), v2+begin);


	    ajFmtPrintF(outf,"       Tm  %.2f C  (GC %.2f%%)\t\t       ",
			pair->f->primerTm,pair->f->primGCcont*100.);
	    ajFmtPrintF(outf,"Tm  %.2f C  (GC %.2f%%)\n",
			pair->r->primerTm,pair->r->primGCcont*100.);

	    ajFmtPrintF(outf,"             Length: %-32dLength: %d\n",
			pair->f->primerlen,pair->r->primerlen);
	    ajFmtPrintF(outf,"             Tma:    %.2f C\t\t\t",
			ajAnneal(pair->f->primerTm,pair->f->prodTm));
	    ajFmtPrintF(outf,"     Tma:    %.2f C\n\n\n",
			ajAnneal(pair->r->primerTm,pair->f->prodTm));


	    ajFmtPrintF(outf,"       Product GC: %.2f%%\n",
			pair->f->prodGC * 100.0);
	    ajFmtPrintF(outf,"       Product Tm: %.2f C\n",
			pair->f->prodTm);
	    ajFmtPrintF(outf,"       Length:     %d\n\n\n",prodlen);
	}
	else
	{
	    ajFmtPrintF(outf,"    Product from %d to %d\n",pair->f->start+
			pair->f->primerlen+begin,pair->r->start-1+begin);
	    ajFmtPrintF(outf,"                 Tm: %.2f C   GC: %.2f%%\n",
			pair->f->prodTm,pair->f->prodGC*(float)100.);
	    ajFmtPrintF(outf,"                 Length: %d\n\n\n",prodlen);


	    v1 = pair->f->start;
	    v2 = v1 + pair->f->primerlen -1;
	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"    Forward: 5' %s 3'\n",ajStrGetPtr(p1));
	    ajFmtPrintF(outf,"             Start: %d\n",v1+begin);
	    ajFmtPrintF(outf,"             End:   %d\n",v2+begin);
	    ajFmtPrintF(outf,"             Tm:    %.2f C\n",
			pair->f->primerTm);
	    ajFmtPrintF(outf,"             GC:    %.2f%%\n",
			pair->f->primGCcont*(float)100.);
	    ajFmtPrintF(outf,"             Len:   %d\n",
			pair->f->primerlen);
	    ajFmtPrintF(outf,"             Tma:   %.2f C\n\n\n",
			ajAnneal(pair->f->primerTm,pair->f->prodTm));

	    v1 = pair->r->start;
	    v2 = v1 + pair->r->primerlen -1;
	    ajStrAssignSubS(&p2,substr,v1,v2);
	    ajSeqstrReverse(&p2);
	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"    Reverse: 5' %s 3'\n",ajStrGetPtr(p1));
	    ajFmtPrintF(outf,"             Start: %d\n",v1+begin);
	    ajFmtPrintF(outf,"             End:   %d\n",v2+begin);
	    ajFmtPrintF(outf,"             Tm:    %.2f C\n",
			pair->r->primerTm);
	    ajFmtPrintF(outf,"             GC:    %.2f%%\n",
			pair->r->primGCcont*(float)100.);
	    ajFmtPrintF(outf,"             Len:   %d\n",
			pair->r->primerlen);
	    ajFmtPrintF(outf,"             Tma:   %.2f C\n\n\n",
			ajAnneal(pair->r->primerTm,pair->f->prodTm));
	}

	prima_PrimerDel(&pair->f);
	prima_PrimerDel(&pair->r);
	AJFREE(pair);
    }



    ajStrDel(&seqstr);
    ajStrDel(&revstr);
    ajStrDel(&substr);
    ajStrDel(&p1);
    ajStrDel(&p2);

    ajListFree(&forlist);
    ajListFree(&revlist);
    ajListFree(&pairlist);

    ajFileClose(&outf);
    ajSeqDel(&sequence);

    AJFREE(entropy);
    AJFREE(enthalpy);
    AJFREE(energy);

    embExit();

    return 0;
}
Esempio n. 12
0
void embPropCalcFragments(const char *s, ajint n,
			  AjPList *l, AjPList *pa,
			  AjBool unfavoured, AjBool overlap,
			  AjBool allpartials, ajint *ncomp, ajint *npart,
			  AjPStr *rname, AjBool nterm, AjBool cterm,
			  AjBool dorag, EmbPPropMolwt const *mwdata,
			  AjBool mono)
{
    static const char *PROPENZReagent[]=
    {
	"Trypsin","Lys-C","Arg-C","Asp-N","V8-bicarb","V8-phosph",
	"Chymotrypsin","CNBr"
    };

    static const char *PROPENZSite[]=
    {
	"KR","K","R","D","E","DE","FYWLM","M"
    };

    static const char *PROPENZAminoCarboxyl[]=
    {
	"CC","C","C","N","C","CC","CCCCC","C"
    };

    static const char *PROPENZUnfavoured[]=
    {
	"KRIFLP","P","P","","KREP","P","P",""
    };


    ajint i;
    ajint j;
    ajint lim;
    ajint len;
    AjPList t;
    EmbPPropFrag fr;
    ajint *begsa = NULL;
    ajint *endsa = NULL;
    double molwt;
    double *molwtsa = NULL;
    AjBool *afrag   = NULL;
    ajint mark;
    ajint bwp;
    ajint ewp;
    ajint *ival;
    ajint defcnt;

    ajint it;
    ajint st = 0;
    ajint mt = 0;
    ajint et = 0;

    ajStrAssignC(rname,PROPENZReagent[n]);
    defcnt = 0;
    len = (ajint) strlen(s);

    t = ajListNew();			/* Temporary list */


    /* First get all potential cut points */
    for(i=0;i<len;++i)
    {
	if(!strchr(PROPENZSite[n],s[i]))
	    continue;

	if(len==i+1)
	    continue;

	if(strchr(PROPENZUnfavoured[n],s[i+1])
	   && !unfavoured)
	    continue;

	AJNEW0(ival);
	*ival = i;
	ajListPushAppend(t,(void *)ival);
	++defcnt;
    }

    if(defcnt)
    {
	AJCNEW(begsa,(defcnt+1));
	AJCNEW(endsa,(defcnt+1));
	AJCNEW(molwtsa,(defcnt+1));
	AJCNEW(afrag,(defcnt+1));
    }

    for(i=0;i<defcnt;++i)  /* Pop them into a temporary array 	 */
    {
	ajListPop(t,(void **)&ival);
	endsa[i] = *ival;
	AJFREE(ival);
    }


    mark = 0;

    for(i=0;i<defcnt;++i)  /* Work out true starts, ends and molwts */
    {
	bwp = mark;
	ewp = endsa[i];

	if(strchr(PROPENZAminoCarboxyl[n],'N'))
	    --ewp;

	molwt=embPropCalcMolwt(s,bwp,ewp,mwdata,mono);

	if(n==PROPENZCNBR)
	    molwt -= (17.045 + 31.095);

	begsa[i]   = mark;
	endsa[i]   = ewp;
	molwtsa[i] = molwt;
	afrag[i]   = ajFalse;
	mark       = ewp+1;
    }

    if(defcnt)		   /* Special treatment for last fragment   */
    {
	molwt = embPropCalcMolwt(s,mark,len-1,mwdata,mono);
	if(n==PROPENZCNBR)
	    molwt -= (17.045 + 31.095);
	begsa[i]   = mark;
	endsa[i]   = len-1;
	molwtsa[i] = molwt;
	afrag[i]   = ajFalse;
	++defcnt;
    }

    /* Push the hits */
    for(i=0;i<defcnt;++i)
    {
	if(dorag)
	{
	    st = begsa[i];
	    et = endsa[i];

	    for(it=st+RAG_MINPEPLEN-1; it < et; ++it)
	    {
		AJNEW0(fr);
		fr->start = st;
		fr->end   = it;
		fr->molwt = embPropCalcMolwt(s,st,it,mwdata,mono);

		if(n == PROPENZCNBR)
		    fr->molwt -= (17.045 + 31.095);

		fr->isfrag = ajTrue;
		ajListPush(*l,(void *)fr);
	    }
	}
	
	AJNEW0(fr);
	fr->start  = begsa[i];
	fr->end    = endsa[i];
	fr->molwt  = molwtsa[i];
	fr->isfrag = afrag[i];
	ajListPush(*l,(void *) fr);

	if(dorag && nterm)
	    for(it=st+1; it < et-RAG_MINPEPLEN+2; ++it)
	    {
		AJNEW0(fr);
		fr->start = it;
		fr->end   = et;
		fr->molwt = embPropCalcMolwt(s,it,et,mwdata,mono);

		if(n == PROPENZCNBR)
		    fr->molwt -= (17.045 + 31.095);

		fr->isfrag = ajTrue;
		ajListPush(*l,(void *)fr);
	    }
    }

    if(!dorag)
	ajListSort(*l, &propFragCompare);
    *ncomp = defcnt;


    /* Now deal with overlaps */
    *npart = 0;

    lim = defcnt -1;

    if(overlap && !allpartials)
    {
	for(i=0;i<lim;++i)
	{
	    if(dorag)
	    {
		st = begsa[i];
		mt = endsa[i];
		et = endsa[i+1];

		if(cterm)
		    for(it=mt+1; it < et; ++it)
		    {
			AJNEW0(fr);
			fr->start = st;
			fr->end   = it;
			fr->molwt = embPropCalcMolwt(s,st,it,mwdata,mono);

			if(n == PROPENZCNBR)
			    fr->molwt -= (17.045 + 31.095);

			fr->isfrag = ajTrue;
			ajListPush(*l,(void *)fr);
		    }
	    }

	    AJNEW0(fr);
	    fr->isfrag = ajTrue;
	    fr->molwt = embPropCalcMolwt(s,begsa[i],endsa[i+1],mwdata,mono);
	    if(n==PROPENZCNBR)
		fr->molwt -= (17.045 + 31.095);
	    fr->start = begsa[i];
	    fr->end   = endsa[i+1];
	    ajListPush(*pa,(void *)fr);
	    ++(*npart);

	    if(dorag && nterm)
		for(it=st+1; it<mt; ++it)
		{
		    AJNEW0(fr);
		    fr->start = it;
		    fr->end   = et;
		    fr->molwt = embPropCalcMolwt(s,it,et,mwdata,mono);

		    if(n == PROPENZCNBR)
			fr->molwt -= (17.045 + 31.095);

		    fr->isfrag = ajTrue;
		    ajListPush(*l,(void *)fr);
		}
	}

	if(*npart)			/* Remove complete sequence */
	{
	    --(*npart);
	    ajListPop(*pa,(void **)&fr);
	}

	if(!dorag)
	    ajListSort(*pa, &propFragCompare);
    }

    if(allpartials)
    {
        lim = defcnt;

	for(i=0;i<lim;++i)
	    for(j=i+1;j<lim;++j)
	    {
		AJNEW0(fr);
		fr->isfrag = ajTrue;
		fr->molwt = embPropCalcMolwt(s,begsa[i],endsa[j],mwdata,mono);

		if(n==PROPENZCNBR)
		    fr->molwt -= (17.045 + 31.095);

		fr->start = begsa[i];
		fr->end   = endsa[j];
		ajListPush(*pa,(void *)fr);
		++(*npart);
	    }

	if(*npart)			/* Remove complete sequence */
	{
	    --(*npart);
	    ajListPop(*pa,(void **)&fr);
	}

	if(!dorag)
	    ajListSort(*pa, &propFragCompare);
    }

    if(defcnt)
    {
	AJFREE(molwtsa);
	AJFREE(endsa);
	AJFREE(begsa);
	AJFREE(afrag);
    }

    ajListFree(&t);

    return;
}
Esempio n. 13
0
/* @prog domainreso ***********************************************************
**
** Removes low resolution domains from a DCF file (domain
** classification file).
**
******************************************************************************/
int main(ajint argc, char **argv)
{

    AjPList     cpdb_path  = NULL; /* Location of coordinate files for input */
    AjPStr      cpdb_name  = NULL; /* Name of coordinate file                */
    AjPStr      temp       = NULL; /* temp string                            */
    AjPStr      temp2      = NULL; /* temp string                            */
    AjPList     entry      = NULL; /* List of pdb codes with resolution      */
                                   /* ABOVE the threshold                    */
    AjPStr     *entryarr   = NULL; /* entry as an array                      */
    

    AjPFile     fptr_cpdb  = NULL; /* Pointer to current coordinate file     */
    AjPFile     dcfin      = NULL; /* DCF input file                         */
    AjPFile     dcfout     = NULL; /* DCF output file                        */

    AjPPdb      pdb        = NULL; /* Pdb object pointer                     */    
    AjPDomain   domain     = NULL; /* Domain structure                       */
 

    float       threshold  = 0.0;  /* Resolution threshold                   */
    ajint       num        = 0;    /* number of nodes in list                */

    ajint       type       = 0;    /* Type of domain (ajSCOP or ajCATH) in 
				      the DCF file                           */


    
    
                                           
    /* Read data from acd */
    embInitPV("domainreso",argc,argv,"DOMAINATRIX",VERSION);
    cpdb_path     = ajAcdGetDirlist("cpdbpath");    
    threshold     = ajAcdGetFloat("threshold");
    dcfin         = ajAcdGetInfile("dcfinfile");
    dcfout        = ajAcdGetOutfile("dcfoutfile");




    
    /* Allocate strings etc. */
    cpdb_name     = ajStrNew();
    temp          = ajStrNew();

    /* Create list . */
    entry    = ajListNew();


    /* Create list of files in CPDB directory. */
    

    /* Determine number of nodes on list    */
    num = ajListGetLength(cpdb_path);



/*
 domainreso reads a directory of clean coordinate files file, creates a list 
 of the files, then reads every list entry and extracts the resolution of the 
 structure.  If the value is less than a threshold (user defined) then the 
 domain identifier is pushed onto a list.  The DCF file (domain classification 
 file) is then read and domain identifiers compared to those on the list, if 
 found then the domain structure data is written the new DCF file.    
*/


    type = ajDomainDCFType(dcfin);


    /* Start of main application loop                         */
    /* Produce list of pdb codes with resolution              */
    /* ABOVE the threshold.                                   */
    while(ajListPop(cpdb_path,(void **)&temp))
    {
        /* Open coordinate file. */
        if((fptr_cpdb=ajFileNewInNameS(temp))==NULL)
        {
	    ajWarn("Could not open cpdb file");
            ajStrDel(&temp);
            continue;       
        }
	ajFmtPrint("%S\n", temp);
	fflush(stdout);
	
	

        /* Read coordinate data file. */ 
	pdb = ajPdbReadFirstModelNew(fptr_cpdb);
        

        /* Check if resolution is above threshold. */
        if(pdb->Reso > threshold)
	{
	    /* assign ID to list. */
	    temp2=ajStrNew();
	    ajStrAssignS(&temp2, pdb->Pdb);
	    ajListPush(entry, (AjPStr) temp2);
	}        
	
        /* Close coordinate file and tidy up*/
        ajPdbDel(&pdb);
        ajFileClose(&fptr_cpdb);
	ajStrDel(&temp);
    }
    num = ajListGetLength(entry);
    

    /* Sort the list of pdb codes & convert to an array. */
    ajListSort(entry, domainreso_StrComp);
    ajListToarray(entry, (void ***)&entryarr);
    
    
    /* Read DCF file and compare IDs to those in list          
     if not present then write domain structure data to output. . */
    while((domain=(ajDomainReadCNew(dcfin, "*", type))))
    {
	/* DOMAIN id not found in the list of domains with resolution 
	   above the threshold, so include it in the output file. */
	if((domainreso_StrBinSearchDomain(ajDomainGetId(domain), 
					  entryarr, num))==-1)
	    ajDomainWrite(dcfout, domain);

        /* Delete domain structure. */
        ajDomainDel(&domain);
    }


    /* Tidy up. */
    ajStrDel(&temp2);

    ajStrDel(&cpdb_name);
    ajFileClose(&dcfout);
    ajFileClose(&dcfin);
    ajListFree(&cpdb_path);
    ajListFree(&entry);
    AJFREE(entryarr);
    
  
    /* Return. */
    ajExit();
    return 0;
}