예제 #1
0
/* @funcstatic domainalign_writeid ********************************************
**
** House-keeping function.
**
** @param [r] domain [AjPDomain]   Domain.
** @param [r] noden  [ajint]       Node number.
** @param [r] daf    [AjPDirout]   Domain alignment files.
** @param [r] super  [AjPDirout]   Structural superimposition files.
** @param [r] align  [AjPStr *]    Align.
** @param [r] alignc [AjPStr *]    Alignc.
** 
** @return [void]
** 
** @@
****************************************************************************/
static void domainalign_writeid(AjPDomain domain, 
				ajint noden, 
				AjPDirout daf, 
				AjPDirout super, 
				AjPStr *align, 
				AjPStr *alignc) 
{
    AjPStr temp=NULL;
    
    temp=ajStrNew();
    
    if(noden==1) 
	ajStrFromInt(&temp, domain->Scop->Sunid_Class);
    else if (noden==2)
	ajStrFromInt(&temp, domain->Scop->Sunid_Fold);
    else if (noden==3)
	ajStrFromInt(&temp, domain->Scop->Sunid_Superfamily);
    else if (noden==4)
	ajStrFromInt(&temp, domain->Scop->Sunid_Family);
    else if (noden==5)
	ajStrFromInt(&temp, domain->Cath->Class_Id );
    else if (noden==6)
	ajStrFromInt(&temp, domain->Cath->Arch_Id);
    else if (noden==7)
	ajStrFromInt(&temp, domain->Cath->Topology_Id);
    else if (noden==8)
	ajStrFromInt(&temp, domain->Cath->Superfamily_Id);
    else if (noden==9)
	ajStrFromInt(&temp, domain->Cath->Family_Id);

    ajStrAssignS(align, temp);	
    ajStrInsertS(align, 0, ajDiroutGetPath(daf));	
    ajStrAppendC(align, ".");
    ajStrAppendS(align, ajDiroutGetExt(daf));

    ajStrAssignS(alignc, temp);	
    ajStrInsertS(alignc, 0, ajDiroutGetPath(super));	
    ajStrAppendC(alignc, ".");
    ajStrAppendS(alignc, ajDiroutGetExt(super));

    ajDebug("daf file '%S' super file '%S'\n", *align, *alignc);
    ajStrDel(&temp);
    return;
}
예제 #2
0
파일: trimest.c 프로젝트: WenchaoLin/JAMg
static void trimest_tolower(AjPStr *strng, ajint start, ajint end)
{

    AjPStr substr;
    substr = ajStrNew();
    
    /* extract the region and lowercase */
    ajStrAppendSubS(&substr, *strng, start, end);
    ajStrFmtLower(&substr);
    
    /* remove and replace the lowercased region */
    ajStrCutRange(strng, start, end);
    ajStrInsertS(strng, start, substr);
    ajStrDel(&substr);

    return;
}
예제 #3
0
static void extractfeat_GetRegionPad(const AjPSeq seq, AjPStr *featstr,
				     ajint start, ajint end, AjBool sense,
				     AjBool beginning)
{
    ajint tmp;
    ajint pad;

    AjPStr result;

    ajDebug("In extractfeat_GetRegionPad start=%d, end=%d\n", start, end);

    result = ajStrNew();


    if(start > end)
    	return;

    if(start < 0)
    {
        pad = -start;
        if(ajSeqIsNuc(seq))
            ajStrAppendCountK(&result, 'N', pad);
        else
            ajStrAppendCountK(&result, 'X', pad);
        start = 0;
    }

    if(end > (ajint) ajSeqGetLen(seq)-1)
    	tmp = ajSeqGetLen(seq)-1;
    else
    	tmp = end;

    if(start <= (ajint) ajSeqGetLen(seq) && tmp >= 0)
    {
        ajDebug("Get subsequence %d-%d\n", start, tmp);
        ajStrAppendSubS(&result, ajSeqGetSeqS(seq), start, tmp);
        ajDebug("result=%S\n", result);
    }

    if(end > (ajint) ajSeqGetLen(seq)-1)
    {
        pad = end - ajSeqGetLen(seq)+1;
        if(ajSeqIsNuc(seq))
            ajStrAppendCountK(&result, 'N', pad);
        else
            ajStrAppendCountK(&result, 'X', pad);
        ajDebug("result=%S\n", result);
    }


    /* if feature was in reverse sense, then get reverse complement */
    if(!sense)
    {
	ajDebug("get reverse sense of subsequence\n");
    	ajSeqstrReverse(&result);
	ajDebug("result=%S\n", result);
    }

    if(beginning)
    {
	ajDebug("Prepend to featstr: %S\n", result);
        ajStrInsertS(featstr, 0, result);
    }
    else
    {
	ajDebug("Append to featstr: %S\n", result);
    	ajStrAppendS(featstr, result);
    }
    ajDebug("featstr=%S\n", *featstr);


    ajStrDel(&result);

    return;
}
예제 #4
0
int main(int argc, char **argv)
{
    AjPSeqout outseq = NULL;
    AjPList list     = NULL;
    AjPSeq seq       = NULL;
    AjPStr insert    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr* seqr     = NULL;
    AjPFile data     = NULL;
    ajint start   = 0;
    ajint length  = 0;
    ajint amount  = 0;
    ajint scmax   = 0;
    ajint extra   = 0;

    embInit("makeprotseq", argc, argv);

    data     = ajAcdGetInfile("pepstatsfile");
    insert   = ajAcdGetString("insert");
    start    = ajAcdGetInt("start");
    length   = ajAcdGetInt("length");
    amount   = ajAcdGetInt("amount");
    outseq   = ajAcdGetSeqoutall("outseq");

    list = ajListstrNew();

    /* this is checked by acd
    if(amount <=0 || length <= 0)
    ajFatal("Amount or length is 0 or less. "
                 "Unable to create any sequences"); */

    /* if insert, make sure sequence is large enough */
    if(ajStrGetLen(insert))
    {
        length -= ajStrGetLen(insert);
        /* start= start <= 1 ? 0 : --start; */ /* checked in acd */
        start--;

        if(length <= 0)
            ajFatal("Sequence smaller than inserted part. "
                    "Unable to create sequences.");
    }

    /* make the list of AjPStr to be used in sequence creation */
    if(data)
    {
        ajDebug("Distribution datafile '%s' given checking type\n",
                ajFileGetPrintnameC(data));
        seqstr = ajStrNew();
        ajReadlineTrim(data,&seqstr);

        if(ajStrFindC(seqstr,"PEPSTATS") == 0)
        {
            makeprotseq_parse_pepstats(&list,data);
        }
        else
        {
            ajWarn("Not pepstats file. Making completely random sequences.");
            makeprotseq_default_chars(&list);
        }

        ajStrDel(&seqstr);
        ajFileClose(&data);
    }
    else
        makeprotseq_default_chars(&list);

    /* if insert, make sure type is correct */
    /* typecheking code is not working, uncomment and test after it is
    if(ajStrGetLen(insert))
    {
    seqstr = ajStrNew();
    if(prot)
        ajStrAssignC(&seqstr,"pureprotein");
    if(!ajSeqTypeCheckS(&insert,seqstr))
        ajFatal("Insert not the same sequence type as sequence itself.");
    ajStrDel(&seqstr);
    } */

    /* array allows fast creation of a sequences */
    scmax = (ajuint) ajListstrToarray(list,&seqr);
    if(!scmax)
        ajFatal("No strings in list. No characters to make the sequence.");

    ajDebug("Distribution array done.\nscmax '%d', extra '%d', first '%S'\n",
            scmax,extra,seqr[0]);

    ajRandomSeed();

    while(amount-- > 0)
    {
        seqstr = makeprotseq_random_sequence(seqr,scmax,length);

        if(ajStrGetLen(insert))
            ajStrInsertS(&seqstr,start,insert);

        ajStrFmtLower(&seqstr);
        seq = ajSeqNew();

        ajSeqAssignSeqS(seq, seqstr);
        ajSeqSetProt(seq);

        ajSeqoutWriteSeq(outseq, seq);
        ajSeqDel(&seq);
        ajStrDel(&seqstr);
    }

    ajSeqoutClose(outseq);
    ajSeqoutDel(&outseq);
    ajListstrFreeData(&list);
    ajStrDel(&insert);
    AJFREE(seqr);

    embExit();

    return 0;
}
예제 #5
0
/* @prog seqnr **************************************************************
**
** Removes redundancy from DHF files (domain hits files) or other files of 
** sequences.
**
****************************************************************************/
int main(int argc, char **argv)
{
    /* Variable declarations */
    AjPList    in        = NULL;    /* Names of domain hits files (input).   */
    AjPStr     inname    = NULL;    /* Full name of the current DHF file.    */
    AjPFile    inf       = NULL;    /* Current DHF file.                     */
    EmbPHitlist infhits   = NULL;   /* Hitlist from DHF file                 */
    AjBool     dosing    = ajFalse; /* Filter using singlet sequences.       */
    AjPDir     singlets  = NULL;    /* Singlets (input).                     */
    AjBool     dosets    = ajFalse; /* Filter using sets of sequences.       */
    AjPDir     insets    = NULL;    /* Sets (input).                         */
    AjPStr     mode      = NULL;    /* Mode of operation                     */
    ajint      moden     = 0;       /* Mode 1: single threshold for redundancy
				       removal, 2: lower and upper thresholds
				       for redundancy removal.               */
    float      thresh    = 0.0;     /* Threshold for non-redundancy.         */
    float      threshlow = 0.0;	    /* Threshold (lower limit).              */
    float      threshup  = 0.0;	    /* Threshold (upper limit).              */
    AjPMatrixf matrix    = NULL;    /* Substitution matrix.                  */
    float      gapopen   = 0.0;     /* Gap insertion penalty.                */
    float      gapextend = 0.0;     /* Gap extension penalty.                */
    AjPDirout  out       = NULL;    /* Domain hits files (output).           */
    AjPFile    outf      = NULL;    /* Current DHF file (output).            */
    AjBool     dored     = ajFalse; /* True if redundant hits are output.    */
    AjPDirout  outred    = NULL;    /* DHF files for redundant hits (output).*/
    AjPFile    redf      = NULL;    /* Current DHF file redundancy (output). */
    AjPStr     outname   = NULL;    /* Name of output file (re-used).        */
    AjPFile    logf      = NULL;    /* Log file pointer.                     */
 
    AjBool     ok        = ajFalse; /* Housekeeping.                         */
    AjPSeqset  seqset    = NULL;    /* Seqset (re-used).                     */
    AjPSeqin   seqin     = NULL;    /* Seqin (re-used).                      */
    AjPList    seq_list  = NULL;    /* Main list for redundancy removal.     */
    EmbPDmxNrseq seq_tmp = NULL;    /* Temp. pointer for making seq_list.    */
    ajint      seq_siz   = 0;       /* Size of seq_list.                     */
    AjPUint    keep      = NULL;    /* 1: Sequence in seq_list was classed as
				       non-redundant, 0: redundant.          */
    AjPUint    nokeep    = NULL;    /* Inversion of keep array.              */
    ajint      nseqnr    = 0;       /* No. non-redundant seqs. in seq_list.  */
    

    AjPStr     filtername= NULL;    /* Name of filter file (re-used).        */
    AjPFile    filterf   = NULL;    /* Current filter file.                  */
    EmbPHitlist hitlist   = NULL;   /* Hitlist from input file (re-used).    */
    AjPScopalg scopalg   = NULL;    /* Scopalg from input file.              */
    ajint      x         = 0;       /* Housekeeping.                         */
    

    


    /* Read data from acd. */
    embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION);

    in        = ajAcdGetDirlist("dhfinpath");
    dosing    = ajAcdGetToggle("dosing");
    singlets    = ajAcdGetDirectory("singletsdir");
    dosets    = ajAcdGetToggle("dosets");
    insets    = ajAcdGetDirectory("insetsdir");
    mode      = ajAcdGetListSingle("mode");  
    thresh    = ajAcdGetFloat("thresh");
    threshlow = ajAcdGetFloat("threshlow");
    threshup  = ajAcdGetFloat("threshup");
    matrix    = ajAcdGetMatrixf("matrix");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    out       = ajAcdGetOutdir("dhfoutdir");
    dored     = ajAcdGetToggle("dored");
    outred    = ajAcdGetOutdir("redoutdir");
    logf      = ajAcdGetOutfile("logfile");



    /* Housekeeping. */
    filtername  = ajStrNew();
    outname     = ajStrNew();


    if(!(ajStrToInt(mode, &moden)))
	ajFatal("Could not parse ACD node option");


    
    /* Process each DHF (input) in turn. */
    while(ajListPop(in,(void **)&inname))
    {
	ajFmtPrint("Processing %S\n", inname);
	ajFmtPrintF(logf, "//\n%S\n", inname);


	seq_list    = ajListNew();
	keep        = ajUintNew();  	    
	nokeep      = ajUintNew();  	    	
	
	/**********************************/
	/*         Open DHF file          */
	/**********************************/
	if((inf = ajFileNewInNameS(inname)) == NULL)
	    ajFatal("Could not open DHF file %S", inname);

	/* Read DHF file. */
	ok = ajFalse;
	if(!(infhits = embHitlistReadFasta(inf)))
	{
	    ajWarn("embHitlistReadFasta call failed in seqnr");
	    ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n");
	
	    /* Read sequence set instead. */ 
	    seqset = ajSeqsetNew();
	    seqin  = ajSeqinNew();
	    ajSeqinUsa(&seqin, inname);
	
	    if(!(ajSeqsetRead(seqset, seqin)))
		ajFatal("SeqsetRead failed in seqsearch_psialigned");

	    if(ajSeqsetGetSize(seqset))
		ok = ajTrue;
	}
	else
	    if(infhits->N)
		ok = ajTrue;

	/* Close DHF file. */
	ajFileClose(&inf);
	
	/* Process empty DHF files (should never occur). */
	if(!ok)
	{		
	    ajWarn("Empty input file %S\n", inname);
	    ajFmtPrintF(logf, "Empty input file %S\n", inname);
	    if(infhits)
		embHitlistDel(&infhits);
	    if(seqset)
		ajSeqsetDel(&seqset);
	    if(seqin)
		ajSeqinDel(&seqin);
	    continue;
	}	

	
	/* 1.  Create list of sequences from the main input directory.. */
	if(infhits)
	{
	    for(x=0; x<infhits->N; x++)
	    {
		AJNEW0(seq_tmp);
		seq_tmp->Seq = ajSeqNew();
		ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc);
		ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq);
		ajListPushAppend(seq_list,seq_tmp);		
	    }
	} 
	else
	{	 
	    for(x=0;x<ajSeqsetGetSize(seqset);x++)
	    {
		AJNEW0(seq_tmp);
		seq_tmp->Seq = ajSeqNew();
		ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x));
		ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x));
		ajListPushAppend(seq_list,seq_tmp);		
	    }
	    ajSeqsetDel(&seqset);
	    ajSeqinDel(&seqin);
	}
	
    

	/**********************************/
	/*   Open singlets filter file    */
	/**********************************/
	if(dosing)
	{
	    /* Open singlets file. */
	    ajStrAssignS(&filtername, inname);
	    ajFilenameTrimPathExt(&filtername);
	    ajStrInsertS(&filtername, 0, ajDirGetPath(singlets));
	    ajStrAppendC(&filtername, ".");
	    ajStrAppendS(&filtername, ajDirGetExt(singlets));

	
	    if((filterf = ajFileNewInNameS(filtername)) == NULL)
	    {
		ajWarn("Could not open DHF file %S",
		       filtername);
		ajFmtPrint("Could not open singlets filter file %S",
			   filtername);
	    }
	    else
	    {
		/* Read DHF file. */
		ok = ajFalse;
		if(!(hitlist = embHitlistReadFasta(filterf)))
		{
		    ajWarn("embHitlistReadFasta call failed in seqnr");
		    ajFmtPrintF(logf, 
				"embHitlistReadFasta call failed in seqnr\n");
	
		    /* Read sequence set instead. */ 
		    seqset = ajSeqsetNew();
		    seqin  = ajSeqinNew();
		    ajSeqinUsa(&seqin, inname);
	
		    if(!(ajSeqsetRead(seqset, seqin)))
			ajFatal("SeqsetRead failed in seqnr");

		    if(ajSeqsetGetSize(seqset))
			ok = ajTrue;
		}
		else
		    if(hitlist->N)
			ok = ajTrue;


		/* Close DHF file. */
		ajFileClose(&filterf);

	
		/* Process empty DHF files (should never occur). */
		if(!ok)
		{		
		    ajWarn("Empty singlets filter file %S\n", filtername);
		    ajFmtPrintF(logf, "Empty singlets filter file %S\n", 
				filtername);
		    /* No continue this time. */
		}	

	
		/* 2. Add sequences from filter directories to List but mark 
		   them up (they are considered in the redundancy calculation 
		   but never appear in the output files). */
		if(hitlist)
		{
		    for(x=0; x<hitlist->N; x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,hitlist->hits[x]->Acc);
			ajStrAssignS(&seq_tmp->Seq->Seq,hitlist->hits[x]->Seq);
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    embHitlistDel(&hitlist);
		} 
		else
		{	 
		    for(x=0;x<ajSeqsetGetSize(seqset);x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,
				     ajSeqsetGetseqAccS(seqset, x));
			ajStrAssignS(&seq_tmp->Seq->Seq,
				     ajSeqsetGetseqSeqS(seqset, x));
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajSeqsetDel(&seqset);
		    ajSeqinDel(&seqin);
		}
	    }
	}
	
	
	
	/**********************************/
	/*      Open sets filter file     */
	/**********************************/
	if(dosets)
	{
	    /* Open sets file. */
	    ajStrAssignS(&filtername, inname);
	    ajFilenameTrimPathExt(&filtername);
	    ajStrInsertS(&filtername, 0, ajDirGetPath(insets));
	    ajStrAppendC(&filtername, ".");
	    ajStrAppendS(&filtername, ajDirGetExt(insets));

	
	    if((filterf = ajFileNewInNameS(filtername)) == NULL)
	    {
		ajWarn("Could not open DAF file %S", filtername);
		ajFmtPrint("Could not open sets filter file %S", filtername);
	    }
	    else
	    {
		/* Read DAF file. */
		ok = ajFalse;

		if(!(ajDmxScopalgRead(filterf, &scopalg)))
		{
		    ajWarn("ajDmxScopalgRead call failed in seqnr");
		    ajFmtPrintF(logf,
				"ajDmxScopalgRead call failed in seqnr\n");
	
		    /* Read sequence set instead. */ 
		    seqset = ajSeqsetNew();
		    seqin  = ajSeqinNew();
		    ajSeqinUsa(&seqin, inname);
		    
		    if(!(ajSeqsetRead(seqset, seqin)))
			ajFatal("SeqsetRead failed in seqnr");

		    if(ajSeqsetGetSize(seqset))
			ok = ajTrue;
		}
		else
		    if(scopalg->N)
			ok = ajTrue;


		/* Close DHF file. */
		ajFileClose(&filterf);

	
		/* Process empty DHF files (should never occur). */
		if(!ok)
		{		
		    ajWarn("Empty sets filter file %S\n",
			   filtername);
		    ajFmtPrintF(logf, "Empty sets filter file %S\n",
				filtername);
		    /* No continue this time. */
		}	

	
		/* 2. Add sequences from filter directories to List but mark 
		   them up (they are considered in the redundancy calculation 
		   but never appear in the output files).. */
		if(scopalg)
		{
		    for(x=0; x<scopalg->N; x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]);
			ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]);
			/* Remove gap char's & whitespace. */
			ajStrRemoveGap(&seq_tmp->Seq->Seq);  
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajDmxScopalgDel(&scopalg);
		} 
		else
		{	 
		    for(x=0;x<ajSeqsetGetSize(seqset);x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,
				     ajSeqsetGetseqAccS(seqset, x));
			ajStrAssignS(&seq_tmp->Seq->Seq,
				     ajSeqsetGetseqSeqS(seqset, x));
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajSeqsetDel(&seqset);
		    ajSeqinDel(&seqin);
		}
	    }
	}
	
	
	/* 4. Identify redundant domains.. */
	if(moden == 1)
	{
	    if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen, 
			     gapextend, thresh, ajTrue)))
		ajFatal("embDmxSeqNR failure in seqnr");
	}		
	else
	{
	    if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen, 
			     gapextend, threshlow, threshup, ajTrue)))
		ajFatal("embDmxSeqNR failure in seqnr");
	}	
	seq_siz = ajListGetLength(seq_list);
	for(x=0; x<seq_siz; x++)
	    if(ajUintGet(keep, x) == 1)
		ajUintPut(&nokeep, x, 0);
	    else
		ajUintPut(&nokeep, x, 1);
	

	/* Create output files. */
	ajStrAssignS(&outname, inname);
	ajFilenameTrimPathExt(&outname);
	outf = ajFileNewOutNameDirS(outname, out);
	if(dored)
	    redf = ajFileNewOutNameDirS(outname, outred);
	

	/* 5. Write non-redundant domains to main output directory.  
	   6.  If specified, write redundant domains to output directory. */
	embHitlistWriteSubsetFasta(outf, infhits, keep);
	if(dored)
	    embHitlistWriteSubsetFasta(redf, infhits, nokeep);

	embHitlistDel(&infhits);
	ajFileClose(&outf);
	ajFileClose(&redf);
	ajStrDel(&inname);

	while(ajListPop(seq_list, (void **) &seq_tmp))
	{
	    ajSeqDel(&seq_tmp->Seq);
	    AJFREE(seq_tmp);
	}
	ajListFree(&seq_list);
	ajUintDel(&keep);	
	ajUintDel(&nokeep);
    }	    


    /* Tidy up. */
    ajListFree(&in);
    if(singlets)
	ajDirDel(&singlets);
    if(insets)
	ajDirDel(&insets);
    ajDiroutDel(&out);
    if(outred)
	ajDiroutDel(&outred);
    ajFileClose(&logf);

    ajMatrixfDel(&matrix);

    ajStrDel(&filtername);
    ajStrDel(&outname);
    ajStrDel(&mode);


    embExit();
    return 0;
}