Ejemplo n.º 1
0
/*
** Write a set of ranges to the primer3 input stream as one record of
** the form "tag=start-end start-end ...\n".
**
** stream: output stream handed on to eprimer3_write
** tag:    name of the primer3 tag to emit
** value:  ranges to send; when it holds no ranges nothing is written,
**         not even the tag
*/
static void eprimer3_send_range2(FILE * stream, const char * tag,
                                 const AjPRange value)
{
    AjPStr str;
    ajuint nranges;
    ajuint n;
    ajuint start;
    ajuint end;

    /* the range object is const here, so its size is loop-invariant */
    nranges = ajRangeGetSize(value);

    if(!nranges)
        return;

    str = ajStrNew();

    ajFmtPrintS(&str, "%s=", tag);
    eprimer3_write(str, stream);

    for(n = 0; n < nranges; n++)
    {
        ajRangeElementGetValues(value, n, &start, &end);

        /*
        ** start and end are unsigned, so they are printed with %u;
        ** ajFmtPrintS overwrites str, so no explicit clear is needed
        ** between writes
        */
        ajFmtPrintS(&str, "%u-%u ", start, end);
        eprimer3_write(str, stream);
    }

    ajFmtPrintS(&str, "\n");
    eprimer3_write(str, stream);

    ajStrDel(&str);

    return;
}
Ejemplo n.º 2
0
/*
** notseq entry point: copies every input sequence to one of two
** outputs. A sequence whose name or accession matches the exclusion
** pattern goes to "junkoutseq"; every other sequence goes to "outseq".
*/
int main(int argc, char **argv)
{
    AjPSeqall input     = NULL;
    AjPSeqout kept      = NULL;
    AjPSeqout rejected  = NULL;
    AjPSeqout dest      = NULL;
    AjPSeq current      = NULL;
    AjPStr excludespec  = NULL;
    AjPStr patterns     = NULL;
    AjPStr seqname      = NULL;
    AjPStr seqacc       = NULL;

    embInit("notseq", argc, argv);

    kept        = ajAcdGetSeqoutall("outseq");
    rejected    = ajAcdGetSeqoutall("junkoutseq");
    input       = ajAcdGetSeqall("sequence");
    excludespec = ajAcdGetString("exclude");

    /* turn the exclusion specification into the pattern list to match */
    notseq_readfile(excludespec, &patterns);

    while(ajSeqallNext(input, &current))
    {
        ajStrAssignS(&seqname, ajSeqGetNameS(current));
        ajStrAssignS(&seqacc, ajSeqGetAccS(current));

        /* a hit on either the name or the accession excludes the entry */
        if(embMiscMatchPatternDelimC(seqname, patterns, ",;") ||
           embMiscMatchPatternDelimC(seqacc, patterns, ",;"))
        {
            dest = rejected;
        }
        else
        {
            dest = kept;
        }

        ajSeqoutWriteSeq(dest, current);

        ajStrSetClear(&seqname);
        ajStrSetClear(&seqacc);
    }

    ajSeqoutClose(kept);
    ajSeqoutClose(rejected);

    /* release everything obtained above */
    ajSeqallDel(&input);
    ajSeqDel(&current);
    ajSeqoutDel(&kept);
    ajSeqoutDel(&rejected);
    ajStrDel(&excludespec);
    ajStrDel(&patterns);
    ajStrDel(&seqname);
    ajStrDel(&seqacc);

    embExit();

    return 0;
}
Ejemplo n.º 3
0
/*
** Expand an "@filename" enzyme specification in place.
**
** If *enzymes has '@' at position 0, the '@' is trimmed off, the
** remainder is opened as a file, and *enzymes is replaced by the file's
** lines joined with ',' (each appended line is followed by a comma).
** Empty lines and lines starting with '#' or '!' are skipped.
** Any other value of *enzymes is left untouched.
**
** enzymes: address of the enzyme specification string (modified in place)
**
** Calls ajFatal if the file cannot be opened.
*/
static void remap_read_file_of_enzyme_names(AjPStr *enzymes)
{
    AjPFile file = NULL;
    AjPStr line = NULL;
    const char *p = NULL;

    /* nothing to do unless the specification starts with '@' */
    if(ajStrFindC(*enzymes, "@") != 0)
        return;

    ajStrTrimC(enzymes, "@");	/* remove the @ */
    file = ajFileNewInNameS(*enzymes);

    /* %S expects the string object itself, not the address of the pointer */
    if(file == NULL)
        ajFatal("Cannot open the file of enzyme names: '%S'", *enzymes);

    /* blank off the enzyme file name and replace with the enzyme names */
    ajStrSetClear(enzymes);
    line = ajStrNew();

    while(ajReadlineTrim(file, &line))
    {
        p = ajStrGetPtr(line);

        /* skip blank lines and comment lines */
        if(!*p || *p == '#' || *p == '!')
            continue;

        ajStrAppendS(enzymes, line);
        ajStrAppendC(enzymes, ",");
    }

    ajStrDel(&line);
    ajFileClose(&file);

    return;
}
Ejemplo n.º 4
0
/*
** Reset an XML object: empty its string fields, release the stored
** text, zero the bookkeeping fields and destroy any attached document.
** The object itself is not freed.
*/
void ajXmlClear(AjPXml xml)
{
    /* bookkeeping fields go back to zero */
    xml->Format = 0;
    xml->Fpos   = 0L;
    xml->Count  = 0;

    /* each string is emptied only when it currently has a length */
    if(MAJSTRGETLEN(xml->Id))
    {
        ajStrSetClear(&xml->Id);
    }

    if(MAJSTRGETLEN(xml->Db))
    {
        ajStrSetClear(&xml->Db);
    }

    if(MAJSTRGETLEN(xml->Setdb))
    {
        ajStrSetClear(&xml->Setdb);
    }

    if(MAJSTRGETLEN(xml->Full))
    {
        ajStrSetClear(&xml->Full);
    }

    if(MAJSTRGETLEN(xml->Qry))
    {
        ajStrSetClear(&xml->Qry);
    }

    if(MAJSTRGETLEN(xml->Formatstr))
    {
        ajStrSetClear(&xml->Formatstr);
    }

    if(MAJSTRGETLEN(xml->Filename))
    {
        ajStrSetClear(&xml->Filename);
    }

    /* unlike the strings above, the text buffer is deleted outright */
    ajStrDel(&xml->TextPtr);

    /* destroy the document node when one is attached */
    if(xml->Doc)
    {
        ajDomDocumentDestroyNode(xml->Doc, &xml->Doc);
    }
}
Ejemplo n.º 5
0
/*
** Build the exclusion pattern from the user's specification.
**
** When exclude does not have '@' at position 0 it is copied to *pattern
** unchanged. Otherwise the '@' is trimmed off, the remainder is opened
** as a file, and *pattern becomes the file's lines, each followed by a
** comma. Empty lines and lines starting with '#' or '!' are skipped.
**
** Calls ajFatal if the file cannot be opened.
*/
static void notseq_readfile(const AjPStr exclude, AjPStr *pattern)
{
    AjPFile listfile = NULL;
    AjPStr entry     = NULL;
    AjPStr filename  = NULL;
    const char *text = NULL;

    /* plain specification: use it directly as the pattern */
    if(ajStrFindC(exclude, "@") != 0)
    {
        ajStrAssignS(pattern, exclude);
        return;
    }

    /* "@file" specification: strip the '@' and open the file */
    ajStrAssignS(&filename, exclude);
    ajStrTrimC(&filename, "@");
    listfile = ajFileNewInNameS(filename);

    if(!listfile)
        ajFatal("Cannot open the file of sequence names: '%S'", filename);

    /* start from an empty pattern and append one name per usable line */
    ajStrSetClear(pattern);
    entry = ajStrNew();

    while(ajReadlineTrim(listfile, &entry))
    {
        text = ajStrGetPtr(entry);

        if(*text && *text != '#' && *text != '!')
        {
            ajStrAppendS(pattern, entry);
            ajStrAppendC(pattern, ",");
        }
    }

    ajStrDel(&entry);
    ajStrDel(&filename);

    ajFileClose(&listfile);
}
Ejemplo n.º 6
0
/*
** Reset a reference sequence object: empty its string fields, delete
** the text, description and sequence strings, delete every range held
** in Seqlist and free that list, and zero the bookkeeping fields.
** The object itself is not freed.
*/
void ajRefseqClear(AjPRefseq refseq)
{
    AjPSeqRange range = NULL;

    /* bookkeeping fields go back to zero */
    refseq->Format = 0;
    refseq->Fpos   = 0L;
    refseq->Count  = 0;

    /* each string is emptied only when it currently has a length */
    if(MAJSTRGETLEN(refseq->Id))
    {
        ajStrSetClear(&refseq->Id);
    }

    if(MAJSTRGETLEN(refseq->Db))
    {
        ajStrSetClear(&refseq->Db);
    }

    if(MAJSTRGETLEN(refseq->Setdb))
    {
        ajStrSetClear(&refseq->Setdb);
    }

    if(MAJSTRGETLEN(refseq->Full))
    {
        ajStrSetClear(&refseq->Full);
    }

    if(MAJSTRGETLEN(refseq->Qry))
    {
        ajStrSetClear(&refseq->Qry);
    }

    if(MAJSTRGETLEN(refseq->Formatstr))
    {
        ajStrSetClear(&refseq->Formatstr);
    }

    if(MAJSTRGETLEN(refseq->Filename))
    {
        ajStrSetClear(&refseq->Filename);
    }

    /* these three strings are deleted outright rather than emptied */
    ajStrDel(&refseq->TextPtr);
    ajStrDel(&refseq->Desc);
    ajStrDel(&refseq->Seq);

    /* pop and delete every stored range, then free the list itself */
    while(ajListPop(refseq->Seqlist, (void **) &range))
    {
        ajSeqrangeDel(&range);
    }

    ajListFree(&refseq->Seqlist);
}
Ejemplo n.º 7
0
/*
** Reset a resource object: empty its string fields, pop and delete
** every element of its lists (the lists themselves are kept), delete
** the stored text and zero the bookkeeping fields.
** The object itself is not freed.
*/
void ajResourceClear(AjPResource resource)
{
    AjPReslink link   = NULL;
    AjPResquery query = NULL;
    AjPResterm term   = NULL;
    AjPStr item       = NULL;

    /* strings are emptied only when they currently have a length */
    if(MAJSTRGETLEN(resource->Id))
    {
        ajStrSetClear(&resource->Id);
    }

    /* alternative identifiers: list of strings */
    if(ajListGetLength(resource->Idalt))
    {
        while(ajListstrPop(resource->Idalt, &item))
        {
            ajStrDel(&item);
        }
    }

    if(MAJSTRGETLEN(resource->Acc))
    {
        ajStrSetClear(&resource->Acc);
    }

    if(MAJSTRGETLEN(resource->Name))
    {
        ajStrSetClear(&resource->Name);
    }

    if(MAJSTRGETLEN(resource->Desc))
    {
        ajStrSetClear(&resource->Desc);
    }

    if(MAJSTRGETLEN(resource->Url))
    {
        ajStrSetClear(&resource->Url);
    }

    if(MAJSTRGETLEN(resource->Urllink))
    {
        ajStrSetClear(&resource->Urllink);
    }

    if(MAJSTRGETLEN(resource->Urlrest))
    {
        ajStrSetClear(&resource->Urlrest);
    }

    if(MAJSTRGETLEN(resource->Urlsoap))
    {
        ajStrSetClear(&resource->Urlsoap);
    }

    /* categories: list of strings */
    if(ajListGetLength(resource->Cat))
    {
        while(ajListstrPop(resource->Cat, &item))
        {
            ajStrDel(&item);
        }
    }

    /* the next five lists all hold resource terms */
    if(ajListGetLength(resource->Taxon))
    {
        while(ajListPop(resource->Taxon, (void**) &term))
        {
            ajRestermDel(&term);
        }
    }

    if(ajListGetLength(resource->Edamdat))
    {
        while(ajListPop(resource->Edamdat, (void**) &term))
        {
            ajRestermDel(&term);
        }
    }

    if(ajListGetLength(resource->Edamfmt))
    {
        while(ajListPop(resource->Edamfmt, (void**) &term))
        {
            ajRestermDel(&term);
        }
    }

    if(ajListGetLength(resource->Edamid))
    {
        while(ajListPop(resource->Edamid, (void**) &term))
        {
            ajRestermDel(&term);
        }
    }

    if(ajListGetLength(resource->Edamtpc))
    {
        while(ajListPop(resource->Edamtpc, (void**) &term))
        {
            ajRestermDel(&term);
        }
    }

    /* cross-references: list of resource links */
    if(ajListGetLength(resource->Xref))
    {
        while(ajListPop(resource->Xref, (void**) &link))
        {
            ajReslinkDel(&link);
        }
    }

    /* queries: list of resource queries */
    if(ajListGetLength(resource->Query))
    {
        while(ajListPop(resource->Query, (void**) &query))
        {
            ajResqueryDel(&query);
        }
    }

    /* examples: list of strings */
    if(ajListGetLength(resource->Example))
    {
        while(ajListstrPop(resource->Example, &item))
        {
            ajStrDel(&item);
        }
    }

    if(MAJSTRGETLEN(resource->Db))
    {
        ajStrSetClear(&resource->Db);
    }

    if(MAJSTRGETLEN(resource->Setdb))
    {
        ajStrSetClear(&resource->Setdb);
    }

    if(MAJSTRGETLEN(resource->Full))
    {
        ajStrSetClear(&resource->Full);
    }

    if(MAJSTRGETLEN(resource->Qry))
    {
        ajStrSetClear(&resource->Qry);
    }

    if(MAJSTRGETLEN(resource->Formatstr))
    {
        ajStrSetClear(&resource->Formatstr);
    }

    if(MAJSTRGETLEN(resource->Filename))
    {
        ajStrSetClear(&resource->Filename);
    }

    /* the text buffer is deleted outright rather than emptied */
    ajStrDel(&resource->TextPtr);

    /* bookkeeping fields go back to zero */
    resource->Count  = 0;
    resource->Fpos   = 0L;
    resource->Format = 0;
}
Ejemplo n.º 8
0
/*
** Walk the feature table of a sequence and write out the sequence of
** each feature through extractfeat_WriteOut.
**
** Features are processed in list order. The details of the "current"
** feature (sequence, type, bounds, sense, description) are held in
** locals; they are written out when the next non-child feature is
** reached and once more after the loop for the last one. Child
** features never trigger a write: they only extend the stored bounds.
**
** seq:        sequence the features belong to
** seqout:     output object passed to extractfeat_WriteOut
** featab:     feature table; nothing is done if it is NULL or empty
** before:     passed through unchanged to extractfeat_WriteOut
** after:      passed through unchanged to extractfeat_WriteOut
** join:       if false, every feature is treated as a single feature
**             and ajFeatGetSeq is used instead of ajFeatGetSeqJoin
** featinname: passed through unchanged to extractfeat_WriteOut
** describe:   passed to extractfeat_MatchPatternDescribe, which fills
**             describeout for the current feature
*/
static void extractfeat_FeatSeqExtract(const AjPSeq seq, AjPSeqout seqout,
				       AjPFeattable featab, ajint before,
				       ajint after, AjBool join,
				       AjBool featinname,
				       const AjPStr describe)
{
    AjIList iter = NULL;
    AjPFeature gf = NULL;
    AjBool  single;		/* ajTrue = is not a multiple */
    AjBool  parent;		/* ajTrue = is a parent of a multiple */
    AjBool  child;		/* ajTrue = is a child of a multiple */
    AjBool  compall;		/* ajTrue = reverse comp all of join */
    AjBool  sense;		/* ajTrue = forward sense */
    AjBool  remote;		/* ajTrue = remote ID; only ever set to
				   ajFalse in this function */
    AjPStr  type = NULL;	/* name of feature */
    AjPStr  featseq = NULL;	/* feature sequence string */
    AjPStr  tmpseq = NULL;	/* temporary sequence string */
    ajint   firstpos;
    ajint   lastpos;	        /* bounds of feature in sequence,
				   stored as start-1 and end-1 */
    AjPStr  describeout = NULL;	/* tag names/values to add to descriptions */
    ajuint count = 0;		/* number of features seen so far */

    /* For all features... */
    if(featab && ajFeattableGetSize(featab))
    {
	/* initialise details of a feature */
        featseq = ajStrNew();
        tmpseq  = ajStrNew();
        type    = ajStrNew();
        remote  = ajFalse;
        compall = ajFalse;
        sense   = ajTrue;
        firstpos = 0;
        lastpos  = 0;
        describeout = ajStrNew();


	iter = ajListIterNewread(featab->Features);
	while(!ajListIterDone(iter))
	{
	    gf = ajListIterGet(iter) ;

	    /*
	    ** Determine what sort of thing this feature is. Only one of
	    ** these will be true.
	    ** True if this is part of a multiple join and it is not
	    ** the parent
	    */
	    child = ajFalse;

	    /* True if this is part of a multiple join and it is the parent */
	    parent = ajFalse;

	    /* True if this is not part of a multiple join */
	    single = ajFalse;

            if(ajFeatIsMultiple(gf))
	    {
            	if(ajFeatIsChild(gf))
            	    child = ajTrue;
		else
            	    parent = ajTrue;
            }
	    else
            	single = ajTrue;

	    /* 
	    ** If the caller does not wish to assemble joins, force all
	    ** features to be treated as single
	    */
	    if(!join)
	    {
	    	child = ajFalse;
	    	parent = ajFalse;
	    	single = ajTrue;
	    }



	    ajDebug("feature %S %d-%d is parent %B, child %B, single %B\n",
		    ajFeatGetType(gf), ajFeatGetStart(gf), ajFeatGetEnd(gf),
		    parent, child, single);
/*
	    ajUser("feature %S %d-%d is parent %B, child %B, single %B",
		    ajFeatGetType(gf), ajFeatGetStart(gf), ajFeatGetEnd(gf),
		    parent, child, single);
*/
	    /*
	    ** If single or parent, write out any stored previous feature
	    ** sequence. count is zero only for the very first feature,
	    ** when there is nothing stored yet.
	    */
            if(count++ && !child)
	    {
            	extractfeat_WriteOut(seqout, &featseq, compall, sense,
				     firstpos, lastpos, before, after, seq,
				     remote, type, 
				     featinname, describeout);

                /* reset joined feature information */
                ajStrSetClear(&featseq);
                ajStrSetClear(&tmpseq);
                ajStrSetClear(&type);
		ajStrSetClear(&describeout);
                remote = ajFalse;
                compall = ajFalse;
                sense = ajTrue;
                firstpos = 0;
                lastpos = 0;
            }


	    /* if parent, note if have Complemented Join */
            if(parent)
                compall = ajFeatIsCompMult(gf);

	    /*
	    ** Get the sense of the feature
	    ** NB.  if complementing several joined features, then pretend they
	    ** are forward sense until it is possible to reverse-complement
	    ** them all together.
	    */
	    if(!compall && ajFeatGetStrand(gf) == '-')
	        sense = ajFalse;
	    
	    /* get 'type' name of feature */
	    if(single || parent)
	    	ajStrAssignS(&type, ajFeatGetType(gf));
	    
	    /*
	    ** if single or parent, record the feature bounds
	    ** (start-1 and end-1)
	    */
            if(single || parent)
	    {
                firstpos = ajFeatGetStart(gf)-1;
                lastpos = ajFeatGetEnd(gf)-1;
            }
	    
	    /*
	    ** if child, update the boundary positions: a forward-sense
	    ** child moves the end, a reverse-sense child moves the start
	    */
            if(child)
	    {
                if(sense)
                    lastpos = ajFeatGetEnd(gf)-1;
		else
		    firstpos = ajFeatGetStart(gf)-1;
            }
	    
	    /* called for every feature, children included */
            extractfeat_MatchPatternDescribe(gf, describe, &describeout);
	    
	    /*
	    ** get the feature sequence; children are skipped here, only
	    ** single and parent features fetch a sequence
	    */
            if(!child)
            {
                if(join)
                    ajFeatGetSeqJoin(gf, featab, seq, &tmpseq);
                else
                    ajFeatGetSeq(gf, seq, &tmpseq);
                ajDebug("extracted feature = %d bases\n", ajStrGetLen(tmpseq));
                /*ajUser("extracted feature = %d bases", ajStrGetLen(tmpseq));*/
            	ajStrAssignS(&featseq, tmpseq);
	    }
	}
	ajListIterDel(&iter) ;
	
	/*
	** write out the last stored feature, which the loop above has
	** not yet written
	*/
        extractfeat_WriteOut(seqout, &featseq, compall, sense,
			     firstpos, lastpos, before, after,
			     seq, remote, type, 
			     featinname, describeout);
	
        ajStrDel(&featseq);
        ajStrDel(&tmpseq);
        ajStrDel(&type);
        ajStrDel(&describeout);
    }
    
    return;
}
Ejemplo n.º 9
0
/*
** Build a list of regular expression patterns from a specification.
**
** patspec: either a single pattern, or "@filename" naming a file of
**          patterns
** patname: base name for the patterns; "regex" is used when it is empty
** fmt:     file format name, resolved by patternRegexFormat; when that
**          yields 0 the format is guessed from the first line of the
**          file (a line starting with '>' selects the named format)
** type:    passed to ajPatlistRegexNewType
** upper:   if true, patterns are converted to upper case
** lower:   if true, patterns are converted to lower case
**
** File format 1 holds one pattern per line, named <base><n>.
** Any other format holds ">name" header lines, each followed by one or
** more pattern lines that are concatenated.
**
** Returns the new pattern list, or NULL (after reporting an error with
** ajErr) if the pattern file cannot be opened.
*/
AjPPatlistRegex ajPatlistRegexRead (const AjPStr patspec,
				    const AjPStr patname,
				    const AjPStr fmt,
				    ajuint type, AjBool upper, AjBool lower)
{
    AjPPatlistRegex patlist = NULL;
    AjPStr line = NULL;
    AjPStr pat  = NULL;
    AjPStr name = NULL;
    AjPFilebuff infile = NULL;
    AjPStr patstr = NULL;
    ajuint ifmt;
    ajuint npat = 0;
    AjPStr namestr = NULL;

    ajStrAssignS(&namestr, patname);
    ajStrAssignEmptyC(&namestr, "regex");

    ajStrAssignS(&patstr, patspec);

    patlist = ajPatlistRegexNewType(type);

    ifmt = patternRegexFormat(fmt);

    if(ajStrGetCharFirst(patspec) ==  '@')
    {
        ajStrCutStart(&patstr, 1);
        infile = ajFilebuffNewNameS(patstr);

        if(!infile)
        {
            ajErr("Unable to open regular expression file '%S'", patstr);

            /* release everything allocated so far before bailing out */
            ajPatlistRegexDel(&patlist);
            ajStrDel(&namestr);
            ajStrDel(&patstr);

            return NULL;
        }

        line = ajStrNew();
        pat  = ajStrNew();
        name = ajStrNew();

        /* no explicit format: peek at the first line, then rewind */
        if(!ifmt)
        {
            ajBuffreadLineTrim(infile,&line);

            if(ajStrPrefixC(line, ">"))
                ifmt = 2;
            else
                ifmt = 1;
            ajFilebuffReset(infile);
        }

        switch(ifmt)
        {
        case 1:
            /* one pattern per line, named <base><n> */
            while (ajBuffreadLineTrim(infile,&line))
            {
                npat++;
                ajStrAppendS (&pat,line);

                if(lower)
                    ajStrFmtLower(&pat);

                if(upper)
                    ajStrFmtUpper(&pat);

                ajFmtPrintS(&name, "%S%u", namestr, npat);
                ajPatternRegexNewList(patlist,name,pat);
                ajStrSetClear(&pat);
            }
            break;
        default:
            while (ajBuffreadLineTrim(infile,&line))
            {
                /*
                ** A header is a line whose FIRST character is '>'.
                ** The first character is stripped below, so a '>'
                ** elsewhere in a pattern line must not count as a
                ** header; this matches the format auto-detection above.
                */
                if (ajStrGetCharFirst(line) == '>')
                {
                    npat++;

                    /* store the pattern collected for the previous header */
                    if (ajStrGetLen(name))
                    {
                        if(lower)
                            ajStrFmtLower(&pat);

                        if(upper)
                            ajStrFmtUpper(&pat);

                        ajPatternRegexNewList(patlist,name,pat);
                        ajStrSetClear(&name);
                        ajStrSetClear(&pat);
                    }
                    ajStrCutStart(&line,1);
                    ajStrAssignS (&name,line);

                    /* unnamed header: fall back to <base><n> */
                    if(!ajStrGetLen(name))
                        ajFmtPrintS(&name, "%S%u", namestr, npat);
                }
                else
                    ajStrAppendS (&pat,line);
            }

            /*
            ** Store the final pattern, with the same case conversion
            ** that every earlier pattern received.
            */
            if(lower)
                ajStrFmtLower(&pat);

            if(upper)
                ajStrFmtUpper(&pat);

            ajStrAssignEmptyS(&name, patname);
            ajPatternRegexNewList(patlist,name,pat);
            ajStrSetClear(&pat);
            break;
        }

        ajFilebuffDel(&infile);
    }
    else
    {
        /* the specification is itself the single pattern */
        ajStrAssignS(&pat, patspec);

        if(lower)
            ajStrFmtLower(&pat);

        if(upper)
            ajStrFmtUpper(&pat);

        ajStrAssignS(&name, namestr);
        ajPatternRegexNewList(patlist,name,pat);
    }

    ajStrDel(&name);
    ajStrDel(&namestr);
    ajStrDel(&patstr);
    ajStrDel(&line);
    ajStrDel(&pat);

    return patlist;
}
Ejemplo n.º 10
0
/*
** Build a list of sequence patterns from a specification.
**
** patspec:    either a single pattern, or "@filename" naming a file of
**             patterns
** patname:    base name for the patterns; "pattern" is used when it is
**             empty
** fmt:        file format name, resolved by patternSeqFormat; when that
**             yields 0 the format is guessed from the first line of the
**             file (a line starting with '>' selects the named format)
** protein:    passed to ajPatlistSeqNewType
** mismatches: default number of mismatches for each pattern
**
** File format 1 holds one pattern per line, named <base><n>.
** Any other format holds ">name" header lines, optionally carrying a
** "<mismatch=N>" tag that overrides the default for that pattern, each
** followed by one or more pattern lines that are concatenated.
**
** Returns the new pattern list, or NULL (after reporting an error with
** ajErr) if the pattern file cannot be opened.
*/
AjPPatlistSeq ajPatlistSeqRead (const AjPStr patspec,
				const AjPStr patname,
				const AjPStr fmt,
				AjBool protein, ajuint mismatches)
{
    AjPPatlistSeq patlist = NULL;
    AjPStr line = NULL;
    AjPStr name = NULL;
    AjPFilebuff infile = NULL;
    AjPRegexp mismreg = NULL;
    AjPStr patstr = NULL;
    AjPStr pat = NULL;
    ajuint mismatch = 0;
    ajint ifmt = 0;
    ajuint npat = 0;
    AjPStr namestr = NULL;

    ajStrAssignS(&namestr, patname);
    ajStrAssignEmptyC(&namestr, "pattern");

    ajStrAssignS(&patstr, patspec);

    patlist = ajPatlistSeqNewType(protein);

    ifmt = patternSeqFormat(fmt);

    ajDebug("ajPatlistSeqRead patspec: '%S' patname: '%S' "
            "protein: %B mismatches: %u\n",
            patspec, patname, protein, mismatches);

    if(ajStrGetCharFirst(patstr) == '@')
    {
        ajStrCutStart(&patstr, 1);
        infile = ajFilebuffNewNameS(patstr);

        if(!infile)
        {
            ajErr("Unable to open pattern file '%S'", patstr);

            /* release everything allocated so far before bailing out */
            ajPatlistSeqDel(&patlist);
            ajStrDel(&namestr);
            ajStrDel(&patstr);

            return NULL;
        }

        line = ajStrNew();
        name = ajStrNew();

        /* no explicit format: peek at the first line, then rewind */
        if(!ifmt)
        {
            ajBuffreadLineTrim(infile,&line);

            if(ajStrPrefixC(line, ">"))
                ifmt = 2;
            else
                ifmt = 1;
            ajFilebuffReset(infile);
        }

        switch(ifmt)
        {
        case 1:
            /* one pattern per line, named <base><n> */
            while (ajBuffreadLineTrim(infile,&line))
            {
                npat++;
                ajStrAppendS (&pat,line);
                ajFmtPrintS(&name, "%S%u", namestr, npat);
                ajPatternSeqNewList(patlist,name,pat,mismatches);
                ajStrSetClear(&pat);
            }
            break;
        default:
            mismreg = ajRegCompC("<mismatch=(\\d+)>");

            /*
            ** Every pattern, including the first, starts from the
            ** caller's default; a "<mismatch=N>" tag on its header
            ** overrides it.
            */
            mismatch = mismatches;

            while (ajBuffreadLineTrim(infile,&line))
            {
                if (ajStrGetCharFirst(line) == '>')
                {
                    /* store the pattern collected for the previous header */
                    if (ajStrGetLen(name))
                    {
                        ajPatternSeqNewList(patlist,name,pat,
                                            mismatch);
                        ajStrSetClear(&name);
                        ajStrSetClear(&pat);
                        mismatch=mismatches;
                    }

                    ajStrCutStart(&line,1);

                    if (ajRegExec(mismreg,line))
                    {
                        /* name is used as scratch space for the number */
                        ajRegSubI(mismreg,1,&name);
                        ajStrToUint(name,&mismatch);

                        /* drop the tag and anything after it */
                        ajStrTruncateLen(&line,ajRegOffset(mismreg));
                        ajStrTrimWhiteEnd(&line);
                    }
                    ajStrAssignS (&name,line);
                    ajStrAssignEmptyS(&name, patname);
                }
                else
                    ajStrAppendS (&pat,line);
            }

            /* store the final pattern */
            ajStrAssignEmptyS(&name, patname);
            ajPatternSeqNewList(patlist,name,pat,mismatch);
            ajRegFree(&mismreg);
            break;
        }

        ajFilebuffDel(&infile);
    }
    else
    {
        /* the specification is itself the single pattern */
        ajStrAssignS(&name, namestr);
        ajPatternSeqNewList(patlist,name,patstr,mismatches);
    }

    ajStrDel(&name);
    ajStrDel(&line);
    ajStrDel(&pat);
    ajStrDel(&namestr);
    ajStrDel(&patstr);

    return patlist;
}
Ejemplo n.º 11
0
/*
** eprimer3 entry point.
**
** Reads all options through ACD, then for each input sequence starts
** primer3_core with redirected input and output, writes the "global"
** and per-sequence tags to it, reads back its output and passes that
** to eprimer3_report for writing to the output file.
*/
int main(int argc, char **argv)
{
    /* Global details */
    AjBool explain_flag;
    AjBool file_flag;
    AjPStr* task;
    AjBool do_primer;
    AjBool do_hybrid;
    ajint num_return;
    ajint first_base_index;

    /* "Sequence" Input Tags */
    AjPSeqall sequence;
    AjPRange included_region;
    AjPRange target;
    AjPRange excluded_region;
    AjPStr left_input;
    AjPStr right_input;

    /* Primer details */
    AjBool pick_anyway;
    AjPFile mispriming_library;
    float max_mispriming;
    float pair_max_mispriming;
    ajint gc_clamp;
    ajint opt_size;
    ajint min_size;
    ajint max_size;
    float opt_tm;
    float min_tm;
    float max_tm;
    float max_diff_tm;
    float opt_gc_percent;
    float min_gc;
    float max_gc;
    float salt_conc;
    float dna_conc;
    ajint num_ns_accepted;
    float self_any;
    float self_end;
    ajint max_poly_x;

    /* Sequence Quality. These are not (yet) implemented */
    /*
       AjPFile sequence_quality;
       ajint	min_quality;
       ajint	min_end_quality;
       ajint	quality_range_min;
       ajint	quality_range_max;
       */

    /* Product details */
    ajint product_opt_size;
    AjPRange product_size_range;
    float product_opt_tm;
    float product_min_tm;
    float product_max_tm;

    /* Objective Function Penalty Weights for Primers */
    float max_end_stability;

    /* these are not (yet) implemented */
    /*
       float		inside_penalty;
       float		outside_penalty;
    */

    /* Primer penalties */
    /* these are not (yet) implemented */

    /* Internal Oligo "Sequence" Input Tags */
    AjPRange internal_oligo_excluded_region;

    /* Internal Oligo "Global" Input Tags */
    AjPStr internal_oligo_input;
    ajint internal_oligo_opt_size;
    ajint internal_oligo_min_size;
    ajint internal_oligo_max_size;
    float internal_oligo_opt_tm;
    float internal_oligo_min_tm;
    float internal_oligo_max_tm;
    float internal_oligo_opt_gc_percent;
    float internal_oligo_min_gc;
    float internal_oligo_max_gc;
    float internal_oligo_salt_conc;
    float internal_oligo_dna_conc;
    float internal_oligo_self_any;
    float internal_oligo_self_end;
    ajint internal_oligo_max_poly_x;
    AjPFile internal_oligo_mishyb_library;
    float internal_oligo_max_mishyb;

    /*
       ajint		internal_oligo_min_quality;
    */

    /* Internal Oligo penalties */
    /* these are not (yet) implemented */

    /* EMBOSS-wrapper-specific stuff */
    AjPFile	outfile;

    /* other variables */
    AjPStr result = NULL;
    AjPStr strand = NULL;
    AjPStr substr = NULL;
    AjPSeq seq    = NULL;
    ajint begin   = 0;
    ajint end;
    FILE* stream;
    AjPStr taskstr  = NULL;
    const AjPStr program = NULL;

    /* pipe variables */

    int *pipeto;	  /* pipe to feed the exec'ed program input */
    int *pipefrom;	  /* pipe to get the exec'ed program output */

    embInit("eprimer3", argc, argv);

    /* Global details */
    explain_flag     = ajAcdGetBoolean("explainflag");
    file_flag        = ajAcdGetBoolean("fileflag");
    task             = ajAcdGetList("task");
    do_primer        = ajAcdGetToggle("primer");
    do_hybrid        = ajAcdGetToggle("hybridprobe");
    num_return       = ajAcdGetInt("numreturn");
    first_base_index = ajAcdGetInt("firstbaseindex");

    /* "Sequence" Input Tags */
    sequence        = ajAcdGetSeqall("sequence");
    included_region = ajAcdGetRange("includedregion");
    target          = ajAcdGetRange("targetregion");
    excluded_region = ajAcdGetRange("excludedregion");
    left_input      = ajAcdGetString("forwardinput");
    right_input     = ajAcdGetString("reverseinput");

    /* Primer details */
    pick_anyway         = ajAcdGetBoolean("pickanyway");
    mispriming_library  = ajAcdGetInfile("mispriminglibraryfile");
    max_mispriming      = ajAcdGetFloat("maxmispriming");
    pair_max_mispriming = ajAcdGetFloat("pairmaxmispriming");
    gc_clamp            = ajAcdGetInt("gcclamp");
    opt_size            = ajAcdGetInt("osize");
    min_size            = ajAcdGetInt("minsize");
    max_size            = ajAcdGetInt("maxsize");
    opt_tm              = ajAcdGetFloat("otm");
    min_tm              = ajAcdGetFloat("mintm");
    max_tm              = ajAcdGetFloat("maxtm");
    max_diff_tm         = ajAcdGetFloat("maxdifftm");
    opt_gc_percent      = ajAcdGetFloat("ogcpercent");
    min_gc              = ajAcdGetFloat("mingc");
    max_gc              = ajAcdGetFloat("maxgc");
    salt_conc           = ajAcdGetFloat("saltconc");
    dna_conc            = ajAcdGetFloat("dnaconc");
    num_ns_accepted     = ajAcdGetInt("numnsaccepted");
    self_any            = ajAcdGetFloat("selfany");
    self_end            = ajAcdGetFloat("selfend");
    max_poly_x          = ajAcdGetInt("maxpolyx");

    /* two descriptors per pipe, zero-initialised */
    AJCNEW0(pipeto,2);
    AJCNEW0(pipefrom,2);

    /* Sequence Quality */
    /* these are not (yet) implemented */
    /*
       sequence_quality  = ajAcdGetInfile("sequencequality");
       min_quality       = ajAcdGetInt("minquality");
       min_end_quality   = ajAcdGetInt("minendquality");
       quality_range_min = ajAcdGetInt("qualityrangemin");
       quality_range_max = ajAcdGetInt("qualityrangemax");
       */

    /* Product details */
    product_opt_size    = ajAcdGetInt("psizeopt");
    product_size_range  = ajAcdGetRange("prange");
    product_opt_tm      = ajAcdGetFloat("ptmopt");
    product_min_tm      = ajAcdGetFloat("ptmmin");
    product_max_tm      = ajAcdGetFloat("ptmmax");

    /* Objective Function Penalty Weights for Primers */
    max_end_stability   = ajAcdGetFloat("maxendstability");
    /* these are not (yet) implemented */
    /*
       inside_penalty      = ajAcdGetFloat("insidepenalty");
       outside_penalty     = ajAcdGetFloat("outsidepenalty");
    */

    /* Primer penalties */
    /* these are not (yet) implemented */

    /* Internal Oligo "Sequence" Input Tags */
    internal_oligo_excluded_region = ajAcdGetRange("oexcludedregion");
    internal_oligo_input           = ajAcdGetString("oligoinput");

    /* Internal Oligo "Global" Input Tags */
    internal_oligo_opt_size       = ajAcdGetInt("osizeopt");
    internal_oligo_min_size       = ajAcdGetInt("ominsize");
    internal_oligo_max_size       = ajAcdGetInt("omaxsize");
    internal_oligo_opt_tm         = ajAcdGetFloat("otmopt");
    internal_oligo_min_tm         = ajAcdGetFloat("otmmin");
    internal_oligo_max_tm         = ajAcdGetFloat("otmmax");
    internal_oligo_opt_gc_percent = ajAcdGetFloat("ogcopt");
    internal_oligo_min_gc         = ajAcdGetFloat("ogcmin");
    internal_oligo_max_gc         = ajAcdGetFloat("ogcmax");
    internal_oligo_salt_conc      = ajAcdGetFloat("osaltconc");
    internal_oligo_dna_conc       = ajAcdGetFloat("odnaconc");
    internal_oligo_self_any       = ajAcdGetFloat("oanyself");
    internal_oligo_self_end       = ajAcdGetFloat("oendself");
    internal_oligo_max_poly_x     = ajAcdGetInt("opolyxmax");
    internal_oligo_mishyb_library = ajAcdGetInfile("mishyblibraryfile");
    internal_oligo_max_mishyb     = ajAcdGetFloat("omishybmax");
    /*
       internal_oligo_min_quality    = ajAcdGetInt("oligominquality");
    */

    /* Internal Oligo penalties */
    /* these are not (yet) implemented */


    /* EMBOSS-wrapper-specific stuff */
    outfile = ajAcdGetOutfile("outfile");


    ajStrRemoveWhite(&left_input);
    ajStrRemoveWhite(&right_input);

    /*
    ** OK - we will now try to do a separate fork-exec for each sequence.
    */

    result = ajStrNew();

    while(ajSeqallNext(sequence, &seq))
    {
        program = ajAcdGetpathC("primer3_core");

        if(!ajSysExecRedirectC(ajStrGetPtr(program),&pipeto,&pipefrom))
            ajFatal("eprimer3: Could not exec primer3_core");

        stream = eprimer3_start_write(pipeto[1]);

        /* send primer3 Primer "Global" parameters */
        eprimer3_send_bool(stream, "PRIMER_EXPLAIN_FLAG", explain_flag);
        eprimer3_send_bool(stream, "PRIMER_FILE_FLAG", file_flag);

        /* translate the numbered task option into a primer3 task name */
        if(do_hybrid)
        {
            if(!ajStrCmpC(task[0], "1"))
                ajStrAssignC(&taskstr, "pick_pcr_primers_and_hyb_probe");
            else if(!ajStrCmpC(task[0], "2"))
                ajStrAssignC(&taskstr, "pick_left_only");
            else if(!ajStrCmpC(task[0], "3"))
                ajStrAssignC(&taskstr, "pick_right_only");
            else if(!ajStrCmpC(task[0], "4"))
                ajStrAssignC(&taskstr, "pick_hyb_probe_only");

            /* with primers switched off only the probe can be picked */
            if (!do_primer)
                ajStrAssignC(&taskstr, "pick_hyb_probe_only");
        }
        else
        {
            if(!ajStrCmpC(task[0], "1"))
                ajStrAssignC(&taskstr, "pick_pcr_primers");
            else if(!ajStrCmpC(task[0], "2"))
                ajStrAssignC(&taskstr, "pick_left_only");
            else if(!ajStrCmpC(task[0], "3"))
                ajStrAssignC(&taskstr, "pick_right_only");
            else if(!ajStrCmpC(task[0], "4"))
                ajStrAssignC(&taskstr, "pick_hyb_probe_only");
        }

        eprimer3_send_string(stream, "PRIMER_TASK", taskstr);
        eprimer3_send_int(stream, "PRIMER_NUM_RETURN", num_return);
        eprimer3_send_int(stream, "PRIMER_FIRST_BASE_INDEX",
                          first_base_index);
        eprimer3_send_bool(stream, "PRIMER_PICK_ANYWAY", pick_anyway);

        /* mispriming library may not have been specified */
        if(mispriming_library)
            eprimer3_send_stringC(stream, "PRIMER_MISPRIMING_LIBRARY",
                                  ajFileGetPrintnameC(mispriming_library));

        eprimer3_send_float(stream, "PRIMER_MAX_MISPRIMING",
                            max_mispriming);
        eprimer3_send_float(stream, "PRIMER_PAIR_MAX_MISPRIMING",
                            pair_max_mispriming);
        eprimer3_send_int(stream, "PRIMER_GC_CLAMP", gc_clamp);
        eprimer3_send_int(stream, "PRIMER_OPT_SIZE", opt_size);
        eprimer3_send_int(stream, "PRIMER_MIN_SIZE", min_size);
        eprimer3_send_int(stream, "PRIMER_MAX_SIZE", max_size);
        eprimer3_send_float(stream, "PRIMER_OPT_TM", opt_tm);
        eprimer3_send_float(stream, "PRIMER_MIN_TM", min_tm);
        eprimer3_send_float(stream, "PRIMER_MAX_TM", max_tm);
        eprimer3_send_float(stream, "PRIMER_MAX_DIFF_TM", max_diff_tm);
        eprimer3_send_float(stream, "PRIMER_OPT_GC_PERCENT",
                            opt_gc_percent);
        eprimer3_send_float(stream, "PRIMER_MIN_GC", min_gc);
        eprimer3_send_float(stream, "PRIMER_MAX_GC", max_gc);
        eprimer3_send_float(stream, "PRIMER_SALT_CONC", salt_conc);
        eprimer3_send_float(stream, "PRIMER_DNA_CONC", dna_conc);
        eprimer3_send_int(stream, "PRIMER_NUM_NS_ACCEPTED",
                          num_ns_accepted);
        eprimer3_send_float(stream, "PRIMER_SELF_ANY", self_any);
        eprimer3_send_float(stream, "PRIMER_SELF_END", self_end);
        eprimer3_send_int(stream, "PRIMER_MAX_POLY_X", max_poly_x);
        eprimer3_send_int(stream, "PRIMER_PRODUCT_OPT_SIZE",
                          product_opt_size);
        eprimer3_send_range2(stream, "PRIMER_PRODUCT_SIZE_RANGE",
                             product_size_range);
        eprimer3_send_float(stream, "PRIMER_PRODUCT_OPT_TM",
                            product_opt_tm);
        eprimer3_send_float(stream, "PRIMER_PRODUCT_MIN_TM",
                            product_min_tm);
        eprimer3_send_float(stream, "PRIMER_PRODUCT_MAX_TM",
                            product_max_tm);
        eprimer3_send_float(stream, "PRIMER_MAX_END_STABILITY",
                            max_end_stability);

        /* send primer3 Internal Oligo "Global" parameters */
        eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_OPT_SIZE",
                          internal_oligo_opt_size);
        eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MIN_SIZE",
                          internal_oligo_min_size);
        eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MAX_SIZE",
                          internal_oligo_max_size);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_OPT_TM",
                            internal_oligo_opt_tm);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MIN_TM",
                            internal_oligo_min_tm);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_TM",
                            internal_oligo_max_tm);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_OPT_GC_PERCENT",
                            internal_oligo_opt_gc_percent);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MIN_GC",
                            internal_oligo_min_gc);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_GC",
                            internal_oligo_max_gc);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SALT_CONC",
                            internal_oligo_salt_conc);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_DNA_CONC",
                            internal_oligo_dna_conc);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SELF_ANY",
                            internal_oligo_self_any);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SELF_END",
                            internal_oligo_self_end);
        eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MAX_POLY_X",
                          internal_oligo_max_poly_x);

        /*
        ** internal oligo mishybridising library may not have been
        ** specified
        */
        if(internal_oligo_mishyb_library)
            eprimer3_send_stringC(stream,
                                  "PRIMER_INTERNAL_OLIGO_MISHYB_LIBRARY",
                                  ajFileGetPrintnameC(internal_oligo_mishyb_library));

        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_MISHYB",
                            internal_oligo_max_mishyb);


        /*
        ** Start sequence-specific stuff
        */

        begin = ajSeqallGetseqBegin(sequence) - 1;
        end   = ajSeqallGetseqEnd(sequence) - 1;

        strand = ajSeqGetSeqCopyS(seq);

        ajStrFmtUpper(&strand);
        ajStrAssignSubC(&substr,ajStrGetPtr(strand), begin, end);

        /*
        ** substr now holds the characters it needs, so the copy made
        ** for this sequence is released here; otherwise one copy would
        ** be leaked for every sequence but the last
        */
        ajStrDel(&strand);

        /* send flags to turn on using optimal product size */
        eprimer3_send_float(stream, "PRIMER_PAIR_WT_PRODUCT_SIZE_GT",
                            (float)0.05);
        eprimer3_send_float(stream, "PRIMER_PAIR_WT_PRODUCT_SIZE_LT",
                            (float)0.05);

        /* send primer3 Primer "Sequence" parameters */
        eprimer3_send_string(stream, "SEQUENCE", substr);

        /* if no ID name, use the USA */
        if(ajStrMatchC(ajSeqGetNameS(seq),""))
            eprimer3_send_string(stream, "PRIMER_SEQUENCE_ID",
                                 ajSeqGetUsaS(seq));
        else
            eprimer3_send_string(stream, "PRIMER_SEQUENCE_ID",
                                 ajSeqGetNameS(seq));

        eprimer3_send_range(stream, "INCLUDED_REGION", included_region,
                            begin);
        eprimer3_send_range(stream, "TARGET", target, begin);
        eprimer3_send_range(stream, "EXCLUDED_REGION", excluded_region,
                            begin);
        eprimer3_send_string(stream, "PRIMER_LEFT_INPUT", left_input);
        eprimer3_send_string(stream, "PRIMER_RIGHT_INPUT", right_input);

        /* send primer3 Internal Oligo "Sequence" parameters */
        eprimer3_send_range(stream,
                            "PRIMER_INTERNAL_OLIGO_EXCLUDED_REGION",
                            internal_oligo_excluded_region, begin);
        eprimer3_send_string(stream, "PRIMER_INTERNAL_OLIGO_INPUT",
                             internal_oligo_input);


        /* end the primer3 input sequence record with a '=' */
        eprimer3_send_end(stream);
        /* and close the output pipe stream */
        eprimer3_end_write(stream);

        /* read the primer3 output */
        eprimer3_read(pipefrom[0], &result);

        eprimer3_report(outfile, result, num_return, begin);

        ajStrSetClear(&result);

#ifndef WIN32
        close(pipeto[1]);
        close(pipefrom[0]);
#endif
    }	/* end of sequence loop */


    ajStrDel(&result);
    ajStrDel(&strand);
    ajStrDel(&substr);
    ajFileClose(&outfile);
    ajStrDel(&taskstr);
    ajStrDelarray(&task);

    ajSeqallDel(&sequence);
    ajSeqDel(&seq);

    ajRangeDel(&included_region);
    ajRangeDel(&target);
    ajRangeDel(&excluded_region);
    ajRangeDel(&product_size_range);
    ajRangeDel(&internal_oligo_excluded_region);

    ajStrDel(&left_input);
    ajStrDel(&right_input);
    ajStrDel(&internal_oligo_input);

    AJFREE(pipeto);
    AJFREE(pipefrom);

    /* both optional library files are closed, not just the first */
    ajFileClose(&mispriming_library);
    ajFileClose(&internal_oligo_mishyb_library);

    embExit();

    return 0;
}
Ejemplo n.º 12
0
/*
** domainalign main program.
**
** Reads domains one at a time from a DCF (domain classification) file and
** groups consecutive domains that share the same node identifier (the node
** level is selected by the ACD "node" option).  Whenever the node changes,
** the family just completed is handed to STAMP (MODE_STAMP) or to TCOFFEE
** (any other mode) if it has more than one member; singlet families are
** passed to domainalign_keepsinglets() when "keepsinglets" is set.  The last
** family is processed after the read loop ends.  Temporary files used by
** STAMP are removed before exit.
**
** @param [r] argc [int] Number of command-line arguments.
** @param [r] argv [char**] Command-line arguments.
** @return [int] Always 0 (fatal errors terminate via ajFatal).
*/
int main(int argc, char **argv)
{
    ajint     famn      = 0;     /* Counter for the families.                */
    ajint     nset      = 0;     /* No. entries in family.                   */

    ajint     last_nodeid = 0;   /* SCOP Sunid of last family that was
                                    processed.                               */
    AjPStr    last_node  = NULL; /* Last family that was processed.          */
    AjPStr    exec       = NULL; /* The UNIX command line to be executed.    */
    AjPStr    out        = NULL; /* Name of stamp alignment file.            */
    AjPStr    align      = NULL; /* Name of sequence alignment file.         */
    AjPStr    alignc     = NULL; /* Name of structure alignment file.        */
    AjPStr    log        = NULL; /* Name of STAMP log file.                  */
    AjPStr    dom        = NULL; /* Name of file containing single domain.   */
    AjPStr    set        = NULL; /* Name of file containing set of domains.  */
    AjPStr    scan       = NULL; /* Name of temp. file used by STAMP.        */
    AjPStr    sort       = NULL; /* Name of temp. file used by STAMP.        */
    AjPStr    name       = NULL; /* Base name of STAMP temp files.           */
    AjPStr    pdbnames   = NULL; /* Names of domain pdb files to be passed to
                                    TCOFFEE.                                 */
    AjPDir    pdb        = NULL; /* Path of domain coordinate files (pdb
                                    format input).                           */
    AjPDirout daf        = NULL; /* Path of sequence alignment files for
                                    output.                                  */
    AjPDirout super      = NULL; /* Path of structure alignment files for
                                    output.                                  */
    AjPDirout singlets   = NULL; /* Path of FASTA singlet sequence files for
                                    output.                                  */
    AjPStr    temp1      = NULL; /* A temporary string.                      */

    AjPFile   dcfin      = NULL; /* File pointer for original Escop.dat file.*/
    AjPFile   domf       = NULL; /* File pointer for single domain file.     */
    AjPFile   setf       = NULL; /* File pointer for domain set file.        */
    AjPFile   logf       = NULL; /* Log file.                                */

    AjPDomain domain     = NULL; /* Pointer to domain structure.             */
    AjPDomain prevdomain = NULL; /* Pointer to previous domain structure.    */

    ajint     type       = 0;    /* Type of domain (ajSCOP or ajCATH) in the
                                    DCF file.                                */

    AjPStr   *node       = NULL; /* Node of alignment.                       */
    ajint     noden      = 0;    /* 1: Class (SCOP), 2: Fold (SCOP) etc, see
                                    ACD file.                                */

    AjPStr   *mode       = NULL; /* Mode of operation from acd.              */
    ajint     moden      = 0;    /* Program mode, 1: MODE_STAMP,
                                    2: MODE_TCOFFEE (not yet implemented).   */
    AjBool    keepsinglets = ajFalse; /* Whether to retain sequences of
                                         singlet families and write them to
                                         an output file.                     */

    AjPStr    temp      = NULL;  /* A temporary string.                      */
    AjPStr    cmd       = NULL;  /* The command line to execute t-coffee.    */


    /* Initialise strings etc. */
    last_node = ajStrNew();
    exec      = ajStrNew();
    out       = ajStrNew();
    align     = ajStrNew();
    alignc    = ajStrNew();
    log       = ajStrNew();
    dom       = ajStrNew();
    set       = ajStrNew();
    scan      = ajStrNew();
    sort      = ajStrNew();
    name      = ajStrNew();
    temp      = ajStrNew();
    temp1     = ajStrNew();
    cmd       = ajStrNew();
    pdbnames  = ajStrNew();


    /* Read data from acd. */
    embInitPV("domainalign",argc,argv,"DOMALIGN",VERSION);

    dcfin         = ajAcdGetInfile("dcfinfile");
    pdb           = ajAcdGetDirectory("pdbdir");
    daf           = ajAcdGetOutdir("dafoutdir");
    super         = ajAcdGetOutdir("superoutdir");
    singlets      = ajAcdGetOutdir("singletsoutdir");
    node          = ajAcdGetList("node");
    mode          = ajAcdGetList("mode");
    keepsinglets  = ajAcdGetToggle("keepsinglets");
    logf          = ajAcdGetOutfile("logfile");


    /* Convert the selected node and mode to an integer. */
    if(!(ajStrToInt(node[0], &noden)))
        ajFatal("Could not parse ACD node option");
    if(!(ajStrToInt(mode[0], &moden)))
        ajFatal("Could not parse ACD mode option");


    /* Initialise random number generator for naming of temp. files. */
    ajRandomSeed();
    ajFilenameSetTempname(&name);


    /* Create names for temp. files: each is the base name plus a suffix. */
    ajStrAssignS(&log, name);
    ajStrAppendC(&log, ".log");
    ajStrAssignS(&dom, name);
    ajStrAppendC(&dom, ".dom");
    ajStrAssignS(&set, name);
    ajStrAppendC(&set, ".set");
    ajStrAssignS(&scan, name);
    ajStrAppendC(&scan, ".scan");
    ajStrAssignS(&sort, name);
    ajStrAppendC(&sort, ".sort");
    ajStrAssignS(&out, name);
    ajStrAppendC(&out, ".out");


    /* Initialise last_node with something that is not in SCOP. */
    ajStrAssignC(&last_node,"!!!!!");


    /* Open STAMP domain set file. */
    if(moden == MODE_STAMP)
    {
        if(!(setf=ajFileNewOutNameS(set)))
            ajFatal("Could not open domain set file\n");
    }


    /* Get domain type. */
    type = ajDomainDCFType(dcfin);


    /* Start of main application loop. */
    while((domain=(ajDomainReadCNew(dcfin, "*", type))))
    {
        /* A new family: the identifier at the selected node level differs
           from that of the previously processed domain. */
        if(((domain->Type == ajSCOP) &&
            (((noden==1) && (last_nodeid != domain->Scop->Sunid_Class))      ||
             ((noden==2) && (last_nodeid != domain->Scop->Sunid_Fold))       ||
             ((noden==3) && (last_nodeid != domain->Scop->Sunid_Superfamily))||
             ((noden==4) && (last_nodeid != domain->Scop->Sunid_Family))))   ||
           ((domain->Type == ajCATH) &&
            (((noden==5) && (last_nodeid != domain->Cath->Class_Id))         ||
             ((noden==6) && (last_nodeid != domain->Cath->Arch_Id))          ||
             ((noden==7) && (last_nodeid != domain->Cath->Topology_Id))      ||
             ((noden==8) && (last_nodeid != domain->Cath->Superfamily_Id))   ||
             ((noden==9) && (last_nodeid != domain->Cath->Family_Id)))))
        {
            /* If we have done the first family, the previous family is now
               complete and can be aligned. */
            if(famn)
            {

                /* Create the output file for the alignment - the name will
                   be the same as the Sunid for the DOMAIN family. */
                domainalign_writeid(prevdomain, noden, daf, super,
                                    &align, &alignc);

                if(moden == MODE_STAMP)
                {
                    /* Close domain set file. */
                    ajFileClose(&setf);

                    /* Call STAMP. */

                    /* Family with 2 or more entries. */
                    if(nset > 1)
                    {
                        domainalign_stamp(prevdomain,
                                          domain,
                                          daf,
                                          super,
                                          singlets,
                                          align,
                                          alignc,
                                          dom,
                                          name,
                                          set,
                                          scan,
                                          sort,
                                          log,
                                          out,
                                          keepsinglets,
                                          moden,
                                          noden,
                                          nset,
                                          logf);
                    }

                    else if(keepsinglets) /* Singlet family. */
                        domainalign_keepsinglets(prevdomain, noden,
                                                 singlets, logf);


                    /* Re-open STAMP domain set file for the new family. */
                    if(!(setf=ajFileNewOutNameS(set)))
                        ajFatal("Could not open domain set file\n");
                }
                else
                {
                    /* Call TCOFFEE. */
                    if(nset > 1)
                        domainalign_tcoffee(prevdomain, out, align,
                                            alignc, pdbnames, noden, logf);
                    else if(keepsinglets) /* Singlet family. */
                        domainalign_keepsinglets(prevdomain, noden,
                                                 singlets, logf);
                }

                /* Set the number of members of the new family to zero. */
                nset = 0;

                /* Clear TCOFFEE argument. */
                ajStrSetClear(&pdbnames);
            }


            /* Open, write and close STAMP domain file. */
            if(moden == MODE_STAMP)
            {
                if(!(domf=ajFileNewOutNameS(dom)))
                    ajFatal("Could not open domain file\n");
                ajStrAssignS(&temp, ajDomainGetId(domain));
                ajStrFmtLower(&temp);
                ajFmtPrintF(domf, "%S %S { ALL }\n", temp, temp);
                ajFileClose(&domf);
            }


            /* Copy current family name to last_node. */
            domainalign_writelast(domain, noden, &last_node, &last_nodeid);

            /* Copy current domain pointer to prevdomain. */
            ajDomainDel(&prevdomain);
            prevdomain=NULL;
            ajDomainCopy(&prevdomain, domain);

            /* Increment family counter. */
            famn++;
        }


        ajStrAssignS(&temp, ajDomainGetId(domain));
        ajStrFmtLower(&temp);

        /* Write STAMP domain set file. */
        if(moden == MODE_STAMP)
            ajFmtPrintF(setf, "%S %S { ALL }\n", temp, temp);
        /* Write TCOFFEE argument. */
        else
        {
            ajStrAppendS(&pdbnames, ajDirGetPath(pdb));
            ajStrAppendS(&pdbnames, temp);
            ajStrAppendC(&pdbnames, ".");
            ajStrAppendS(&pdbnames, ajDirGetExt(pdb));
            ajStrAppendC(&pdbnames, " ");
        }

        ajDomainDel(&domain);

        /* Increment number of members in family. */
        nset++;
    }

    /* End of main application loop.  From here on 'domain' aliases
       'prevdomain'; the single object is released once, via 'domain', in
       the tidy-up section below. */
    /* NOTE(review): if the DCF file yielded no domains, prevdomain is NULL
       here and is passed to the helpers below - confirm they tolerate it. */
    domain=prevdomain;


    ajFmtPrint("\nProcessing node %d\n", last_nodeid);


    /* Create the output file for the alignment - the name will
       be the same as the Sunid for the DOMAIN family. */
    domainalign_writeid(prevdomain, noden, daf, super, &align, &alignc);


    /* Code to process last family. */
    if(moden == MODE_STAMP)
    {
        /* Close domain set file. */
        ajFileClose(&setf);

        if(nset > 1)
        {
            domainalign_stamp(prevdomain,
                              domain,
                              daf,
                              super,
                              singlets,
                              align,
                              alignc,
                              dom,
                              name,
                              set,
                              scan,
                              sort,
                              log,
                              out,
                              keepsinglets,
                              moden,
                              noden,
                              nset,
                              logf);
        }

        else if(keepsinglets) /* Singlet family. */
            domainalign_keepsinglets(prevdomain, noden, singlets, logf);

    }
    else
    {
        /* Call TCOFFEE. */
        if(nset > 1)
            domainalign_tcoffee(prevdomain, out, align, alignc,
                                pdbnames, noden, logf);
        else if(keepsinglets) /* Singlet family. */
            domainalign_keepsinglets(prevdomain, noden, singlets, logf);
    }


    /* Remove all temporary files. */

    ajSysFileUnlinkS(log);
    ajSysFileUnlinkS(dom);
    ajSysFileUnlinkS(set);
    ajSysFileUnlinkS(scan);
    ajSysFileUnlinkS(sort);
    ajSysFileUnlinkS(out);
    ajStrAssignS(&temp, name);
    ajStrAppendC(&temp, ".mat");
    ajSysFileUnlinkS(temp);


    /* Tidy up. */
    ajDomainDel(&domain);
    ajFileClose(&dcfin);
    ajStrDel(&last_node);
    ajStrDel(&exec);
    ajStrDel(&log);
    ajStrDel(&dom);
    ajStrDel(&set);
    ajStrDel(&scan);
    ajStrDel(&sort);
    ajStrDel(&name);
    ajStrDel(&out);
    ajStrDel(&align);
    ajStrDel(&alignc);
    ajStrDel(&pdbnames);
    ajDirDel(&pdb);
    ajDiroutDel(&daf);
    ajDiroutDel(&super);
    ajDiroutDel(&singlets);
    ajStrDel(&temp);
    ajStrDel(&temp1);
    ajStrDel(&cmd);     /* was allocated above but previously never freed */
    ajStrDel(&node[0]);
    AJFREE(node);
    ajStrDel(&mode[0]);
    AJFREE(mode);
    ajFileClose(&logf);

    ajExit();
    return 0;
}
Ejemplo n.º 13
0
/*
** getorf_FindORFs
**
** Scans a nucleotide sequence codon by codon in all three forward frames
** and writes every open reading frame whose length lies within
** [minsize, maxsize] to the output via getorf_WriteORF().
**
** Two scanning strategies are used, selected by 'find':
**   - STOP-to-STOP style (P_STOP2STOP, N_STOP2STOP, AROUND_INIT_STOP,
**     AROUND_END_STOP): an ORF runs from just after one STOP codon to the
**     next STOP codon or to the end of the sequence.
**   - START-to-STOP style (all other values, e.g. P_START2STOP,
**     N_START2STOP, AROUND_START): an ORF begins at a START codon and runs
**     to the next STOP codon or to the end of the sequence.
**
** For circular STOP-to-STOP searches (P_STOP2STOP / N_STOP2STOP only), a
** frame that contains no STOP codon at all is reported as one full-length
** ORF after the scan.
**
** @param [r] seq [const AjPSeq] Sequence to scan.
** @param [r] len [ajint] Position limit used for circular genomes: a
**                        terminating codon at pos >= len is treated as
**                        "past the end of the genome".  Also passed on to
**                        getorf_WriteORF().
** @param [r] trnTable [const AjPTrn] Translation table.
** @param [r] minsize [ajuint] Minimum ORF length to report.
** @param [r] maxsize [ajuint] Maximum ORF length to report.
** @param [u] seqout [AjPSeqout] Output stream, passed to getorf_WriteORF().
** @param [r] sense [AjBool] Passed through to getorf_WriteORF().
** @param [r] circular [AjBool] True if the genome is circular.
** @param [r] find [ajint] Search mode (see above).
** @param [u] orf_no [ajint*] Passed through to getorf_WriteORF().
** @param [r] methionine [AjBool] If true, the initial START codon of an ORF
**                                is appended as 'M' instead of its
**                                translated residue.
** @param [r] around [ajint] Passed through to getorf_WriteORF().
** @param [u] record [ORFrec*] Not referenced by this function.
** @return [void]
*/
void getorf_FindORFs(const AjPSeq seq, ajint len, const AjPTrn trnTable,
                     ajuint minsize, ajuint maxsize, AjPSeqout seqout,
                     AjBool sense, AjBool circular, ajint find,
                     ajint *orf_no, AjBool methionine, ajint around,
                     ORFrec *record) {
  AjBool ORF[3];            /* true if found an ORF */
  AjBool LASTORF[3];        /* true if hit the end of an ORF past
                               the end on the genome in this
                               frame */
  AjBool GOTSTOP[3];        /* true if found a STOP in a circular
                               genome's frame when
                               find = P_STOP2STOP or
                               N_STOP2STOP */
  ajint start[3];           /* possible starting position of the
                               three frames; only read for a frame
                               once ORF[frame] has been set */
  ajint pos;
  ajint codon;
  char aa;
  ajint frame;
  AjPStr newstr[3];         /* strings of the three frames of ORF
                               sequences that we are growing */
  AjPSeq pep = NULL;
  ajint i;

  ajint seqlen;
  const char *chrseq;

  seqlen = ajSeqGetLen(seq);
  chrseq = ajSeqGetSeqC(seq);

  /* initialise the ORF sequences */
  newstr[0] = NULL;
  newstr[1] = NULL;
  newstr[2] = NULL;

  /*
  ** initialise flags for found the last ORF past the end of a circular
  ** genome
  */
  LASTORF[0] = ajFalse;
  LASTORF[1] = ajFalse;
  LASTORF[2] = ajFalse;

  /* initialise flags for found at least one STOP codon in a frame */
  GOTSTOP[0] = ajFalse;
  GOTSTOP[1] = ajFalse;
  GOTSTOP[2] = ajFalse;

  if (circular || find == P_START2STOP || find == N_START2STOP ||
      find == AROUND_START) {
    ORF[0] = ajFalse;
    ORF[1] = ajFalse;
    ORF[2] = ajFalse;
  } else {
    /*
    ** assume already in a ORF so we get ORFs at the start of the
    ** sequence
    */
    ORF[0] = ajTrue;
    ORF[1] = ajTrue;
    ORF[2] = ajTrue;
    start[0] = 0;
    start[1] = 1;
    start[2] = 2;
  }

  /*
  ** The loop visits every position that starts a complete codon.  The
  ** last three positions (pos >= seqlen-5) hold the final complete codon
  ** of each of the three frames.
  */
  for (pos=0; pos<seqlen-2; pos++) {
    codon = ajTrnStartStopC(trnTable, &chrseq[pos], &aa);
    frame = pos % 3;
    ajDebug("len=%d, Pos=%d, Frame=%d start/stop=%d, aa=%c",
            len, pos, frame, codon, aa);

    /* don't want to find extra ORFs when already been round circ */
    if (LASTORF[frame])
      continue;

    if (find == P_STOP2STOP || find == N_STOP2STOP ||
        find == AROUND_INIT_STOP || find == AROUND_END_STOP) {
      /* look for stop codon to begin reporting ORF */

      /* note that there was at least one STOP in a circular genome */
      if (codon == STOP) {
        GOTSTOP[frame] = ajTrue;
      }

      /* write details if a STOP is hit or the end of the sequence */
      if (codon == STOP || pos >= seqlen-5) {

        /*
        ** End of the sequence? If so, append any
        ** last codon to the sequence - otherwise, ignore the STOP
        ** codon
        */
        if (codon != STOP)
          getorf_AppORF(find, &newstr[frame], chrseq, pos,
                        aa);

        /* Already have a sequence to write out? */
        if (ORF[frame]) {
          if (ajStrGetLen(newstr[frame]) >= minsize &&
              ajStrGetLen(newstr[frame]) <= maxsize) {
            /*
            ** create a new sequence; the ORF ends just before the
            ** STOP codon, or at the last base of the final codon
            */
            if (codon == STOP)
              getorf_WriteORF(seq, len, seqlen, sense,
                              find, orf_no, start[frame],
                              pos-1, newstr[frame],
                              seqout, around);
            else
              getorf_WriteORF(seq, len, seqlen, sense,
                              find, orf_no, start[frame],
                              pos+2, newstr[frame],
                              seqout, around);
          }

          ajStrSetClear(&newstr[frame]);
        }

        /*
        ** if its a circular genome and the STOP codon hits past
        ** the end of the genome in all frames, then break
        */
        if (circular && pos >= len) {
          ORF[frame] = ajFalse; /* past the end of the genome */
          LASTORF[frame] = ajTrue; /* finished getting ORFs */
          if (LASTORF[0] && LASTORF[1] && LASTORF[2])
            break;
        } else {
          /*
          ** hit a STOP, therefore a potential ORF to write
          ** out next time, even if the genome is circular
          */
          ORF[frame]   = ajTrue;
          start[frame] = pos+3; /* next start of the ORF */
        }

      } else if (ORF[frame])
        /* append sequence to newstr if in an ORF */
        getorf_AppORF(find, &newstr[frame], chrseq, pos, aa);
    } else { /* Look for start: P_START2STOP N_START2STOP AROUND_START */

      if (codon == START && !ORF[frame]) {
        /* not in a ORF already and found a START */
        if (pos < len) {
          /*
          **  reset the newstr to zero length to enable
          **  storing the ORF for this
          */
          ajStrSetClear(&newstr[frame]);
          ORF[frame] = ajTrue; /* now in an ORF */
          start[frame] = pos;    /* start of the ORF for this frame */
          if (methionine)
            getorf_AppORF(find, &newstr[frame], chrseq,
                          pos, 'M');
          else
            getorf_AppORF(find, &newstr[frame], chrseq,
                          pos, aa);
        }
      } else if (codon == STOP) {
        /* hit a STOP */

        /* Already have a sequence to write out? */
        if (ORF[frame]) {
          ORF[frame] = ajFalse; /* not in an ORF */

          if (ajStrGetLen(newstr[frame]) >= minsize &&
              ajStrGetLen(newstr[frame]) <= maxsize) {
            /* create a new sequence */
            getorf_WriteORF(seq, len, seqlen, sense,
                            find, orf_no, start[frame],
                            pos-1, newstr[frame],
                            seqout, around);
          }
        }

        /*
        ** if a circular genome and hit the STOP past
        ** the end of the genome in all frames, then break
        */
        if (circular && pos >= len) {
          LASTORF[frame] = ajTrue; /* finished getting ORFs */
          if (LASTORF[0] && LASTORF[1] && LASTORF[2]) break;
        }

        ajStrSetClear(&newstr[frame]);
      } else if (pos >= seqlen-5) {
        /* hit the end of the sequence  without a stop */

        /* Already have a sequence to write out? */
        if (ORF[frame]) {
          ORF[frame] = ajFalse; /* not in an ORF */

          /*
          ** End of the sequence? If so, append any
          ** last codon to the sequence - otherwise, ignore the
          ** STOP codon
          */
          if (pos >= seqlen-5 && pos < seqlen-2)
            getorf_AppORF(find, &newstr[frame], chrseq,
                          pos, aa);

          if (ajStrGetLen(newstr[frame]) >= minsize &&
              ajStrGetLen(newstr[frame]) <= maxsize) {
            /* create a new sequence */
            getorf_WriteORF(seq, len, seqlen, sense,
                            find, orf_no, start[frame],
                            pos+2, newstr[frame],
                            seqout, around);
          }
        }

        /*
        ** if a circular genome and hit the end past
        ** the end of the genome in all frames, then break
        */
        if (circular && pos >= len) {
          LASTORF[frame] = ajTrue; /* finished getting ORFs */
          if (LASTORF[0] && LASTORF[1] && LASTORF[2]) break;
        }

        ajStrSetClear(&newstr[frame]);
      } else
        if (ORF[frame])
          getorf_AppORF(find, &newstr[frame], chrseq, pos,
                        aa);

    }
  }

  /*
  ** A STOP-to-STOP ORF that spans the full length of a circular genome
  ** is not found by the scan above when its frame contains no STOP codon,
  ** so report such frames here.  Frame i (0-based) is translated with
  ** frame number i+1 and starts at offset i.  The same size window
  ** [minsize, maxsize] is applied to every frame.
  */
  if ((find == P_STOP2STOP || find == N_STOP2STOP) && circular) {
    for (i=0; i<3; ++i) {
      if (GOTSTOP[i])
        continue;

      /* translate this frame into pep */
      pep = ajTrnSeqOrig(trnTable, seq, i+1);
      if (ajSeqGetLen(pep) >= minsize &&
          ajSeqGetLen(pep) <= maxsize)
        getorf_WriteORF(seq, len, seqlen, sense, find, orf_no,
                        i, seqlen-1, ajSeqGetSeqS(pep), seqout,
                        around);
      ajSeqDel(&pep);
    }
  }

  for (i=0;i<3;++i)
    ajStrDel(&newstr[i]);

  return;
}