Exemplo n.º 1
0
static ajint cutgextract_readcodons(AjPFile inf, AjBool allrecords,
				    ajint *count)
{
    static int cutidx[] =
    {
	42,43,46,41,45,44,26,30,31,29,27,28,48,51,47,50,
	52,49,55,56,53,54,36,38,35,37, 4, 6, 3, 5,17,18,
	16,15,57,59,60,58,24,25,34,33,39,40,20,19,11,12,
	10, 9,63,62, 8, 7,14,13,21,23,22,32,61, 1, 0, 2
    };
    AjPStr line  = NULL;
    AjPStr value = NULL;
    ajint thiscount[64];

    AjPStrTok token = NULL;
    ajint i;
    ajint n = 0;
    ajint nstops = 0;


    if(!line)
    {
	line  = ajStrNew();
	value = ajStrNew();
    }

    if(!ajReadlineTrim(inf,&line))
	ajFatal("Premature end of file");


    token = ajStrTokenNewC(line," \n\t\r");
    for(i=0;i<CODONS;++i)
    {
	ajStrTokenNextParseC(&token," \n\t\r",&value);
	ajStrToInt(value,&n);
	thiscount[cutidx[i]] = n;
	if(i>60)
	    nstops += n;
    }

    ajStrDel(&line);
    ajStrDel(&value);
    ajStrTokenDel(&token);

    if(!allrecords)
	if(nstops > 1)
	    return -1;

    for(i=0;i<CODONS;++i)
    {
	count[i] += thiscount[i];
    }	

    return nstops;
}
Exemplo n.º 2
0
int main(int argc, char **argv)
{
    AjPSeqall  seqall;
    AjPSeq     a;
    AjPSeqout  outf;
    AjPStr     substr;
    AjPStr     back;
    AjPStr     gctable;
    AjPCod     codon = NULL;
 
    ajint      gctablenum;

    ajint beg;
    ajint end;

    embInit("backtranambig", argc, argv);

    seqall    = ajAcdGetSeqall("sequence");
    outf      = ajAcdGetSeqoutall("outfile");
    gctable   = ajAcdGetListSingle("table");
    ajStrToInt(gctable, &gctablenum);

    codon = ajCodNewCodenum(gctablenum);
    while(ajSeqallNext(seqall, &a))
    {
        substr = ajStrNew();
        beg    = ajSeqGetBegin(a);
        end    = ajSeqGetEnd(a);
        ajStrAssignSubC(&substr,ajSeqGetSeqC(a),beg-1,end-1);

        back = ajStrNew();
        ajCodBacktranslateAmbig(&back,substr,codon);

        ajSeqAssignSeqS (a, back);
        ajSeqSetNuc(a);

        ajSeqoutWriteSeq(outf,a);
    }

    ajSeqoutClose(outf);

    ajStrDel(&back);
    ajStrDel(&substr);
    ajSeqoutDel(&outf);
    ajCodDel(&codon);
    ajStrDel(&gctable);
    ajSeqallDel(&seqall);
    ajSeqDel(&a);

    embExit();

    return 0;
}
Exemplo n.º 3
0
static void eprimer3_write_primer(AjPFile outfile, const char *tag,
                                  const AjPStr pos,
                                  const AjPStr tm, const AjPStr gc,
                                  const AjPStr seq,
                                  AjBool rev, ajint begin)
{
    ajint startint;
    ajint lenint;
    float tmfloat;
    float gcfloat;
    AjPStr start = NULL;
    AjPStr lenstr = NULL;
    ajlong comma;

    if(ajStrGetLen(pos))
    {
        ajStrToFloat(tm, &tmfloat);
        ajStrToFloat(gc, &gcfloat);
        comma = ajStrFindC(pos, ",");
        ajStrAssignS(&start, pos);
        ajStrCutRange(&start, comma, ajStrGetLen(start)-1);
        ajStrToInt(start, &startint);
        startint += begin;
        ajStrAssignS(&lenstr, pos);
        ajStrCutRange(&lenstr, 0, comma);
        ajStrToInt(lenstr, &lenint);
        if(rev)
            startint = startint - lenint + 1;

        ajFmtPrintF(outfile, "     %s  %6d %4d  %2.2f  %2.2f  %S\n\n",
                    tag, startint, lenint, tmfloat, gcfloat, seq);
    }


    ajStrDel(&start);
    ajStrDel(&lenstr);

    return;
}
Exemplo n.º 4
0
static int infoalign_Getrefseq(const AjPStr refseq, const AjPSeqset seqset)
{
    ajint i;
    const AjPSeq seq;

    for(i=0; i<(ajint)ajSeqsetGetSize(seqset); i++)
    {
	seq = ajSeqsetGetseqSeq(seqset, i);
	if(!ajStrCmpS(ajSeqGetNameS(seq), refseq))
	    return i;
    }

    /* not a name of a sequence, so it must be a number */
    if(!ajStrToInt(refseq, &i))
	ajFatal("Reference sequence is not a sequence ID or a number: %S",
		refseq);

    if(i < 0 || i > (ajint) ajSeqsetGetSize(seqset))
	ajFatal("Reference sequence number < 0 or > number "
		"of input sequences: %d", i);

    return i-1;
}
Exemplo n.º 5
0
int main(int argc, char **argv)
{
    AjPDasServer server   = NULL;
    AjPDasSource source   = NULL;
    AjPDasSegment segment = NULL;

    AjPStr host  = NULL;
    AjPStr path  = NULL;
    AjPFile outf = NULL;

    ajint port = 80;

    AjBool sequencesourcesonly;
    AjBool entrypoints;
    AjBool showtestqueries;
    AjBool runtestqueries = ajTrue;
    AjBool quickexit = ajFalse;

    ajint itest=0;
    ajint j=0;

    ajint maxtests=0;
    ajint maxfeatures=0;
    ajint maxsegments=0;

    AjIList iter     = NULL;
    AjIList coordsi  = NULL;
    AjIList itereps  = NULL;
    AjPFilebuff buff = NULL;
    AjPUrlref uo        = NULL;
    AjPList segments = NULL;
    AjPStr ffname    = NULL;
    AjPStr url       = NULL;

    AjPStr dbhttpver = ajStrNew();
    AjPStr dbname    = ajStrNew();
    AjPStr dbproxy   = ajStrNew();

    AjPStr servername   = NULL;
    AjPTable titlecount = NULL;
    const ajuint* count;
    int k=0;

    embInit("dastest", argc, argv);

    host = ajAcdGetString("host");
    path = ajAcdGetString("path");
    port = ajAcdGetInt("port");
    sequencesourcesonly = ajAcdGetBoolean("sequencesourcesonly");
    entrypoints = ajAcdGetBoolean("entrypoints");
    showtestqueries = ajAcdGetBoolean("showtestqueries");
    runtestqueries  = ajAcdGetBoolean("runtestqueries");

    servername = ajAcdGetString("servername");

    outf   = ajAcdGetOutfile("outfile");

    maxtests     = ajAcdGetInt("maxtests");
    maxfeatures = ajAcdGetInt("maxfeatures");
    maxsegments = ajAcdGetInt("maxsegments");

    server = ajDasServerNew();

    if(runtestqueries)
    {
	url = ajStrNew();

	if(!ajNamServer(servername))
	{
	    ajWarn("following das server is required to be defined "
		    "for test queries...");
	    ajWarn("\nSERVER %S [\n"
		    "   type: \"sequence\"\n"
		    "   method: \"das\"\n"
		    "   url: \"http://%S%S\"\n"
		    "]\n",servername, host,path);
	    ajWarn("ignoring -runtestqueries option...");
	    runtestqueries = ajFalse;
	}
	else
	{
	    ajNamSvrGetUrl(servername, &url);
	    ajHttpUrlDeconstruct(url, &port, &host, &path);

	    ajStrDel(&url);
	}
    }

    ajDasServerSethostS(server,host);
    ajDasServerSetport(server,port);


    if(ajStrGetCharLast(path)!='/')
	ajStrAppendK(&path,'/');

    ajStrAppendC(&path,"sources");

    ajDasServerSetpathS(server,path);

    ajFmtPrintF(outf,"host = %S\npath = %S\nport = %d\n",
	    server->host, server->path, server->port);


    /*
     * TODO: stop using http-read but instead use
     *       ajNamSvrListListDatabases(svrname, dbnames);
     */

    buff = ajHttpRead(dbhttpver, dbname, dbproxy, host, port, path);

    if(!buff)
	ajExitAbort();

    ajFilebuffHtmlNoheader(buff);

    ajDasParseRegistry(buff, server->sources);
    ajFmtPrintF(outf,"DAS sources and descriptions\n\n");

    titlecount = dastestGetTitleCount(server);

    iter = ajListIterNew(server->sources);

    while(!ajListIterDone(iter) && !quickexit)
    {
	source = ajListIterGet(iter);


	if ((sequencesourcesonly && !source->sequence)
		|| ajStrMatchC(source->title,"cath") || k++ <50)
	    continue;

	ajFmtPrintF(outf,"%-30S %-50S\n%S\n",source->uri,source->title,
		source->description);

	if(entrypoints && source->entry_points)
	{
	    uo = ajHttpUrlrefNew();

	    ajHttpUrlrefParseC(&uo, ajStrGetPtr(source->entry_points_uri));

	    if(ajStrGetLen(uo->Port))
		ajStrToInt(uo->Port, &port);
	    else
		port = 80;

	    ajFilebuffDel(&buff);
	    buff = ajHttpRead(dbhttpver, dbname, dbproxy,
	                      uo->Host, port, uo->Absolute);
	    ajHttpUrlrefDel(&uo);

	    if(!buff)
		continue;

	    ajFilebuffHtmlNoheader(buff);

	    segments = ajListNew();
	    ajDasParseEntrypoints(buff, segments);

	    itereps = ajListIterNew(segments);

	    ajFmtPrintF(outf, "Number of entry points %d\n",
		    ajListGetLength(segments));

	    j=0;

	    while(!ajListIterDone(itereps))
	    {
		segment = ajListIterGet(itereps);

		if (j++ < maxsegments)
		    ajFmtPrintF(outf,
		            "segment id:%S orientation:%S start:%d stop:%d\n",
		            segment->id,
		            segment->orientation,
		            segment->start, segment->stop);

		ajDasSegmentDel(&segment);
	    }

	    ajListIterDel(&itereps);
	    ajListFree(&segments);
	}

	if(showtestqueries || runtestqueries)
	{
	    AjPDasCoordinate coord;

	    coordsi = ajListIterNew(source->coordinates);

	    while(!ajListIterDone(coordsi) && !quickexit)
	    {
		coord = ajListIterGet(coordsi);
		ajDebug("coordinate uri:%S taxid:%S source:%S test_range:%S\n",
			coord->uri,
			coord->taxid,
			coord->source,
			coord->test_range);

		if(showtestqueries)
		{
		    if(source->sequence)
			ajFmtPrintF(outf,
			       "example/test entry = '%S?segment=%S'\n",
			       source->sequence_query_uri,coord->test_range);

		    if(source->features)
			ajFmtPrintF(outf,
			       "example/test entry = '%S?segment=%S'\n",
			       source->features_query_uri,coord->test_range);

		}

		if(runtestqueries)
		{
		    AjPStr idqry = ajStrNew();
		    AjPStr entry = NULL;
		    AjPSeq seq   = NULL;
		    ajint ibegin = 0;
		    ajint iend   = 0;
		    AjPStr example = NULL;

		    example = ajDasTestrangeParse(coord->test_range,
		                                  &entry, &ibegin, &iend);

		    if(ajStrGetLen(entry))
		    {
			count = ajTableFetchS(titlecount, source->title);
			dbname = ajDasSourceGetDBname(source, *count>1);

			if (source->features)
			{
			    AjPStr qpath=NULL;

			    uo = ajHttpUrlrefNew();

			    ajFmtPrintS(&idqry,"dasgff::%S:%S:%S",
				    servername,
				    dbname,
				    entry);
			    ajFmtPrintF(outf,
				    "feature query: %S  start:%d end:%d\n",
				    idqry,
				    ibegin, iend);


			    ajHttpUrlrefParseC(&uo,
				    ajStrGetPtr(source->features_query_uri));
			    ajHttpUrlrefSplitPort(uo);

			    ajFmtPrintS(&qpath,"%S?segment=%S",
				    uo->Absolute,entry);

			    if(iend>0)
				ajFmtPrintAppS(&qpath,":%d,%d",ibegin, iend);

			    if(ajStrGetLen(uo->Port))
				ajStrToInt(uo->Port, &port);
			    else
				port = 80;

			    ajDebug("calling ajHttpRead to get the raw"
				    " output; host:%S port:%d path:%S\n",
				    uo->Host, port, qpath);

			    ajFilebuffDel(&buff);
			    buff = ajHttpRead(dbhttpver, dbname, dbproxy,
			                      uo->Host, port, qpath);

			    if(buff)
			    {
				AjPFeattable ft;

				ajFmtPrintS(&ffname, "%S.%S", source->uri,
					    entry);

				ajFilebuffHtmlNoheader(buff);

				dastestSaveRawFeatures(buff, ffname);

				ajDebug("now using EMBOSS feature queries\n");

				ft = dastestFeatureQuery(idqry,
                                                         ibegin, iend);

				dastestSaveMappedFeatures(ft, ffname,
                                                          outf, maxfeatures);

				ajStrDel(&ffname);
				ajFeattableDel(&ft);
			    }

			    ajHttpUrlrefDel(&uo);
			    ajStrDel(&qpath);

			    if(++itest>=maxtests)
				quickexit = ajTrue;
			}
			else if(source->sequence)
			{
			    seq = ajSeqNewRes(iend-ibegin+1);

			    ajFmtPrintS(&idqry,"%S:%S:%S",
				    servername, dbname, entry);
			    ajFmtPrintF(outf,
				    "sequence query: %S  start:%d end:%d\n",
				    idqry,
				    ibegin, iend);
			    ajSeqGetFromUsaRange(idqry, ajFalse, ibegin, iend,
				    seq);
			    ajFmtPrintF(outf,
				    "length of sequence returned: %d\n",
				    ajSeqGetLen(seq));

			    if(ajSeqGetLen(seq)>0)
				ajFmtPrintF(outf,
				        "sequence returned (first 100 bases):"
					" %-100.100s\n",
					ajSeqGetSeqC(seq));

			    ajSeqDel(&seq);

			}

			ajStrDel(&dbname);
		    }

		    ajStrDel(&entry);
		    ajStrDel(&idqry);
		    ajStrDel(&example);
		}
	    }
	    ajListIterDel(&coordsi);
	}

    }

    ajListIterDel(&iter);

    ajDasServerDel(&server);
    ajFilebuffDel(&buff);

    ajStrDel(&host);
    ajStrDel(&path);
    ajStrDel(&servername);

    ajStrDel(&dbhttpver);
    ajStrDel(&dbname);
    ajStrDel(&dbproxy);

    ajFileClose(&outf);

    ajTableDelValdel(&titlecount, ajMemFree);

    embExit();

    return 0;
}
Exemplo n.º 6
0
int main(int argc, char *argv[])
{
  embInitPV("ggcsi", argc, argv, "GEMBASSY", "1.0.1");

  struct soap soap;
  struct ns1__gcsiInputParams params;

  AjPSeqall seqall;
  AjPSeq    seq;
  AjPStr    inseq   = NULL;
  AjPStr    seqid   = NULL;
  ajint	    window  = 0;
  AjBool    at      = 0;
  AjBool    purine  = 0;
  AjBool    keto    = 0;
  AjBool    pval    = 0;
  AjPStr    version = NULL;
  AjBool    accid   = ajFalse;
  AjPStr    tmp     = NULL;
  AjPStr    parse   = NULL;
  AjPStr    gcsi    = NULL;
  AjPStr    sa      = NULL;
  AjPStr    dist    = NULL;
  AjPStr    z       = NULL;
  AjPStr    p       = NULL;
  AjPStrTok handle  = NULL;

  char *in0;
  char *result;

  AjPFile outf = NULL;

  seqall  = ajAcdGetSeqall("sequence");
  window  = ajAcdGetInt("window");
  at      = ajAcdGetBoolean("at");
  purine  = ajAcdGetBoolean("purine");
  keto    = ajAcdGetBoolean("keto");
  pval    = ajAcdGetBoolean("pval");
  version = ajAcdGetSelectSingle("gcsi");
  accid   = ajAcdGetBoolean("accid");
  outf    = ajAcdGetOutfile("outfile");

  params.window = window;
  params.at     = 0;
  params.purine = 0;
  params.keto   = 0;
  params.p      = 0;
  ajStrToInt(version, &(params.version));

  if(at)
    params.at = 1;
  if(purine)
    params.purine = 1;
  if(keto)
    params.keto = 1;
  if(pval)
    params.p = 1;

  while(ajSeqallNext(seqall, &seq))
    {
      soap_init(&soap);

      inseq = NULL;

      ajStrAppendC(&inseq, ">");
      ajStrAppendS(&inseq, ajSeqGetNameS(seq));
      ajStrAppendC(&inseq, "\n");
      ajStrAppendS(&inseq, ajSeqGetSeqS(seq));

      ajStrAssignS(&seqid, ajSeqGetAccS(seq));

      in0 = ajCharNewS(inseq);

      if (soap_call_ns1__gcsi(
	                      &soap,
                              NULL,
                              NULL,
                              in0,
                             &params,
                             &result
                            ) == SOAP_OK)
	{
	  tmp   = ajStrNew();
	  parse = ajStrNew();
	  gcsi  = ajStrNew();
	  sa    = ajStrNew();
	  dist  = ajStrNew();
	  z     = ajStrNew();
	  p     = ajStrNew();

	  ajStrAssignC(&tmp, result);

	  ajStrExchangeCC(&tmp, "<", "\n");
	  ajStrExchangeCC(&tmp, ">", "\n");

	  handle = ajStrTokenNewC(tmp, "\n");

	  while (ajStrTokenNextParse(&handle, &parse))
	    {
	      if (ajStrIsFloat(parse))
		{
		  if(!ajStrGetLen(gcsi))
		    ajStrAssignS(&gcsi, parse);
		  else if(!ajStrGetLen(sa))
		    ajStrAssignS(&sa, parse);
		  else if(!ajStrGetLen(dist))
		    ajStrAssignS(&dist, parse);
		  else if(!ajStrGetLen(z))
		    ajStrAssignS(&z, parse);
		  else if(!ajStrGetLen(p))
		    ajStrAssignS(&p, parse);
		}
	    }

	  tmp = ajFmtStr("Sequence: %S GCSI: %S SA: %S DIST: %S",
			 seqid, gcsi, sa, dist);

	  if(pval)
	    tmp = ajFmtStr("%S Z: %S P: %S", tmp, z, p);

          ajFmtPrintF(outf, "%S\n", tmp);

	  ajStrDel(&tmp);
	  ajStrDel(&parse);
	  ajStrDel(&gcsi);
	  ajStrDel(&sa);
	  ajStrDel(&dist);
	  ajStrDel(&z);
	  ajStrDel(&p);
	}
      else
	{
	  soap_print_fault(&soap, stderr);
	}

      soap_destroy(&soap);
      soap_end(&soap);
      soap_done(&soap);

      AJFREE(in0);

      ajStrDel(&inseq);
    }

  ajFileClose(&outf);

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&seqid);

  embExit();

  return 0;
}
Exemplo n.º 7
0
int main(int argc, char **argv)
{
    AjPSeqall nucseq;		/* input nucleic sequences */
    AjPSeqset protseq;		/* input aligned protein sequences */
    AjPSeqout seqout;
    AjPSeq nseq;		/* next nucleic sequence to align */
    const AjPSeq pseq;		/* next protein sequence use in alignment */
    AjPTrn trnTable;
    AjPSeq pep;			/* translation of nseq */
    AjPStr tablelist;
    ajint table;
    AjPSeqset outseqset;	/* set of aligned nucleic sequences */
    ajint proteinseqcount = 0;
    AjPStr degapstr = NULL;
    /* used to check if it matches with START removed */
    AjPStr degapstr2 = NULL;
    AjPStr codon = NULL;	/* holds temporary codon to check if is START */
    char aa;			/* translated putative START codon */
    ajint type;			/* returned type of the putative START codon */
    /* start position of guide protein in translation */
    ajlong pos = 0;
    AjPSeq newseq = NULL;	/* output aligned nucleic sequence */
    ajint frame;

    embInit("tranalign", argc, argv);

    nucseq    = ajAcdGetSeqall("asequence");
    protseq   = ajAcdGetSeqset("bsequence");
    tablelist = ajAcdGetListSingle("table");
    seqout    = ajAcdGetSeqoutset("outseq");

    outseqset = ajSeqsetNew();
    degapstr  = ajStrNew();

    /* initialise the translation table */
    ajStrToInt(tablelist, &table);
    trnTable = ajTrnNewI(table);

    ajSeqsetFill(protseq);

    while(ajSeqallNext(nucseq, &nseq))
    {
    	if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL)
    	    ajErr("No guide protein sequence available for "
		  "nucleic sequence %S",
		  ajSeqGetNameS(nseq));

	ajDebug("Aligning %S and %S\n",
		ajSeqGetNameS(nseq), ajSeqGetNameS(pseq));

        /* get copy of pseq string with no gaps */
        ajStrAssignS(&degapstr, ajSeqGetSeqS(pseq));
        ajStrRemoveGap(&degapstr);

        /*
	** for each translation frame look for subset of pep that
	** matches pseq
	*/
        for(frame = 1; frame <4; frame++)
	{
	    ajDebug("trying frame %d\n", frame);
            pep = ajTrnSeqOrig(trnTable, nseq, frame);
            degapstr2 = ajStrNew();
            ajStrAssignRef(&degapstr2, degapstr);
            pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr);

            /* 
            ** we might have a START codon that should be translated as 'M'
            ** we need to check if there is a match after a possible START
            ** codon 
            */
            if(pos == -1 && ajStrGetLen(degapstr) > 1 && 
                (ajStrGetPtr(degapstr)[0] == 'M' ||
		 ajStrGetPtr(degapstr)[0] == 'm'))
	      {
                /* see if pep minus the first character is a match */
                ajStrCutStart(&degapstr2, 1);
                pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); 

                /*
		** pos is >= 1 if we have a match that is after the first
		** residue
		*/
                if(pos >= 1)
		{
                    /* point back at the putative START Methionine */
                    pos--;
                    /* test if first codon is a START */
                    codon = ajStrNew();
                    ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), 
                                (pos*3)+frame-1, (pos*3)+frame+2);
                    type = ajTrnCodonstrTypeS(trnTable, codon, &aa);

                    if(type != 1)
                    {
                        /* first codon is not a valid START, force a mismatch */
                        pos = -1;
                    }
                    ajStrDel(&codon);
                
            	}
		else
		{
                    /* force 'pos == 0' to be treated as a mismatch */
            	    pos = -1;
		}
            }

            ajStrDel(&degapstr2);
            ajSeqDel(&pep);

            if(pos != -1)
            	break;
        }

        if(pos == -1)
	    ajErr("Guide protein sequence %S not found in nucleic sequence %S",
		  ajSeqGetNameS(pseq), ajSeqGetNameS(nseq));
	else
	{
	    ajDebug("got a match with frame=%d\n", frame);
            /* extract the coding region of nseq with gaps */
            newseq = ajSeqNew();
            ajSeqSetNuc(newseq);
            ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq));
            ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq));
            tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1);

            /* output the gapped nucleic sequence */
            ajSeqsetApp(outseqset, newseq);

            ajSeqDel(&newseq);
        }

        ajStrRemoveWhiteExcess(&degapstr);
    }

    ajSeqoutWriteSet(seqout, outseqset);
    ajSeqoutClose(seqout);

    ajTrnDel(&trnTable);
    ajSeqsetDel(&outseqset);
    ajStrDel(&degapstr);
    ajStrDel(&degapstr2);

    ajSeqallDel(&nucseq);
    ajSeqDel(&nseq);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&protseq);
    ajStrDel(&tablelist);

    embExit();

    return 0;
}
Exemplo n.º 8
0
int main(int argc, char **argv)
{

    AjPList idlist;
    AjPList* fieldList = NULL;

    AjBool systemsort;
    AjBool cleanup;

    ajint blastv = 0;
    char dbtype  = '\0';

    ajuint maxindex;
    ajuint maxidlen = 0;
    ajuint maxlen;

    AjPStr version = NULL;
    AjPStr seqtype = NULL;

    AjPFile elistfile  = NULL;
    AjPFile* alistfile = NULL;

    AjPStr dbname   = NULL;
    AjPStr release  = NULL;
    AjPStr datestr  = NULL;
    AjPStr sortopt  = NULL;
    void **entryIds = NULL;

    AjBool usesrc = AJTRUE;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr curfilename = NULL;

    AjPStr idformat = NULL;

    EmbPEntry entry;

    PBlastDb db = NULL;

    ajuint idCount = 0;
    ajuint idDone;
    AjPList listTestFiles = NULL;
    void ** testFiles = NULL;
    ajuint nfiles;
    ajuint ifile;
    ajuint jfile;

    ajuint filesize;
    short recsize;
    ajuint maxfilelen = 20;
    char date[4] =
    {
	0,0,0,0
    };

    AjPStr tmpfname = NULL;
    AjPStr* fields  = NULL;

    AjPFile entFile = NULL;

    AjPStr* divfiles   = NULL;
    ajint* maxFieldLen = NULL;

    ajuint ifield  = 0;
    ajuint nfields = 0;

    AjPFile logfile = NULL;
    ajuint* countField = NULL;
    ajuint* fieldTot = NULL;
    ajuint idCountFile = 0;
    ajuint i = 0;

    embInit("dbiblast", argc, argv);

    idformat = ajStrNewC("NCBI");

    fields     = ajAcdGetList("fields");
    directory  = ajAcdGetDirectoryName("directory");
    indexdir   = ajAcdGetOutdirName("indexoutdir");
    filename   = ajAcdGetString("filenames");
    exclude    = ajAcdGetString("exclude");
    dbname     = ajAcdGetString("dbname");
    release    = ajAcdGetString("release");
    datestr    = ajAcdGetString("date");
    systemsort = ajAcdGetBoolean("systemsort");
    cleanup    = ajAcdGetBoolean("cleanup");
    sortopt    = ajAcdGetString("sortoptions");
    maxindex   = ajAcdGetInt("maxindex");
    version    = ajAcdGetListSingle("blastversion");
    seqtype    = ajAcdGetListSingle("seqtype");
    usesrc     = ajAcdGetBoolean("sourcefile");
    logfile    = ajAcdGetOutfile("outfile");

    while(fields[nfields])		/* array ends with a NULL */
	nfields++;

    if(nfields)
    {
	AJCNEW(maxFieldLen, nfields);
	AJCNEW0(countField, nfields);
	AJCNEW0(fieldTot, nfields);
	for(ifield=0; ifield < nfields; ifield++)
	    maxFieldLen[ifield] = (ajint) maxindex * -1;

	if(systemsort)
	    AJCNEW(alistfile, nfields);
	else
	{
	    AJCNEW(fieldList, nfields);
	    for(ifield=0; ifield < nfields; ifield++)
		fieldList[ifield] = ajListNew();
	}
    }
    
    if(ajStrMatchC(datestr, "00/00/00"))
	ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex"));

    ajStrRemoveWhite(&dbname);		/* used for temp filenames */
    embDbiDateSet(datestr, date);
    idlist = ajListNew();
    
    if(ajUtilGetBigendian())
	readReverse = ajFalse;
    else
	readReverse = ajTrue;
    
    ajStrToInt(version, &blastv);
    dbtype = ajStrGetCharFirst(seqtype);
    
    ajDebug("reading '%S/%S'\n", directory, filename);
    ajDebug("writing '%S/'\n", indexdir);
    
    listTestFiles = embDbiFileListExc(directory, filename, exclude);
    ajListSort(listTestFiles, ajStrVcmp);
    nfiles = ajListToarray(listTestFiles, &testFiles);
    if(!nfiles)
        ajDie("No input files in '%S' matched filename '%S'",
              directory, filename);
    
    embDbiLogHeader(logfile, dbname, release, datestr,
		     indexdir, maxindex);

    embDbiLogFields(logfile, fields, nfields);
    embDbiLogSource(logfile, directory, filename, exclude,
		    (AjPStr*) testFiles, nfiles);
    embDbiLogCmdline(logfile);

    AJCNEW0(divfiles, nfiles);
    
    /*
    ** process each input file, one at a time
    */
    
    jfile = 0;
    for(ifile=0; ifile < nfiles; ifile++)
    {
	curfilename = (AjPStr) testFiles[ifile];
	if(!dbiblast_blastopenlib(curfilename,
				  usesrc, blastv, dbtype, &db))
	    continue;	 /* could be the wrong file type with "*.*" */

	ajDebug("processing filename '%S' ...\n", curfilename);
	ajDebug("processing file '%S' ...\n", db->TFile->Name);


	ajStrAssignS(&divfiles[jfile], db->TFile->Name);
	ajFilenameTrimPath(&divfiles[jfile]);
	if(ajStrGetLen(divfiles[jfile]) >= maxfilelen)
	    maxfilelen = ajStrGetLen(divfiles[jfile]) + 1;

	if(systemsort)	 /* elistfile for entries, alist for fields */
	    elistfile = embDbiSortOpen(alistfile, jfile,
				       dbname, fields, nfields);

	idCountFile = 0;
	for(i=0;i<nfields;i++)
	    countField[i] = 0;
	while((entry=dbiblast_nextblastentry(db, jfile,
					     idformat, systemsort,
					     fields,
					     maxFieldLen,
					     &maxidlen, countField,
					     elistfile, alistfile)))
	{
	    idCountFile++;
	    if(!systemsort)	    /* save the entry data in lists */
	    {
		embDbiMemEntry(idlist, fieldList, nfields, entry, jfile);
	    }
	}
	idCount += idCountFile;
	if(systemsort)
	{
	    embDbiSortClose(&elistfile, alistfile, nfields);
	    /* lost the entry, so can't free it :-) */
	}

	embDbiLogFile(logfile, curfilename, idCountFile, fields,
		      countField, nfields);
	dbiblast_dbfree(&db);
	jfile++;
    }
    nfiles = jfile;
    
    /*
    ** write the division.lkp file
    */
    
    embDbiWriteDivision(indexdir, dbname, release, date,
			maxfilelen, nfiles, divfiles, NULL);
    
    /*
    ** Write the entryname.idx index
    */
    
    ajStrAssignC(&tmpfname, "entrynam.idx");
    entFile = ajFileNewOutNamePathS(tmpfname, indexdir);
    
    recsize = maxidlen+10;
    filesize = 300 + (idCount*(ajint)recsize);
    embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date);
    
    if(systemsort)
        idDone = embDbiSortWriteEntry(entFile, maxidlen,
				      dbname, nfiles, cleanup, sortopt);
    else			  /* save entries in entryIds array */
    {
        idDone = embDbiMemWriteEntry(entFile, maxidlen,
				     idlist, &entryIds);
	if(idDone != idCount)
	    ajFatal("Duplicates not allowed for in-memory processing");
    }
    
    embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone);
    ajFileClose(&entFile);
    
    /*
    ** Write the fields index files
    */
    
    for(ifield=0; ifield < nfields; ifield++)
    {

        if(maxindex)
	    maxlen = maxindex;
	else
	{
	    if(maxFieldLen[ifield] >= 0)
		maxlen = maxFieldLen[ifield];
	    else
		maxlen = - maxFieldLen[ifield];
	}

        if(systemsort)
	    fieldTot[ifield] = embDbiSortWriteFields(dbname, release,
						     date, indexdir,
						     fields[ifield], maxlen,
						     nfiles, idCount,
						     cleanup, sortopt);
	else
	    fieldTot[ifield] = embDbiMemWriteFields(dbname, release,
						    date, indexdir,
						    fields[ifield], maxlen,
						    fieldList[ifield],
						    entryIds);
    }
    
    embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot,
		   nfields, nfiles, idDone, idCount);

    if(systemsort)
	embDbiRmEntryFile(dbname, cleanup);
    
    ajListMap(idlist, embDbiEntryDelMap, NULL);
    ajListFree(&idlist);
    AJFREE(entryIds);

    ajStrDelarray(&fields);

    for(i=0;i<nfields;i++)
    {
	if(systemsort)
	{
	    ajFileClose(&alistfile[i]);
	}
	else
	{
	    ajListMap(fieldList[i], embDbiFieldDelMap, NULL);
	    ajListFree(&fieldList[i]);
	}
    }
    AJFREE(alistfile);
    AJFREE(fieldList);
    ajStrDel(&version);
    ajStrDel(&seqtype);
    ajFileClose(&elistfile);
    for(i=0;i<nfiles;i++)
    {
	ajStrDel(&divfiles[i]);
    }
    AJFREE(countField);
    AJFREE(fieldTot);

    ajStrDel(&dbname);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&sortopt);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&idformat);
    ajStrDel(&tmpfname);

    AJFREE(maxFieldLen);

    ajFileClose(&logfile);

    ajListstrFreeData(&listTestFiles);

    ajStrDel(&t);
    ajStrDel(&id);
    ajStrDel(&acc);
    ajStrDel(&hline);
    ajStrDel(&tmpdes);
    ajStrDel(&tmpfd);
    ajStrDel(&tmpgi);
    ajStrDel(&tmpdb);
    ajStrDel(&tmpac);
    ajStrDel(&tmpsv);
    ajRegFree(&wrdexp);

    embDbiEntryDel(&dbiblastEntry);

    if(fdl)
    {
        for(i=0; i < nfields; i++)
            ajListFree(&fdl[i]);
        AJFREE(fdl);
    }

    for(i=0;i<nfiles;i++)
    {
        ajStrDel(&divfiles[i]);
    }
    AJFREE(divfiles);
    AJFREE(testFiles);

    embExit();

    return 0;
}
Exemplo n.º 9
0
AjPSeqout get_orf(
  AjPSeqout seqout,
  AjPSeqall seqall,
  AjPStr tablestr,
  ajuint minsize,
  ajuint maxsize,
  AjPStr findstr,
  AjBool methionine,
  AjBool circular,
  AjBool reverse,
  ajint around
) {
  ajint table;
  ajint find;
  AjPSeq seq = NULL;
  AjPTrn trnTable;
  AjPStr sseq = NULL;    /* sequence string */

  /* ORF number to append to name of sequence to create unique name */
  ajint orf_no;


  AjBool sense;    /* ajTrue = forward sense */
  ajint len;

  /* initialise the translation table */
  ajStrToInt(tablestr, &table);
  trnTable = ajTrnNewI(table);

  /* what sort of ORF are we looking for */
  ajStrToInt(findstr, &find);

  /*
  ** get the minimum size converted to protein length if storing
  ** protein sequences
  */
  if (find == P_STOP2STOP || find == P_START2STOP || find == AROUND_START) {
    minsize /= 3;
    maxsize /= 3;
  }

  while (ajSeqallNext(seqall, &seq)) {
    orf_no = 1;           /* number of the next ORF */
    sense = ajTrue;           /* forward sense initially */

    /* get the length of the sequence */
    len = ajSeqGetLen(seq);

    /*
    ** if the sequence is circular, append it to itself to triple its
    **  length so can deal easily with wrapped ORFs, but don't update
    ** len
    */
    if (circular) {
      ajStrAssignS(&sseq, ajSeqGetSeqS(seq));
      ajStrAppendS(&sseq, ajSeqGetSeqS(seq));
      ajStrAppendS(&sseq, ajSeqGetSeqS(seq));
      ajSeqAssignSeqS(seq, sseq);
    }

    /* find the ORFs */
    getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense,
                    circular, find, &orf_no, methionine, around, &record);

    /* now reverse complement the sequence and do it again */
    if (reverse) {
      sense = ajFalse;
      ajSeqReverseForce(seq);
      getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense,
                      circular, find, &orf_no, methionine,
                      around);
    }
  }
  ajTrnDel(&trnTable);
  ajSeqDel(&seq);
  ajStrDel(&sseq);
}
Exemplo n.º 10
0
int main(int argc, char **argv)
{
    /*         
    ** All pointers set to NULL for safety. 
    ** Variables names and initialisation values aligned for clarity.
    */

    AjBool    boo   = ajFalse;
    ajint     n1    = 0;
    ajint     n2    = 0;
    ajlong    l1    = 0;     /* long int */
    float     f1    = 0.0;
    double    d1    = 0.0;   /* there is no long double */
    size_t    size  = 100;   /* Reserved memory size.  Could be any value you know in advance. */


    embInit("demostringnew", argc, argv);
    
    demostringnew_msg("/* Starting string values */");
    

    /* Functions with the prefix ajStr are for manipulating EMBOSS strings.
       Functions with the prefix ajChar are for manipulating C-type (char*) string 
       See filesection and datasection sections in ajstr.c */


    /*
    ** String constructor functions 
    ** See "@section constructors" in ajstr.c
    */

    /* Construct a new string with no starting value or reserved size.
       There is no equivlent function for C-type (char*) strings */
    str0  = ajStrNew ();

    /* Construct a new string with a reserved size but no starting value */
    txt1 = ajCharNewRes(size);  
    str1 = ajStrNewRes (size);    

    /* Construct a new C-type (char*) string with a starting value ... */
    txt2 = ajCharNewC ("Starting value");   /* ... copied from a C-type (char*) string */
    str2 = ajStrNewC  (txt2);                /* ... copied from a C-type (char*) string */
    txt3 = ajCharNewS (str2);                /* ... copied from a string */
    str3 = ajStrNewS  (str2);                /* ... copied from a string */

    /* Construct a new string with a reserved size and starting value ... */
    txt4 = ajCharNewResC("Starting value, reserved size", size);  /* ... copied from a C-type (char*) string)*/
    str4 = ajStrNewResC (txt4, size);                              /* ... copied from a C-type (char*) string */
    /* or str4 = ajStrNewResLenC(txt4, size, strlen(txt4)); to specify string length */
    txt5 = ajCharNewResS(str4, size);                              /* ... copied from a string */
    str5 = ajStrNewResS (str4, size);                              /* ... copied from a string */

    demostringnew_msg("/* After string constructor functions */");    





    /*
    ** String destructor functions 
    ** See "@section destructors" in ajstr.c) 
    */
    
    /* Destruct a string */
    ajCharDel(&txt1);
    ajCharDel(&txt2);
    ajCharDel(&txt3);
    ajCharDel(&txt4);
    ajCharDel(&txt5);
    ajStrDel (&str0);
    ajStrDel (&str1);
    ajStrDel (&str3);
    ajStrDel (&str5);

    /* str2 & str4 still in memory */
    demostringnew_msg("/* After string destructor functions */");    





    /*
    ** String (de)referencing functions 
    ** See "@section destructors" in ajstr.c)
    */

    str0 = ajStrNewRef(str2);
    /* or ajStrAssignRef(&str0, str2); */
    demostringnew_msg("/* After string reference */");    

    ajStrDelStatic(&str0);
    demostringnew_msg("/* After string dereference */");    





    /*
    ** String assignment functions 
    ** See "@section assignment" in ajstr.c)
    */

    /* Still only str2 & str4 in memory */

    /* Assign a string value using ... */
    ajStrAssignC(&str1, "Assigned value");     /* ... a C-type (char*) string */
    /*    or ajStrAssignLenC(&str1, "Assigned value", strlen("Assigned value")); to specify string length. */
    ajStrAssignS(&str3, str1);                 /* ... a string                */
    ajStrAssignK(&str5, 'A');                  /* ... a character             */
    demostringnew_msg("/* After string assignment 1 */");    

    ajStrAssignSubC(&str1, "Assigned value", 0, 11);
    ajStrAssignSubS(&str3, str1, 0, 9);
    demostringnew_msg("/* After string assignment 2 */");    

    /* The assignment functions allocate memory if necessary so str1, str3 and str5 will be created for you.  It's bad practice to use this mechanism however because it's not obvious the string has been allocated (and needs freeing).  Much cleaner to call the construct (ajStrNew) explicitly. */



    /* Assign a string with a reserved size and value using ... */
    ajStrAssignResC(&str1, size, "Assigned value, reserved size");  /* ... a C-type (char*) string */
    ajStrAssignResS(&str3, size, str1);                             /* ... a string                */
    demostringnew_msg("/* After string assignment 3 */");    



    /* Assign a string value only if the string is empty using ... */
    str0 = ajStrNew();
    ajStrAssignEmptyC(&str0, "New value if string was empty");    /* ... a C-type (char*) string */ 
    ajStrAssignEmptyS(&str1, str0);                               /* ... a string                */
    demostringnew_msg("/* After string assignment 4 */");    

    /* Now str0-5 in memory.  The above code is for illustrative purposes: it's much cleaner to put all the constructors / destructors at the top / bottom of the code where possible. */


    /* Assign all strings intuitive values */
    txt0 = ajCharNewResC("TEXT 0", 100);   
    txt1 = ajCharNewResC("TEXT 1", 100);   
    txt2 = ajCharNewResC("Text 2", 100);   
    txt3 = ajCharNewResC("Text 3", 100);   
    txt4 = ajCharNewResC("Text 4", 100);   
    txt5 = ajCharNewResC("Text 5", 100);
    ajStrAssignC(&str0, "STRING 0");   
    ajStrAssignC(&str1, "STRING 1");   
    ajStrAssignC(&str2, "String 2");   
    ajStrAssignC(&str3, "String 3");   
    ajStrAssignC(&str4, "String 4 WITHSOMETEXTINABLOCK");   
    ajStrAssignC(&str5, "String 5 WITHSOMETEXTINABLOCK");   
    demostringnew_msg("/* After string assignment 5 */");    





    /*
    ** String formatting functions 
    ** See "@section formatting" in ajstr.c
    */
    ajCharFmtLower(txt0);
    ajCharFmtLower(txt1);
    ajStrFmtLower(&str0);
    ajStrFmtLowerSub(&str1, 0, 2);

    ajCharFmtUpper(txt2);
    ajCharFmtUpper(txt3);
    ajStrFmtUpper(&str2);
    ajStrFmtUpperSub(&str3, 0, 2);
    demostringnew_msg("/* After string formatting 1 */");    


    ajStrFmtTitle(&str0);
    ajStrFmtQuote(&str1);
    ajStrFmtBlock(&str4, 3);
    demostringnew_msg("/* After string formatting 2 */");    


    /* See also ajStrFmtWrap, ajStrFmtWrapLeft
       ... these need checking. */





    /*
    ** String conversion functions 
    ** See "@section datatype to string conversion" in ajstr.c
    */
    n1 = n2 = l1 = 1;
    f1 = d1 = 0.5;
    ajStrFromBool( &str0, boo);
    ajStrFromInt(&str1, n1);
    ajStrFromLong(&str2, l1);
    ajStrFromFloat(&str3, f1, 5);
    ajStrFromDouble(&str4, d1, 5);
    ajStrFromDoubleExp(&str5, d1, 5);
    demostringnew_msg("/* After datatype to string conversion */");    


    /*
    ** String conversion functions 
    ** See "@section string to datatype conversion" in ajstr.c
    */
    ajStrToBool(str0, &boo);
    ajStrToInt(str1, &n1);
    ajStrToLong(str2, &l1);
    ajStrToDouble(str4, &d1);
    ajUser("/* After string to datatype conversion */\n"
	   "boo (from str0): %B\nn1 (from str1): %d\nl1 (from str2): %d", 
	   boo, n1, l1);
    ajFmtPrint("f1 (from str3): %f\nd1 (from str4): %f\n", f1, d1);
    /* Check ajUser ... doesn't support %f */
    /* See also  ajStrToHex */





    /* Assign all strings new values */
    strcpy(txt0, "Text String");
    strcpy(txt1, "TEXT STRING");
    strcpy(txt2, "Text*");
    strcpy(txt3, "Text");
    strcpy(txt4, "Text String 4");
    strcpy(txt5, "Text String 5");

    ajStrAssignC(&str0, "String");   
    ajStrAssignC(&str1, "STRING");   
    ajStrAssignC(&str2, "String*");   
    ajStrAssignC(&str3, "*String");   
    ajStrAssignC(&str4, "String 4");   
    ajStrAssignC(&str5, "String 5");   
    demostringnew_msg("/* After resetting strings */");    





    /*
    ** String comparison functions 
    ** See "@section comparison" in ajstr.c
    */
    ajUserDumpC("/* String comparison functions */");
    boo = ajCharMatchC(txt0, txt1);
    ajUser("ajCharMatchC(txt0 txt1); == %B", boo);
    boo = ajCharMatchCaseC(txt0, txt1);
    ajUser("ajCharMatchCaseC(txt0 txt1); == %B", boo);
    boo = ajCharMatchC(txt0, txt2);
    ajUser("ajCharMatchC(txt0,txt2); == %B", boo);
    boo = ajCharMatchWildC(txt0, txt2);
    ajUser("ajCharMatchWildC(txt0,txt2); == %B", boo);
    boo = ajCharMatchWildS(txt0, str2);
    ajUser("ajCharMatchWildS(txt0,str2); == %B", boo);
    /* See also ajCharMatchWildNextC, ajCharMatchWildWordC
       ... these need checking & documentation updated. */

    boo = ajCharPrefixC(txt0, txt3);
    ajUser("ajCharPrefixC(txt0, txt3); == %B", boo);
    boo = ajCharPrefixS(txt0, str0);
    ajUser("ajCharPrefixS(txt0, str0); == %B", boo);
    boo = ajCharPrefixCaseC(txt5, txt1);
    ajUser("ajCharPrefixCaseC(txt5, txt1); == %B", boo);
    boo = ajCharPrefixCaseC(txt1, txt5);
    ajUser("ajCharPrefixCaseC(txt1, txt5); == %B", boo);
    boo = ajCharPrefixCaseS(txt0, str0);
    ajUser("ajCharPrefixCaseS(txt0, str0); == %B", boo);
    boo = ajCharSuffixC(txt0, txt3);
    ajUser("ajCharSuffixC(txt0, txt3); === %B", boo);
    boo = ajCharSuffixS(txt0, str0);
    ajUser("ajCharSuffixS(txt0, str0); == %B", boo);

    /* See also ajCharSuffixCaseC, ajCharSuffixCaseC, ajCharSuffixCaseS, ajCharSuffixCaseS
       ... these need checking. */
    boo =  ajStrMatchC (str0, txt0);
    ajUser("ajStrMatchC (str0, txt0); == %B", boo);
    boo = ajStrMatchS(str0, str1);
    ajUser("ajStrMatchS(str0, str1); == %B", boo);
    boo = ajStrMatchCaseC(str0, txt0);
    ajUser("ajStrMatchCaseC(str0, txt0); == %B", boo);
    boo = ajStrMatchCaseS(str0, str0);
    ajUser("ajStrMatchCaseS(str0, str0); == %B", boo);


    /*
    ajUser("== %B", boo);

    boo = ajStrMatchWildC(str2, const char* text);
     ajStrMatchWildS  (const AjPStr thys, const AjPStr wild);
     ajStrMatchWildWordC (const AjPStr str, const char* text);
     ajStrMatchWildWordS (const AjPStr str, const AjPStr text);
     ajStrPrefixC(const AjPStr str, const char* txt2);
     ajStrPrefixS(const AjPStr str, const AjPStr str2);
     ajStrPrefixCaseC (const AjPStr str, const char* pref);
     ajStrPrefixCaseS (const AjPStr str, const AjPStr pref);
     ajStrSuffixC (const AjPStr thys, const char* suff);
     ajStrSuffixS (const AjPStr thys, const AjPStr suff);
    */





    /**************************************************************************/
    /* String substitution functions (See "@section substitution" in ajstr.c) */
    /**************************************************************************/

    /*
AjBool     ajStrExchangeCC(AjPStr* Pstr, const char* txt, const char* txtnew);
AjBool     ajStrExchangeCS(AjPStr* Pstr, const char* txt,
                           const AjPStr strnew);
AjBool     ajStrExchangeKK(AjPStr* Pstr, char chr, char chrnew);
AjBool     ajStrExchangeSC(AjPStr* Pstr, const AjPStr str,
                           const char* txtnew);
AjBool     ajStrExchangeSS(AjPStr* Pstr, const AjPStr str,
                           const AjPStr strnew);
AjBool     ajStrExchangeSetCC(AjPStr* Pstr, const char* oldc,
                              const char* newc);
AjBool     ajStrExchangeSetSS(AjPStr* Pstr, const AjPStr str,
                            const AjPStr strnew);
AjBool     ajStrRandom(AjPStr *s);
AjBool     ajStrReverse(AjPStr* Pstr);
    */


    embExit();
    return 0;
}
Exemplo n.º 11
0
static AjBool assemoutWriteBamAlignment(AjPSeqBamBgzf gzfile,
					const AjPAssemRead r,
					AjPSeqBam bam)
{
    AjPSeqBamCore c;
    AjPAssemTag tag;
    unsigned char *dpos;
    const char *s;
    ajuint ilen;
    ajuint slen;
    ajuint i;
    AjIList l = NULL;

    /* optional fields */
    ajint tagvalsize = 0;
    const unsigned char* tagval = 0;
    ajint intval =0;

    /* processing cigar strings*/
    char *t;
    int op;
    long x;


    unsigned char bam_nt16_table[256] =
    {
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15,
     15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
     15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
     15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
     15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
    };

    /* bam_write1 for each alignment */

    c = &bam->core;

    ilen = ajStrGetLen(r->Seq);

    c->tid = (int) r->Reference;

    if(r->Flag & BAM_FREVERSE)
	c->pos = r->y1-1;
    else
	c->pos = r->x1-1; /* BAM format is zero based;
                             -1 is translated to 0, meaning unmapped */
    c->bin = 0;
    c->qual = r->MapQ;
    c->l_qname = 1 + ajStrGetLen(r->Name);
    c->flag = r->Flag;
    c->n_cigar = 0;
    c->l_qseq = ilen;
    c->mtid = (int) r->Rnext;
    c->mpos = (int) r->Pnext-1;
    c->isize = r->Tlen;


    /* get cigar string length */
    s = ajStrGetPtr(r->Cigar);
    if (strcmp(s,"*")) /* '*' means unavailable  */
    {
	for (; *s; ++s)
	{
	  if ((isalpha((int)*s)) || (*s=='='))
		++c->n_cigar;
	    else
	      if (!isdigit((int)*s))
		    ajWarn("invalid CIGAR character: %c\n", *s);
	}
    }


    bam->data_len = c->n_cigar*4 + c->l_qname +
        (ilen + 1)/2 + ilen;

    /* allocation for optional tags are made as they are appended */

    if(bam->data_len > bam->m_data)
    {
        AJCRESIZE0(bam->data,bam->m_data, bam->data_len);
        bam->m_data = bam->data_len;
    }

    dpos = bam->data;

    /* copy query name to bam->data */
    memcpy(dpos, ajStrGetPtr(r->Name), c->l_qname);
    dpos += c->l_qname;

    /* copy cigar string to bam->data */
    s = ajStrGetPtr(r->Cigar);
    for (i = 0; i != c->n_cigar; ++i)
    {
	x = strtol(s, &t, 10);
	op = toupper((int)*t);
	if (op == 'M') op = BAM_CMATCH;
	else if (op == 'I') op = BAM_CINS;
	else if (op == 'D') op = BAM_CDEL;
	else if (op == 'N') op = BAM_CREF_SKIP;
	else if (op == 'S') op = BAM_CSOFT_CLIP;
	else if (op == 'H') op = BAM_CHARD_CLIP;
	else if (op == 'P') op = BAM_CPAD;
	else if (op == '=') op = BAM_CEQUAL;
	else if (op == 'X') op = BAM_CDIFF;
	else ajWarn("invalid CIGAR operation: %c",op);
	s = t + 1;
	((ajuint*)dpos)[i] = x << BAM_CIGAR_SHIFT | op;
    }

    if (*s && c->n_cigar)
	ajWarn("unmatched CIGAR operation: %c", *s);

    c->bin = ajSeqBamReg2bin(c->pos, ajSeqBamCalend(c, MAJSEQBAMCIGAR(bam)));

    dpos += c->n_cigar*4;


    /* copy sequence string to bam->data */
    s = ajStrGetPtr(r->Seq);
    slen = (ilen+1)/2;
    for (i = 0; i < slen; ++i)
        dpos[i] = 0;
    for (i = 0; i < ilen; ++i)
        dpos[i/2] |= bam_nt16_table[(ajuint)s[i]] << 4*(1-i%2);
    dpos += slen;

    /* copy quality values to bam->data */
    if(r->SeqQ && !ajStrMatchC(r->SeqQ, "*"))
    {
	s = ajStrGetPtr(r->SeqQ);

	for(i=0;i<ilen;i++)
	    dpos[i]= s[i]-33;

    }
    else
	for(i=0;i<ilen;i++)
	    dpos[i]= 0xff;



    l = ajListIterNewread(r->Tags);
    bam->l_aux=0;
    while (!ajListIterDone(l))
    {

	tag = ajListIterGet(l);

	/* TODO: array type 'B' and other types */

	if(tag->type == 'i' || tag->type == 'I')
	{
	    tagvalsize = 4;
	    ajStrToInt(tag->Comment, &intval);
	    tagval = (unsigned char*)&intval;
	}
	else if(tag->type =='s' || tag->type =='S')
	{
	    tagvalsize = 2;
	    ajStrToInt(tag->Comment, &intval);
	    tagval = (unsigned char*)&intval;
	}
	else if(tag->type =='c' || tag->type =='C')
	{
	    tagvalsize = 1;
	    ajStrToInt(tag->Comment, &intval);
	    tagval = (unsigned char*)&intval;
	}
	else if(tag->type =='A')
	{
	    tagvalsize = 1;
	    tagval = (const unsigned char*)ajStrGetPtr(tag->Comment);
	}
	else if(tag->type =='Z')
	{
	    tagvalsize = ajStrGetLen(tag->Comment)+1;
	    tagval = (const unsigned char*)ajStrGetPtr(tag->Comment);
	}
	else
	{
	    ajWarn("tag type '%c' not yet supported",tag->type);
	    continue;
	}
	ajSeqBamAuxAppend(bam,
		ajStrGetPtr(tag->Name),
		tag->type,
		tagvalsize,
		tagval);


    }
    ajListIterDel(&l);



    ajSeqBamWrite(gzfile, bam);

    return ajTrue;
}
Exemplo n.º 12
0
int main(int argc, char **argv)
{
    ajint     famn      = 0;	 /* Counter for the families.                */
    ajint     nset      = 0;	 /* No. entries in family.                   */
    
    ajint     last_nodeid = 0;   /* SCOP Sunid of last family that was 
				    processed.                               */
    AjPStr    last_node  = NULL; /* Last family that was processed.          */
    AjPStr    exec       = NULL; /* The UNIX command line to be executed.    */
    AjPStr    out        = NULL; /* Name of stamp alignment file.            */
    AjPStr    align      = NULL; /* Name of sequence alignment file.         */
    AjPStr    alignc     = NULL; /* Name of structure alignment file.        */
    AjPStr    log        = NULL; /* Name of STAMP log file.                  */
    AjPStr    dom        = NULL; /* Name of file containing single domain.   */
    AjPStr    set        = NULL; /* Name of file containing set of domains.  */
    AjPStr    scan       = NULL; /* Name of temp. file used by STAMP.        */
    AjPStr    sort       = NULL; /* Name of temp. file used by STAMP.        */
    AjPStr    name       = NULL; /* Base name of STAMP temp files.           */
    AjPStr    pdbnames   = NULL; /* Names of domain pdb files to be passed to
				    TCOFFEEE.                                */
    AjPDir    pdb        = NULL; /* Path of domain coordinate files (pdb 
				    format input).                           */
    AjPDirout daf        = NULL; /* Path of sequence alignment files for output. */
    AjPDirout super      = NULL; /* Path of structure alignment files for output. */
    AjPDirout singlets   = NULL; /* Path of FASTA singlet sequence files for output. */
    AjPStr    temp1      = NULL; /* A temporary string.                      */

    AjPFile   dcfin      = NULL; /* File pointer for original Escop.dat file.*/
    AjPFile   domf       = NULL; /* File pointer for single domain file.     */
    AjPFile   setf       = NULL; /* File pointer for domain set file.        */
    AjPFile   logf       = NULL; /* Log file. */

    AjPDomain domain     = NULL; /* Pointer to domain structure.             */
    AjPDomain prevdomain = NULL; /* Pointer to previous domain structure.    */

    ajint     type       = 0;    /* Type of domain (ajSCOP or ajCATH) in the 
				    DCF file.                                */

    AjPStr   *node       = NULL; /* Node of alignment         .              */
    ajint     noden      = 0;    /*1: Class (SCOP), 2: Fold (SCOP) etc, see 
				   ACD file.                                 */

    AjPStr   *mode       = NULL; /* Mode of operation from acd*/
    ajint     moden      = 0;    /* Program mode, 1: MODE_STAMP, 2: MODE_TCOFFEE (not
				    yet implemented). */
    AjBool    keepsinglets= ajFalse; /*Whether to retain sequences of singlet families
				       and write them to an output file.         */

    AjPStr    temp      = NULL;	/* A temporary string.                       */
    AjPStr    cmd       = NULL; /* The command line to execute t-coffee.     */





    /* Initialise strings etc*/
    last_node = ajStrNew();
    exec     = ajStrNew();
    out      = ajStrNew();
    align    = ajStrNew();
    alignc   = ajStrNew();
    log      = ajStrNew();
    dom      = ajStrNew();
    set      = ajStrNew();
    scan     = ajStrNew();
    sort     = ajStrNew();
    name     = ajStrNew();
    temp     = ajStrNew();
    temp1    = ajStrNew();
    cmd      = ajStrNew();
    pdbnames = ajStrNew();




    /* Read data from acd. */
    embInitPV("domainalign",argc,argv,"DOMALIGN",VERSION);

    dcfin       = ajAcdGetInfile("dcfinfile");
    pdb           = ajAcdGetDirectory("pdbdir");
    daf          = ajAcdGetOutdir("dafoutdir");
    super         = ajAcdGetOutdir("superoutdir");
    singlets      = ajAcdGetOutdir("singletsoutdir");
    node          = ajAcdGetList("node");
    mode          = ajAcdGetList("mode");    
    keepsinglets  = ajAcdGetToggle("keepsinglets");
    logf          = ajAcdGetOutfile("logfile");
   

    /* Convert the selected node and mode to an integer. */
    if(!(ajStrToInt(node[0], &noden)))
	ajFatal("Could not parse ACD node option");
    if(!(ajStrToInt(mode[0], &moden)))
	ajFatal("Could not parse ACD node option");


    /* Initialise random number generator for naming of temp. files. */
    ajRandomSeed();
    ajFilenameSetTempname(&name);


    /* Create names for temp. files. */
    ajStrAssignS(&log, name);	
    ajStrAppendC(&log, ".log");
    ajStrAssignS(&dom, name);	
    ajStrAppendC(&dom, ".dom");
    ajStrAssignS(&set, name);	
    ajStrAppendC(&set, ".set");
    ajStrAssignS(&scan, name);	
    ajStrAppendC(&scan, ".scan");
    ajStrAssignS(&sort, name);
    ajStrAppendC(&sort, ".sort");
    ajStrAssignS(&out, name);	
    ajStrAppendC(&out, ".out");


    /* Initialise last_node with something that is not in SCOP. */
    ajStrAssignC(&last_node,"!!!!!");
    
    

    /* Open STAMP domain set file. */
    if(moden == MODE_STAMP)
    {
	if(!(setf=ajFileNewOutNameS(set)))
	    ajFatal("Could not open domain set file\n");
    }
    

    /* Get domain type. */
    type = ajDomainDCFType(dcfin);


    /* Start of main application loop. */
    while((domain=(ajDomainReadCNew(dcfin, "*", type))))
    {
	/* A new family. */
	if(((domain->Type == ajSCOP) &&
	    (((noden==1) && (last_nodeid != domain->Scop->Sunid_Class))      ||
	     ((noden==2) && (last_nodeid != domain->Scop->Sunid_Fold))       ||
	     ((noden==3) && (last_nodeid != domain->Scop->Sunid_Superfamily))||
	     ((noden==4) && (last_nodeid != domain->Scop->Sunid_Family))))   ||
	   ((domain->Type == ajCATH) &&
	    (((noden==5) && (last_nodeid != domain->Cath->Class_Id))         ||
	     ((noden==6) && (last_nodeid != domain->Cath->Arch_Id))          ||
	     ((noden==7) && (last_nodeid != domain->Cath->Topology_Id))      ||
	     ((noden==8) && (last_nodeid != domain->Cath->Superfamily_Id))   ||
	     ((noden==9) && (last_nodeid != domain->Cath->Family_Id)))))
	{
	    /* If we have done the first family. */
	    if(famn)
	    {

		/* Create the output file for the alignment - the name will
		   be the same as the Sunid for the DOMAIN family. */
		domainalign_writeid(prevdomain, noden, daf, super,
				    &align, &alignc);

		if(moden == MODE_STAMP)
		{
		    /* Close domain set file. */
		    ajFileClose(&setf);	

		    /* Call STAMP. */
		    
		    /* Family with 2 or more entries. */
		    if(nset > 1)
		    {
			domainalign_stamp(prevdomain, 
					  domain, 
					  daf, 
					  super,
					  singlets, 
					  align, 
					  alignc, 
					  dom, 
					  name, 
					  set, 
					  scan, 
					  sort, 
					  log, 
					  out, 
					  keepsinglets, 
					  moden, 
					  noden,
					  nset, 
					  logf);
		    }
		    
		    else if(keepsinglets) /* Singlet family. */	
			domainalign_keepsinglets(prevdomain, noden,
						 singlets, logf);
			

		    /* Open STAMP domain set file. */
		    if(!(setf=ajFileNewOutNameS(set)))
			ajFatal("Could not open domain set file\n");
		}
		else
		{
		    /* Call TCOFEE. */
		    if(nset > 1)
			domainalign_tcoffee(prevdomain, out, align,
					    alignc, pdbnames, noden, logf);
		    else if(keepsinglets) /* Singlet family. */	
			domainalign_keepsinglets(prevdomain, noden,
						 singlets, logf);
		}

		/* Set the number of members of the new family to zero. */
		nset = 0;

		/* Clear TCOFFEE argument. */    
		ajStrSetClear(&pdbnames);
	    }	
	    
	    
	    /* Open, write and close STAMP domain file. */
	    if(moden == MODE_STAMP)
	    {
		if(!(domf=ajFileNewOutNameS(dom)))
		    ajFatal("Could not open domain file\n");
		ajStrAssignS(&temp, ajDomainGetId(domain));
		ajStrFmtLower(&temp);
		ajFmtPrintF(domf, "%S %S { ALL }\n", temp, temp);
		ajFileClose(&domf);	
	    }
	    
	    
	    /* Copy current family name to last_node. */
	    domainalign_writelast(domain, noden, &last_node, &last_nodeid);
	    
	    /* Copy current domain pointer to prevdomain. */
	    ajDomainDel(&prevdomain);
	    prevdomain=NULL;
	    ajDomainCopy(&prevdomain, domain);

	    /* Increment family counter. */
	    famn++;
	}
	
						
	ajStrAssignS(&temp, ajDomainGetId(domain));
	ajStrFmtLower(&temp);

	/* Write STAMP domain set file. */
	if(moden == MODE_STAMP)
	    ajFmtPrintF(setf, "%S %S { ALL }\n", temp, temp);
	/* Write TCOFFEE argument. */    
	else
	{
	    ajStrAppendS(&pdbnames, ajDirGetPath(pdb));
	    ajStrAppendS(&pdbnames, temp);
	    ajStrAppendC(&pdbnames, ".");
	    ajStrAppendS(&pdbnames, ajDirGetExt(pdb));
	    ajStrAppendC(&pdbnames, " ");
	}
	
	ajDomainDel(&domain);

	/* Increment number of members in family. */
	nset++;
    }
    
    /* End of main application loop. */
    domain=prevdomain;
    

    ajFmtPrint("\nProcessing node %d\n", last_nodeid);
    


    /* Create the output file for the alignment - the name will
       be the same as the Sunid for the DOMAIN family. */
    domainalign_writeid(prevdomain, noden, daf, super, &align, &alignc);



    /* Code to process last family. */
    if(moden == MODE_STAMP)
    {
	/*Close domain set file. */
	ajFileClose(&setf);	

		
	/*    ajFmtPrint("\n***** SECOND CALL\n");. */
	if(nset > 1)
	{
	    domainalign_stamp(prevdomain, 
			      domain, 
			      daf, 
			      super,
			      singlets, 
			      align, 
			      alignc, 
			      dom, 
			      name, 
			      set, 
			      scan, 
			      sort, 
			      log, 
			      out, 
			      keepsinglets, 
			      moden, 
			      noden,
			      nset, 
			      logf);
	}
	
	else if(keepsinglets) /* Singlet family. */	
	    domainalign_keepsinglets(prevdomain, noden, singlets, logf);
			
    }
    else
    {
	/* Call TCOFEE. */
	if(nset > 1)
	    domainalign_tcoffee(prevdomain, out, align, alignc, 
				pdbnames, noden, logf);
	else if(keepsinglets) /* Singlet family. */	
	    domainalign_keepsinglets(prevdomain, noden, singlets, logf);
    }


    /* Remove all temporary files. */

    ajSysFileUnlinkS(log);
    ajSysFileUnlinkS(dom);
    ajSysFileUnlinkS(set);
    ajSysFileUnlinkS(scan);
    ajSysFileUnlinkS(sort);
    ajSysFileUnlinkS(out);
    ajStrAssignS(&temp, name);	
    ajStrAppendC(&temp, ".mat");
    ajSysFileUnlinkS(temp);



    /* Tidy up*/
    ajDomainDel(&domain);
    ajFileClose(&dcfin);	
    ajStrDel(&last_node);
    ajStrDel(&exec);
    ajStrDel(&log);
    ajStrDel(&dom);
    ajStrDel(&set);
    ajStrDel(&scan);
    ajStrDel(&sort);
    ajStrDel(&name);
    ajStrDel(&out);
    ajStrDel(&align);
    ajStrDel(&alignc);
    ajStrDel(&pdbnames);
    ajDirDel(&pdb); 
    ajDiroutDel(&daf); 
    ajDiroutDel(&super); 
    ajDiroutDel(&singlets); 
    ajStrDel(&temp); 
    ajStrDel(&temp1); 
    ajStrDel(&node[0]);
    AJFREE(node);
    ajStrDel(&mode[0]);
    AJFREE(mode);
    ajFileClose(&logf);
    
    ajExit();
    return 0;
}
Exemplo n.º 13
0
/* @prog seqnr **************************************************************
**
** Removes redundancy from DHF files (domain hits files) or other files of 
** sequences.
**
****************************************************************************/
int main(int argc, char **argv)
{
    /* Variable declarations */
    AjPList    in        = NULL;    /* Names of domain hits files (input).   */
    AjPStr     inname    = NULL;    /* Full name of the current DHF file.    */
    AjPFile    inf       = NULL;    /* Current DHF file.                     */
    EmbPHitlist infhits   = NULL;   /* Hitlist from DHF file                 */
    AjBool     dosing    = ajFalse; /* Filter using singlet sequences.       */
    AjPDir     singlets  = NULL;    /* Singlets (input).                     */
    AjBool     dosets    = ajFalse; /* Filter using sets of sequences.       */
    AjPDir     insets    = NULL;    /* Sets (input).                         */
    AjPStr     mode      = NULL;    /* Mode of operation                     */
    ajint      moden     = 0;       /* Mode 1: single threshold for redundancy
				       removal, 2: lower and upper thresholds
				       for redundancy removal.               */
    float      thresh    = 0.0;     /* Threshold for non-redundancy.         */
    float      threshlow = 0.0;	    /* Threshold (lower limit).              */
    float      threshup  = 0.0;	    /* Threshold (upper limit).              */
    AjPMatrixf matrix    = NULL;    /* Substitution matrix.                  */
    float      gapopen   = 0.0;     /* Gap insertion penalty.                */
    float      gapextend = 0.0;     /* Gap extension penalty.                */
    AjPDirout  out       = NULL;    /* Domain hits files (output).           */
    AjPFile    outf      = NULL;    /* Current DHF file (output).            */
    AjBool     dored     = ajFalse; /* True if redundant hits are output.    */
    AjPDirout  outred    = NULL;    /* DHF files for redundant hits (output).*/
    AjPFile    redf      = NULL;    /* Current DHF file redundancy (output). */
    AjPStr     outname   = NULL;    /* Name of output file (re-used).        */
    AjPFile    logf      = NULL;    /* Log file pointer.                     */
 
    AjBool     ok        = ajFalse; /* Housekeeping.                         */
    AjPSeqset  seqset    = NULL;    /* Seqset (re-used).                     */
    AjPSeqin   seqin     = NULL;    /* Seqin (re-used).                      */
    AjPList    seq_list  = NULL;    /* Main list for redundancy removal.     */
    EmbPDmxNrseq seq_tmp = NULL;    /* Temp. pointer for making seq_list.    */
    ajint      seq_siz   = 0;       /* Size of seq_list.                     */
    AjPUint    keep      = NULL;    /* 1: Sequence in seq_list was classed as
				       non-redundant, 0: redundant.          */
    AjPUint    nokeep    = NULL;    /* Inversion of keep array.              */
    ajint      nseqnr    = 0;       /* No. non-redundant seqs. in seq_list.  */
    

    AjPStr     filtername= NULL;    /* Name of filter file (re-used).        */
    AjPFile    filterf   = NULL;    /* Current filter file.                  */
    EmbPHitlist hitlist   = NULL;   /* Hitlist from input file (re-used).    */
    AjPScopalg scopalg   = NULL;    /* Scopalg from input file.              */
    ajint      x         = 0;       /* Housekeeping.                         */
    

    


    /* Read data from acd. */
    embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION);

    in        = ajAcdGetDirlist("dhfinpath");
    dosing    = ajAcdGetToggle("dosing");
    singlets    = ajAcdGetDirectory("singletsdir");
    dosets    = ajAcdGetToggle("dosets");
    insets    = ajAcdGetDirectory("insetsdir");
    mode      = ajAcdGetListSingle("mode");  
    thresh    = ajAcdGetFloat("thresh");
    threshlow = ajAcdGetFloat("threshlow");
    threshup  = ajAcdGetFloat("threshup");
    matrix    = ajAcdGetMatrixf("matrix");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    out       = ajAcdGetOutdir("dhfoutdir");
    dored     = ajAcdGetToggle("dored");
    outred    = ajAcdGetOutdir("redoutdir");
    logf      = ajAcdGetOutfile("logfile");



    /* Housekeeping. */
    filtername  = ajStrNew();
    outname     = ajStrNew();


    if(!(ajStrToInt(mode, &moden)))
	ajFatal("Could not parse ACD node option");


    
    /* Process each DHF (input) in turn. */
    while(ajListPop(in,(void **)&inname))
    {
	ajFmtPrint("Processing %S\n", inname);
	ajFmtPrintF(logf, "//\n%S\n", inname);


	seq_list    = ajListNew();
	keep        = ajUintNew();  	    
	nokeep      = ajUintNew();  	    	
	
	/**********************************/
	/*         Open DHF file          */
	/**********************************/
	if((inf = ajFileNewInNameS(inname)) == NULL)
	    ajFatal("Could not open DHF file %S", inname);

	/* Read DHF file. */
	ok = ajFalse;
	if(!(infhits = embHitlistReadFasta(inf)))
	{
	    ajWarn("embHitlistReadFasta call failed in seqnr");
	    ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n");
	
	    /* Read sequence set instead. */ 
	    seqset = ajSeqsetNew();
	    seqin  = ajSeqinNew();
	    ajSeqinUsa(&seqin, inname);
	
	    if(!(ajSeqsetRead(seqset, seqin)))
		ajFatal("SeqsetRead failed in seqsearch_psialigned");

	    if(ajSeqsetGetSize(seqset))
		ok = ajTrue;
	}
	else
	    if(infhits->N)
		ok = ajTrue;

	/* Close DHF file. */
	ajFileClose(&inf);
	
	/* Process empty DHF files (should never occur). */
	if(!ok)
	{		
	    ajWarn("Empty input file %S\n", inname);
	    ajFmtPrintF(logf, "Empty input file %S\n", inname);
	    if(infhits)
		embHitlistDel(&infhits);
	    if(seqset)
		ajSeqsetDel(&seqset);
	    if(seqin)
		ajSeqinDel(&seqin);
	    continue;
	}	

	
	/* 1.  Create list of sequences from the main input directory.. */
	if(infhits)
	{
	    for(x=0; x<infhits->N; x++)
	    {
		AJNEW0(seq_tmp);
		seq_tmp->Seq = ajSeqNew();
		ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc);
		ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq);
		ajListPushAppend(seq_list,seq_tmp);		
	    }
	} 
	else
	{	 
	    for(x=0;x<ajSeqsetGetSize(seqset);x++)
	    {
		AJNEW0(seq_tmp);
		seq_tmp->Seq = ajSeqNew();
		ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x));
		ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x));
		ajListPushAppend(seq_list,seq_tmp);		
	    }
	    ajSeqsetDel(&seqset);
	    ajSeqinDel(&seqin);
	}
	
    

	/**********************************/
	/*   Open singlets filter file    */
	/**********************************/
	if(dosing)
	{
	    /* Open singlets file. */
	    ajStrAssignS(&filtername, inname);
	    ajFilenameTrimPathExt(&filtername);
	    ajStrInsertS(&filtername, 0, ajDirGetPath(singlets));
	    ajStrAppendC(&filtername, ".");
	    ajStrAppendS(&filtername, ajDirGetExt(singlets));

	
	    if((filterf = ajFileNewInNameS(filtername)) == NULL)
	    {
		ajWarn("Could not open DHF file %S",
		       filtername);
		ajFmtPrint("Could not open singlets filter file %S",
			   filtername);
	    }
	    else
	    {
		/* Read DHF file. */
		ok = ajFalse;
		if(!(hitlist = embHitlistReadFasta(filterf)))
		{
		    ajWarn("embHitlistReadFasta call failed in seqnr");
		    ajFmtPrintF(logf, 
				"embHitlistReadFasta call failed in seqnr\n");
	
		    /* Read sequence set instead. */ 
		    seqset = ajSeqsetNew();
		    seqin  = ajSeqinNew();
		    ajSeqinUsa(&seqin, inname);
	
		    if(!(ajSeqsetRead(seqset, seqin)))
			ajFatal("SeqsetRead failed in seqnr");

		    if(ajSeqsetGetSize(seqset))
			ok = ajTrue;
		}
		else
		    if(hitlist->N)
			ok = ajTrue;


		/* Close DHF file. */
		ajFileClose(&filterf);

	
		/* Process empty DHF files (should never occur). */
		if(!ok)
		{		
		    ajWarn("Empty singlets filter file %S\n", filtername);
		    ajFmtPrintF(logf, "Empty singlets filter file %S\n", 
				filtername);
		    /* No continue this time. */
		}	

	
		/* 2. Add sequences from filter directories to List but mark 
		   them up (they are considered in the redundancy calculation 
		   but never appear in the output files). */
		if(hitlist)
		{
		    for(x=0; x<hitlist->N; x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,hitlist->hits[x]->Acc);
			ajStrAssignS(&seq_tmp->Seq->Seq,hitlist->hits[x]->Seq);
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    embHitlistDel(&hitlist);
		} 
		else
		{	 
		    for(x=0;x<ajSeqsetGetSize(seqset);x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,
				     ajSeqsetGetseqAccS(seqset, x));
			ajStrAssignS(&seq_tmp->Seq->Seq,
				     ajSeqsetGetseqSeqS(seqset, x));
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajSeqsetDel(&seqset);
		    ajSeqinDel(&seqin);
		}
	    }
	}
	
	
	
	/**********************************/
	/*      Open sets filter file     */
	/**********************************/
	if(dosets)
	{
	    /* Open sets file. */
	    ajStrAssignS(&filtername, inname);
	    ajFilenameTrimPathExt(&filtername);
	    ajStrInsertS(&filtername, 0, ajDirGetPath(insets));
	    ajStrAppendC(&filtername, ".");
	    ajStrAppendS(&filtername, ajDirGetExt(insets));

	
	    if((filterf = ajFileNewInNameS(filtername)) == NULL)
	    {
		ajWarn("Could not open DAF file %S", filtername);
		ajFmtPrint("Could not open sets filter file %S", filtername);
	    }
	    else
	    {
		/* Read DAF file. */
		ok = ajFalse;

		if(!(ajDmxScopalgRead(filterf, &scopalg)))
		{
		    ajWarn("ajDmxScopalgRead call failed in seqnr");
		    ajFmtPrintF(logf,
				"ajDmxScopalgRead call failed in seqnr\n");
	
		    /* Read sequence set instead. */ 
		    seqset = ajSeqsetNew();
		    seqin  = ajSeqinNew();
		    ajSeqinUsa(&seqin, inname);
		    
		    if(!(ajSeqsetRead(seqset, seqin)))
			ajFatal("SeqsetRead failed in seqnr");

		    if(ajSeqsetGetSize(seqset))
			ok = ajTrue;
		}
		else
		    if(scopalg->N)
			ok = ajTrue;


		/* Close DHF file. */
		ajFileClose(&filterf);

	
		/* Process empty DHF files (should never occur). */
		if(!ok)
		{		
		    ajWarn("Empty sets filter file %S\n",
			   filtername);
		    ajFmtPrintF(logf, "Empty sets filter file %S\n",
				filtername);
		    /* No continue this time. */
		}	

	
		/* 2. Add sequences from filter directories to List but mark 
		   them up (they are considered in the redundancy calculation 
		   but never appear in the output files).. */
		if(scopalg)
		{
		    for(x=0; x<scopalg->N; x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]);
			ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]);
			/* Remove gap char's & whitespace. */
			ajStrRemoveGap(&seq_tmp->Seq->Seq);  
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajDmxScopalgDel(&scopalg);
		} 
		else
		{	 
		    for(x=0;x<ajSeqsetGetSize(seqset);x++)
		    {
			AJNEW0(seq_tmp);
			seq_tmp->Seq = ajSeqNew();
			seq_tmp->Garbage = ajTrue;
			ajStrAssignS(&seq_tmp->Seq->Acc,
				     ajSeqsetGetseqAccS(seqset, x));
			ajStrAssignS(&seq_tmp->Seq->Seq,
				     ajSeqsetGetseqSeqS(seqset, x));
			ajListPushAppend(seq_list,seq_tmp);		
		    }
		    ajSeqsetDel(&seqset);
		    ajSeqinDel(&seqin);
		}
	    }
	}
	
	
	/* 4. Identify redundant domains.. */
	if(moden == 1)
	{
	    if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen, 
			     gapextend, thresh, ajTrue)))
		ajFatal("embDmxSeqNR failure in seqnr");
	}		
	else
	{
	    if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen, 
			     gapextend, threshlow, threshup, ajTrue)))
		ajFatal("embDmxSeqNR failure in seqnr");
	}	
	seq_siz = ajListGetLength(seq_list);
	for(x=0; x<seq_siz; x++)
	    if(ajUintGet(keep, x) == 1)
		ajUintPut(&nokeep, x, 0);
	    else
		ajUintPut(&nokeep, x, 1);
	

	/* Create output files. */
	ajStrAssignS(&outname, inname);
	ajFilenameTrimPathExt(&outname);
	outf = ajFileNewOutNameDirS(outname, out);
	if(dored)
	    redf = ajFileNewOutNameDirS(outname, outred);
	

	/* 5. Write non-redundant domains to main output directory.  
	   6.  If specified, write redundant domains to output directory. */
	embHitlistWriteSubsetFasta(outf, infhits, keep);
	if(dored)
	    embHitlistWriteSubsetFasta(redf, infhits, nokeep);

	embHitlistDel(&infhits);
	ajFileClose(&outf);
	ajFileClose(&redf);
	ajStrDel(&inname);

	while(ajListPop(seq_list, (void **) &seq_tmp))
	{
	    ajSeqDel(&seq_tmp->Seq);
	    AJFREE(seq_tmp);
	}
	ajListFree(&seq_list);
	ajUintDel(&keep);	
	ajUintDel(&nokeep);
    }	    


    /* Tidy up. */
    ajListFree(&in);
    if(singlets)
	ajDirDel(&singlets);
    if(insets)
	ajDirDel(&insets);
    ajDiroutDel(&out);
    if(outred)
	ajDiroutDel(&outred);
    ajFileClose(&logf);

    ajMatrixfDel(&matrix);

    ajStrDel(&filtername);
    ajStrDel(&outname);
    ajStrDel(&mode);


    embExit();
    return 0;
}
Exemplo n.º 14
0
int main(int argc, char **argv)
{
    ajint begin, end;
    AjPSeqall seqall;
    AjPSeq seq;
    EmbPShow ss;
    AjPFile outfile;
    AjPStr tablename;
    ajint table;
    AjPRange uppercase;
    AjPRange highlight;
    AjBool threeletter;
    AjBool numberseq;
    AjBool nameseq;
    ajint width;
    ajint length;
    ajint margin;
    AjBool description;
    ajint offset;
    AjBool html;
    AjPStr descriptionline;
    ajint orfminsize;
    AjPTrn trnTable;
    AjBool translation;
    AjBool reverse;
    AjBool cutlist;
    AjBool flat;
    EmbPMatMatch mm = NULL;

    AjPStr *framelist;
    AjBool frames[6];   /* frames to be translated 1 to 3, -1 to -3 */
	 
    /* stuff for tables and lists of enzymes and hits */
    ajint default_mincuts = 1;
    ajint default_maxcuts = 2000000000;
    AjPTable hittable; /* enzyme hits */

    /* stuff lifted from Alan's 'restrict.c' */
    AjPStr enzymes = NULL;
    ajint mincuts;
    ajint maxcuts;
    ajint sitelen;
    AjBool single;
    AjBool blunt;
    AjBool sticky;
    AjBool ambiguity;
    AjBool plasmid;
    AjBool commercial;
    AjBool limit;
    AjBool methyl;
    AjPFile enzfile  = NULL;
    AjPFile equfile  = NULL;
    AjPFile methfile = NULL;
    AjPTable retable = NULL;
    ajint hits;
    AjPList restrictlist = NULL;

    embInit("remap", argc, argv);

    seqall      = ajAcdGetSeqall("sequence");
    outfile     = ajAcdGetOutfile("outfile");
    tablename   = ajAcdGetListSingle("table");
    uppercase   = ajAcdGetRange("uppercase");
    highlight   = ajAcdGetRange("highlight");
    threeletter = ajAcdGetBoolean("threeletter");
    numberseq   = ajAcdGetBoolean("number");
    width       = ajAcdGetInt("width");
    length      = ajAcdGetInt("length");
    margin      = ajAcdGetInt("margin");
    nameseq     = ajAcdGetBoolean("name");
    description = ajAcdGetBoolean("description");
    offset      = ajAcdGetInt("offset");
    html        = ajAcdGetBoolean("html");
    orfminsize  = ajAcdGetInt("orfminsize");
    translation = ajAcdGetBoolean("translation");
    reverse     = ajAcdGetBoolean("reverse");
    cutlist     = ajAcdGetBoolean("cutlist");
    flat        = ajAcdGetBoolean("flatreformat");
    framelist   = ajAcdGetList("frame");
    
    /*  restriction enzyme stuff */
    mincuts    = ajAcdGetInt("mincuts");
    maxcuts    = ajAcdGetInt("maxcuts");
    sitelen    = ajAcdGetInt("sitelen");
    single     = ajAcdGetBoolean("single");
    blunt      = ajAcdGetBoolean("blunt");
    sticky     = ajAcdGetBoolean("sticky");
    ambiguity  = ajAcdGetBoolean("ambiguity");
    plasmid    = ajAcdGetBoolean("plasmid");
    commercial = ajAcdGetBoolean("commercial");
    limit      = ajAcdGetBoolean("limit");
    enzymes    = ajAcdGetString("enzymes");
    methfile   = ajAcdGetDatafile("mfile");
    methyl     = ajAcdGetBoolean("methylation");
    
    if(!blunt  && !sticky)
	ajFatal("Blunt/Sticky end cutters shouldn't both be disabled.");

    /* get the number of the genetic code used */
    ajStrToInt(tablename, &table);
    trnTable = ajTrnNewI(table);

    /* read the local file of enzymes names */
    remap_read_file_of_enzyme_names(&enzymes);

    /* get the frames to be translated */
    remap_GetFrames(framelist, frames);
	 
    while(ajSeqallNext(seqall, &seq))
    {
	/* get begin and end positions */
	begin = ajSeqGetBegin(seq)-1;
	end   = ajSeqGetEnd(seq)-1;

	/* do the name and description */
	if(nameseq)
	{
	    if(html)
		ajFmtPrintF(outfile, "<H2>%S</H2>\n",
				   ajSeqGetNameS(seq));
	    else
		ajFmtPrintF(outfile, "%S\n", ajSeqGetNameS(seq));
	}

	if(description)
	{
	    /*
	    **  wrap the description line at the width of the sequence
	    **  plus margin
	    */
	    if(html)
		ajFmtPrintF(outfile, "<H3>%S</H3>\n",
				   ajSeqGetDescS(seq));
	    else
	    {
		descriptionline = ajStrNew();
		ajStrAssignS(&descriptionline, ajSeqGetDescS(seq));
		ajStrFmtWrap(&descriptionline, width+margin);
		ajFmtPrintF(outfile, "%S\n", descriptionline);
		ajStrDel(&descriptionline);
	    }
	}

	/* get the restriction cut sites */
	/*
	**  most of this is lifted from the program 'restrict.c' by Alan
	**  Bleasby
	 */
	if(single)
	    maxcuts=mincuts=1;
	retable = ajTablestrNew(EQUGUESS);
	enzfile = ajDatafileNewInNameC(ENZDATA);
	if(!enzfile)
	    ajFatal("Cannot locate enzyme file. Run REBASEEXTRACT");

	if(limit)
	{
	    equfile = ajDatafileNewInNameC(EQUDATA);
	    if(!equfile)
		limit = ajFalse;
	    else
		remap_read_equiv(&equfile, &retable, commercial);
	}

	ajFileSeek(enzfile, 0L, 0);
	restrictlist = ajListNew();
	/* search for hits, but don't use mincuts and maxcuts criteria yet */
	hits = embPatRestrictMatch(seq, begin+1, end+1, enzfile, methfile,
                                   enzymes, sitelen,plasmid, ambiguity,
                                   default_mincuts, default_maxcuts, blunt,
                                   sticky, commercial, methyl,
				   restrictlist);

	ajDebug("Remap found %d hits\n", hits);

	if(hits)
	{
	    /* this bit is lifted from printHits */
	    embPatRestrictRestrict(restrictlist, hits, !limit,
					  ajFalse);
	    if(limit)
		remap_RestrictPreferred(restrictlist,retable);
	}


	ajFileClose(&enzfile);
	ajFileClose(&methfile);


	/*
	** Remove those violating the mincuts and maxcuts
	** criteria, but save them in hittable for printing out later.
	** Keep a count of how many hits each enzyme gets in hittable.
	*/
        hittable = ajTablestrNewCase(TABLEGUESS);
	remap_RemoveMinMax(restrictlist, hittable, mincuts, maxcuts);


	/* make the Show Object */
	ss = embShowNew(seq, begin, end, width, length, margin, html, offset);

	if(html)
	    ajFmtPrintF(outfile, "<PRE>");

	/* create the format to display */
	embShowAddBlank(ss);
	embShowAddRE(ss, 1, restrictlist, plasmid, flat);
	embShowAddSeq(ss, numberseq, threeletter, uppercase, highlight);

	if(!numberseq)
	    embShowAddTicknum(ss);
	embShowAddTicks(ss);

	if(reverse)
	{
	    embShowAddComp(ss, numberseq);
	    embShowAddRE(ss, -1, restrictlist, plasmid, flat);
	}


	if(translation)
	{
	    if(reverse)
		embShowAddBlank(ss);

            if(frames[0])	    
	      embShowAddTran(ss, trnTable, 1, threeletter,
			     numberseq, NULL, orfminsize,
			     AJFALSE, AJFALSE, AJFALSE, AJFALSE);
            if(frames[1])
	      embShowAddTran(ss, trnTable, 2, threeletter,
			     numberseq, NULL, orfminsize,
			     AJFALSE, AJFALSE, AJFALSE, AJFALSE);
            if(frames[2])
	      embShowAddTran(ss, trnTable, 3, threeletter,
			     numberseq, NULL, orfminsize,
			     AJFALSE, AJFALSE, AJFALSE, AJFALSE);
	    
	    if(reverse)
	    {
		embShowAddTicks(ss);
                if(frames[5])
		  embShowAddTran(ss, trnTable, -3, threeletter,
			         numberseq, NULL, orfminsize,
			         AJFALSE, AJFALSE, AJFALSE, AJFALSE);
                if(frames[4])
		  embShowAddTran(ss, trnTable, -2, threeletter,
			         numberseq, NULL, orfminsize,
			         AJFALSE, AJFALSE, AJFALSE, AJFALSE);
                if(frames[3])
		  embShowAddTran(ss, trnTable, -1, threeletter,
			         numberseq, NULL, orfminsize,
			         AJFALSE, AJFALSE, AJFALSE, AJFALSE);
	    }
	}

	embShowPrint(outfile, ss);

	/* display a list of the Enzymes that cut and don't cut */
	if(cutlist)
	{
	    remap_CutList(outfile, hittable,
	    		limit, html, mincuts, maxcuts);
	    remap_NoCutList(outfile, hittable, html, enzymes, blunt,
			sticky, sitelen, commercial, ambiguity, 
			limit, retable);
	}

	/* add a gratuitous newline at the end of the sequence */
	ajFmtPrintF(outfile, "\n");

	/* tidy up */
	embShowDel(&ss);

	while(ajListPop(restrictlist,(void **)&mm))
	    embMatMatchDel(&mm);
	ajListFree(&restrictlist);

        remap_DelTable(&hittable);

	ajTablestrFree(&retable);
    }


    ajTrnDel(&trnTable);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajFileClose(&outfile);
    ajStrDel(&tablename);
    ajStrDel(&enzymes);
    ajStrDelarray(&framelist);

    ajRangeDel(&uppercase);
    ajRangeDel(&highlight);

    embExit();

    return 0;
}
Exemplo n.º 15
0
int main(int argc, char **argv)
{
    ajint i;
    ajint numseq;
    ajint j = 0;
    ajint numres;
    ajint count;
    ajint k;
    ajint kmax;
    float defheight;
    float currentscale;
    AjPStr shade = NULL;
    AjPFloat pair  = NULL;
    AjPGraph graph = NULL;
    AjPMatrix cmpmatrix = NULL;
    AjPSeqCvt cvt = NULL;
    AjPStr matcodes = NULL;
    AjBool consensus;
    AjBool colourbyconsensus;
    AjBool colourbyresidues;
    AjBool colourbyshade = AJFALSE;
    AjBool boxit;
    AjBool boxcol;
    AjBool portrait;
    AjBool collision;
    ajint identity;
    AjBool listoptions;
    ajint alternative;
    AjPStr altstr = NULL;
    AjPStr sidentity = NULL;
    AjPStr ssimilarity = NULL;
    AjPStr sother = NULL;
    AjPStr sboxcolval = NULL;
    AjPStr options = NULL;
    /*    ajint showscore = 0; */
    ajint iboxcolval = 0;
    ajint cidentity = RED;
    ajint csimilarity = GREEN;
    ajint cother = BLACK;
    float fxp;
    float fyp;
    float yincr;
    float y;
    ajint ixlen;
    ajint iylen;
    ajint ixoff;
    ajint iyoff;
    char res[2] = " ";

    float *score = 0;
    float scoremax = 0;

    float *identical = NULL;
    ajint identicalmaxindex;
    float *matching = NULL;
    ajint matchingmaxindex;

    float *colcheck = NULL;

    ajint **matrix;
    ajint m1 = 0;
    ajint m2 = 0;
    ajint ms = 0;
    ajint highindex = 0;
    ajint myindex;
    ajint *previous = 0;
    AjBool iscons = ajFalse;
    ajint currentstate = 0;
    ajint oldfg = 0;
    float fold = 0.0;
    ajint *colmat = 0;
    ajint *shadecolour = 0;
    /* float identthresh = 1.5; */
    /* float simthresh = 1.0; */
    /* float relthresh = 0.5; */
    float part = 0.0;
    const char *cptr;
    ajint resbreak;
    float fplural;
    float ystart;
    float xmin;
    float xmax;
    float xmid;
    AjPTime ajtime;
    ajint gapcount = 0;
    ajint countforgap = 0;
    ajint boxindex;
    float max;
    ajint matsize;
    ajint seqperpage = 0;
    ajint startseq;
    ajint endseq;
    ajint newILend = 0;
    ajint newILstart;
    void *freeptr;
    ajint itmp;
    
    embInit("prettyplot", argc, argv);

    seqset   = ajAcdGetSeqset("sequences");
    numres   = ajAcdGetInt("residuesperline");
    resbreak = ajAcdGetInt("resbreak");

    ajSeqsetFill(seqset);	/* Pads sequence set with gap characters */
    numseq = ajSeqsetGetSize(seqset);

    graph             = ajAcdGetGraph("graph");
    colourbyconsensus = ajAcdGetBoolean("ccolours");
    colourbyresidues  = ajAcdGetBoolean("docolour");
    shade             = ajAcdGetString("shade");
    pair              = ajAcdGetArray("pair");
    identity          = ajAcdGetInt("identity");
    boxit             = ajAcdGetBoolean("box");

    ajtime = ajTimeNewTodayFmt("daytime");

    ajSeqsetTrim(seqset);
    /* offset = ajSeqsetGetOffset(seqset); Unused */

    ajGraphAppendTitleS(graph, ajSeqsetGetUsa(seqset));

    if(boxit)
    {
	AJCNEW(seqboxptr, numseq);
	for(i=0;i<numseq;i++)
	    AJCNEW(seqboxptr[i], ajSeqsetGetLen(seqset));
    }
    boxcol      = ajAcdGetBoolean("boxcol");
    sboxcolval  = ajAcdGetString("boxuse");

    if(boxcol)
    {
	iboxcolval = ajGraphicsCheckColourS(sboxcolval);
	if(iboxcolval == -1)
	    iboxcolval = GREY;
    }

    consensus = ajAcdGetBoolean("consensus");
    if(consensus)
    {
	AJCNEW(constr, ajSeqsetGetLen(seqset)+1);
	constr[0] = '\0';
    }
    shownames   = ajAcdGetBoolean("name");
    shownumbers = ajAcdGetBoolean("number");
    charlen     = ajAcdGetInt("maxnamelen");
    fplural     = ajAcdGetFloat("plurality");
    portrait    = ajAcdGetBoolean("portrait");
    collision   = ajAcdGetBoolean("collision");
    listoptions = ajAcdGetBoolean("listoptions");
    altstr = ajAcdGetListSingle("alternative");
    cmpmatrix   = ajAcdGetMatrix("matrixfile");

    ajStrToInt(altstr, &alternative);

    matrix = ajMatrixGetMatrix(cmpmatrix);
    cvt = ajMatrixGetCvt(cmpmatrix);
    matsize = ajMatrixGetSize(cmpmatrix);

    AJCNEW(identical,matsize);
    AJCNEW(matching,matsize);
    AJCNEW(colcheck,matsize);

    numgaps = numres/resbreak;
    numgaps--;

    if(portrait)
    {
	ajGraphicsSetPortrait(1);
	ystart = (float) 75.0;
    }
    else
	ystart = (float) 75.0;

    /* pair is an array of three non-negative floats */

    /* identthresh = ajFloatGet(pair,0); Unused */
    /* simthresh = ajFloatGet(pair,1); Unused */
    /* relthresh = ajFloatGet(pair,2); Unused */

    /*
    ** shade is a formatted 4-character string. Characters BLPW only.
    ** controlled by a pattern in ACD.
    */

    if(ajStrGetLen(shade))
    {
	AJCNEW(shadecolour,4);
	cptr = ajStrGetPtr(shade);
	for(i=0;i<4;i++){
	    if(cptr[i]== 'B' || cptr[i]== 'b')
		shadecolour[i] = BLACK;
	    else if(cptr[i]== 'L' || cptr[i]== 'l')
		shadecolour[i] = BROWN;
	    else if(cptr[i]== 'P' || cptr[i]== 'p')
		shadecolour[i] = WHEAT;
	    else if(cptr[i]== 'W' || cptr[i]== 'w')
		shadecolour[i] = WHITE;
	}

	colourbyconsensus = colourbyresidues = ajFalse;
	colourbyshade = ajTrue;
    }

/*
** we can colour by consensus or residue but not both
** if we have to choose, use the consensus
*/

    if(colourbyconsensus && colourbyresidues)
	colourbyconsensus = AJFALSE;

    sidentity = ajAcdGetString("cidentity");
    ssimilarity = ajAcdGetString("csimilarity");
    sother = ajAcdGetString("cother");

    if(colourbyconsensus)
    {
	cidentity = ajGraphicsCheckColourS(sidentity);
	if(cidentity == -1)
	    cidentity = RED;

	csimilarity = ajGraphicsCheckColourS(ssimilarity);
	if(csimilarity == -1)
	    csimilarity = GREEN;


	cother = ajGraphicsCheckColourS(sother);
	if(cother == -1)
	    cother = BLACK;

    }
    else if(colourbyresidues)
    {
	matcodes = ajMatrixGetCodes(cmpmatrix);
	if(ajSeqsetIsProt(seqset))
	    colmat = ajGraphicsBasecolourNewProt(matcodes);
	else
	    colmat = ajGraphicsBasecolourNewNuc(matcodes);
    }


    /* output the options used as the subtitle for the bottom of the graph */
    if(listoptions)
    {
	ajStrAssignC(&options,"");
	ajFmtPrintAppS(&options,"-plurality %.1f",fplural);

	if(collision)
	    ajStrAppendC(&options," -collision");
	else
	    ajStrAppendC(&options," -nocollision");

	if(boxit)
	    ajStrAppendC(&options," -box");
	else
	    ajStrAppendC(&options," -nobox");

	if(boxcol)
	    ajStrAppendC(&options," -boxcol");
	else
	    ajStrAppendC(&options," -noboxcol");

	if(colourbyconsensus)
	    ajStrAppendC(&options," -colbyconsensus");
	else if(colourbyresidues)
	    ajStrAppendC(&options," -colbyresidues");
	else if(colourbyshade)
	    ajStrAppendC(&options," -colbyshade");
	else
	    ajStrAppendC(&options," -nocolour");

	if(alternative==2)
	    ajStrAppendC(&options," -alt 2");
	else if(alternative==1)
	    ajStrAppendC(&options," -alt 1");
	else if(alternative==3)
	    ajStrAppendC(&options," -alt 3");
    }


    AJCNEW(seqcolptr, numseq);
    for(i=0;i<numseq;i++)
	AJCNEW(seqcolptr[i], ajSeqsetGetLen(seqset));

    AJCNEW(seqcharptr, numseq);
    AJCNEW(seqnames, numseq);
    AJCNEW(score, numseq);
    AJCNEW(previous, numseq);
    AJCNEW(seqcount, numseq);

    for(i=0;i<numseq;i++)
    {
	ajSeqsetFmtUpper(seqset);
	seqcharptr[i] =  ajSeqsetGetseqSeqC(seqset, i);
	seqnames[i] = 0;
	ajStrAppendS(&seqnames[i],ajSeqsetGetseqNameS(seqset, i));
	ajStrTruncateLen(&seqnames[i],charlen);
	previous[i] = 0;
	seqcount[i] = 0;
    }

    /*
    ** user will pass the number of residues to fit a page
    ** therefore we now need to calculate the size of the chars
    ** based on this and get the new char width.
    ** 'charlen' maximum characters for the name (truncated above)
    */

    ajGraphicsGetCharsize(&defheight,&currentscale);

    xmin = -charlen - (float)2.0;
    xmax = (float)numres+(float)11.0+(float)(numres/resbreak);
    xmid = (xmax + xmin)/(float)2.0;

    ajGraphOpenWin(graph, xmin, xmax,
		   (float)0.0, ystart+(float)1.0);
 
    ajGraphGetParamsPage(graph, &fxp,&fyp,&ixlen,&iylen,&ixoff,&iyoff);

    if(portrait)
    {
        itmp = ixlen;
        ixlen = iylen;
        iylen = itmp;
    }

    ajGraphicsGetCharsize(&defheight,&currentscale);

    ajGraphicsSetCharscale(((float)ixlen/((float)(numres+charlen+1)*
                                          (currentscale * (float) 1.5)))/
                                           currentscale);

/*    ajGraphicsSetCharscale(((float)ixlen/((float)(numres+charlen)*
                                          (currentscale+(float)1.0)))/
                                          currentscale); */

    ajGraphicsGetCharsize(&defheight,&currentscale);

    yincr = (currentscale + (float)3.0)*(float)0.3;

/*
** If we have titles (now the standard graph title and subtitle and footer)
** leave 7 rows of space for them
*/
    y=ystart-(float)7.0;

    if(ajStrGetLen(options))
    {
	fold = ajGraphicsSetCharscale(1.0);
	ajGraphicsDrawposTextAtmid(xmid,2.0,
                                   ajStrGetPtr(options));
	ajGraphicsSetCharscale(fold);
    }

/* if sequences per page not set then calculate it */

    if(!seqperpage)
    {
	seqperpage = prettyplot_calcseqperpage(yincr,y,consensus);
	if(seqperpage>numseq)
	    seqperpage=numseq;
    }

    count = 0;

/*
** for boxes we need to set a foreground colour for the box lines
** and save the current foreground colour
*/
    if(boxit && boxcol)
	oldfg = ajGraphicsSetFgcolour(iboxcolval);

/*
** step through each residue position
*/

    kmax = ajSeqsetGetLen(seqset) - 1;
    for(k=0; k<= kmax; k++)
    {
	/* reset column score array */
	for(i=0;i<numseq;i++)
	    score[i] = 0.0;

	/* reset matrix character testing arrays */
	for(i=0;i<matsize;i++)
	{
	    identical[i] = 0.0;
	    matching[i] = 0.0;
	    colcheck[i] = 0.0;
	}

	/* generate a score for this residue in each sequence */
	for(i=0;i<numseq;i++)
	{
	    m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
	    for(j=0;j<numseq;j++)
	    {
		m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]);
		if(m1 && m2)
		    score[i] += (float)matrix[m1][m2]*
			ajSeqsetGetseqWeight(seqset, j);
	    }
	    if(m1)
		identical[m1] += ajSeqsetGetseqWeight(seqset, i);
	}

	/* find the highest score */
	highindex = -1;
	scoremax  = INT_MIN;
	/*ajDebug("Scores at position %d:\n", k);*/

	for(i=0;i<numseq;i++)
	{
	    /*ajDebug("  seq %d: '%c' %f\n",i,seqcharptr[i][k],score[i]);*/

	    if(score[i] > scoremax)
	    {
		scoremax = score[i];
		highindex = i;
	    }
	}
	for(i=0;i<numseq;i++)
	{
	    m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);

	    if(!matching[m1])
	    {
		for(j=0;j<numseq;j++)
		{
		    m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]);
		    if(m1 && m2 && matrix[m1][m2] > 0)
			matching[m1] += ajSeqsetGetseqWeight(seqset, j);
		}
	    }
	}

	/* find highs for matching and identical */
	matchingmaxindex  = 0;
	identicalmaxindex = 0;
	for(i=0;i<numseq;i++)
	{
	    m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
	    if(identical[m1] > identical[identicalmaxindex])
		identicalmaxindex = m1;
	}
	for(i=0;i<numseq;i++)
	{
	    m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
	    if(matching[m1] > matching[matchingmaxindex])
		matchingmaxindex = m1;
	    else if(matching[m1] ==  matching[matchingmaxindex])
	    {
		if(identical[m1] > identical[matchingmaxindex])
		    matchingmaxindex= m1;
	    }
	}

	iscons = ajFalse;
	boxindex = -1;
	max = -3;

	ajDebug("k:%2d highindex:%2d matching:%4.2f\n",
		k, highindex,
		matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])]);
	if(highindex != -1 &&
	   matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])] >= fplural)
	{
	    iscons = ajTrue;
	    boxindex = highindex;
	}
	else
	{
	    for(i=0;i<numseq;i++)
	    {
		m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
		if(matching[m1] > max)
		{
		    max = matching[m1];
		    highindex = i;
		}
		else if(matching[m1] == max)
		{
		    if(identical[m1] >
		       identical[ajSeqcvtGetCodeK(cvt,
                                                  seqcharptr[highindex][k])] )
		    {
			max = matching[m1];
			highindex = i;
		    }
		}
	    }

	    if(matching[ajSeqcvtGetCodeK(cvt,
                                         seqcharptr[highindex][k])] >= fplural)
	    {
		iscons = ajTrue;
		boxindex = highindex;
	    }
	}


	if(iscons)
	{
	    if(!collision)
	    {
		/* check for collisions */
		if(alternative == 1)
		{
		    /* check to see if this is unique for collisions */
		    for(i=0;i<numseq;i++)
		    {
			m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
			if(identical[m1] >= identical[identicalmaxindex] &&
			   m1 != identicalmaxindex)
			    iscons = ajFalse;
		    }

		    /*ajDebug("after (alt=1) iscons: %B",iscons);*/
		}

		else if(alternative == 2)
		{
		    for(i=0;i<numseq;i++)
		    {
			m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);

			if((matching[m1] >= matching[matchingmaxindex] &&
			    m1 != matchingmaxindex &&
			    matrix[m1][matchingmaxindex] < 0.1)||
			   (identical[m1] >= identical[matchingmaxindex]
			   && m1 != matchingmaxindex))
			    iscons = ajFalse;
		    }
		}
		else if(alternative == 3)
		{
		    /*
		    ** to do this check one is NOT in consensus to see if
		    ** another score of fplural has been found
		    */
		    ms = ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k]);

		    for(i=0;i<numseq;i++)
		    {
			m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
			if(ms != m1 && colcheck[m1] == 0.0)
			    /* NOT in the current consensus */
			    for(j=0;j<numseq;j++)
			    {
				m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]);
				if( matrix[ms][m2] < 0.1)
				{
				    /* NOT in the current consensus */
				    if( matrix[m1][m2] > 0.1)
					colcheck[m1] +=
                                            ajSeqsetGetseqWeight(seqset,
                                                                 j);
				}
			    }
		    }

		    for(i=0;i<numseq;i++)
		    {
			m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
			/* if any other matches then we have a collision */
			if(colcheck[m1] >= fplural)
			    iscons = ajFalse;
		    }

		    /*ajDebug("after alt=2 iscons: %B", iscons);*/
		}
		else
		{
		    for(i=0;i<numseq;i++)
		    {
			m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
			if((matching[m1] >= matching[matchingmaxindex] &&
			    m1 != matchingmaxindex &&
			    matrix[m1][matchingmaxindex] < 0.1))
			    iscons = ajFalse;
			if(identical[m1] >= identical[matchingmaxindex] &&
			   m1 != matchingmaxindex &&
			   matrix[m1][matchingmaxindex] > 0.1)
			    iscons = ajFalse;
		    }

		    if(!iscons)
		    {	/* matches failed try identicals */
			if(identical[identicalmaxindex] >= fplural)
			{
			    iscons = ajTrue;
			    /*
			    ** if nothing has an equal or higher match that
			    ** does not match highest then false
			    */
			    for(i=0;i<numseq;i++)
			    {
				m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);
				if(identical[m1] >=
				   identical[identicalmaxindex] &&
				   m1 != identicalmaxindex)
				    iscons = ajFalse;
				else if(matching[m1] >=
					matching[identicalmaxindex] &&
					matrix[m1][matchingmaxindex] <= 0.0)
				    iscons = ajFalse;
				else if(m1 == identicalmaxindex)
				    j = i;
			    }

			    if(iscons)
				highindex = j;
			}
		    }

		}
	    }

	    if(identity)
	    {
		j = 0;
		for(i=0;i<numseq;i++)
		    if(seqcharptr[highindex][k] == seqcharptr[i][k])
			j++;

		if(j<identity)
		    iscons = ajFalse;
	    }
	}

	/*
	** Done a full line of residues
	** Boxes have been defined up to this point
	*/
	if(count >= numres )
	{
	    /* check y position for next set */
	    y=y-(yincr*((float)numseq+(float)2.0+((float)consensus*(float)2)));
	    if(y<yincr*((float)numseq+(float)2.0+((float)consensus*(float)2)))
	    {
		/* full page - print it */
		y=ystart-(float)6.0;

		startseq = 0;
		endseq = seqperpage;
		newILstart = newILend;
		newILend = k;
		while(startseq < numseq)
		{
		    /* AJB */
		    /*if(startseq != 0)
		    	ajGraphNewpage(graph, AJFALSE);*/

		    /*ajDebug("Inner loop: startseq: %d numseq: %d endseq: %d\n",
			    startseq, numseq, endseq);*/
		    if(endseq>numseq)
			endseq=numseq;
		    prettyplot_fillinboxes(numseq,ajSeqsetGetLen(seqset),
					   startseq,endseq,
					   newILstart,newILend,
					   numres,resbreak,
					   boxit,boxcol,consensus,
					   ystart,yincr,cvt);
		    startseq = endseq;
		    endseq += seqperpage;
		    ajGraphNewpage(graph, AJFALSE);
		}
	    }

	    count = 0;
	    gapcount = 0;
	}

	count++;
	countforgap++;

	for(j=0;j<numseq;j++)
	{
	    /* START OF BOXES */

	    if(boxit)
	    {
		seqboxptr[j][k] = 0;
		if(boxindex!=-1)
		{
		    myindex = boxindex;
		    if(matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])]
		       [ajSeqcvtGetCodeK(cvt, seqcharptr[myindex][k])] > 0)
			part = 1.0;
		    else
		    {
			if(identical[ajSeqcvtGetCodeK(cvt,
                                                      seqcharptr[j][k])] >=
			   fplural)
			    part = 1.0;
			else
			    part = 0.0;
		    }

		    if(previous[j] != part)
			/* draw vertical line */
			seqboxptr[j][k] |= BOXLEF;

		    if(j==0)
		    {
			/* special case for horizontal line */
			if(part)
			{
			    currentstate = 1;
			    /* draw hori line */
			    seqboxptr[j][k] |= BOXTOP;
			}
			else
			    currentstate = 0;
		    }
		    else
		    {
			/* j != 0  Normal case for horizontal line */
			if(part != currentstate)
			{
			    /*draw hori line */
			    seqboxptr[j][k] |= BOXTOP;
			    currentstate = (ajint) part;
			}
		    }

		    if(j== numseq-1 && currentstate)
			/* draw horiline at bottom */
			seqboxptr[j][k] |= BOXBOT;

		    previous[j] = (ajint) part;
		}
		else
		{
		    part = 0;
		    if(previous[j])
		    {
			/* draw vertical line */
			seqboxptr[j][k] |= BOXLEF;
		    }
		    previous[j] = 0;
		}

		if(count == numres || k == kmax || countforgap >= resbreak )
		{			/* last one on the row or a break*/
		    if(previous[j])
		    {
			/* draw vertical line */
			seqboxptr[j][k] |= BOXRIG;
		    }
		    previous[j] = 0;
		}

	    } /* end box */

	    if(boxit && boxcol)
		if(boxindex != -1)
		{
		    myindex = boxindex;
		    if(matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])]
		       [ajSeqcvtGetCodeK(cvt, seqcharptr[myindex][k])] > 0
		       || identical[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] >=
                       fplural )

			seqboxptr[j][k] |= BOXCOLOURED;
		}

	    /* END OF BOXES */




	    if(ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]))
		res[0] = seqcharptr[j][k];
	    else
		res[0] = '-';

	    if(colourbyconsensus)
	    {
		part = (float) matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])]
		    [ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])];
		if(iscons && seqcharptr[highindex][k] == seqcharptr[j][k])
		    seqcolptr[j][k] = cidentity;
		else if(part > 0.0)
		    seqcolptr[j][k] = csimilarity;
		else
		    seqcolptr[j][k] = cother;
	    }
	    else if(colourbyresidues)
		seqcolptr[j][k] = colmat[ajSeqcvtGetCodeK(cvt,
                                                          seqcharptr[j][k])];
	    else if(iscons && colourbyshade)
	    {
		part = (float) matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])]
		    [ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])];
		if(part >= 1.5)
		    seqcolptr[j][k] = shadecolour[0];
		else if(part >= 1.0)
		    seqcolptr[j][k] = shadecolour[1];
		else if(part >= 0.5)
		    seqcolptr[j][k] = shadecolour[2];
		else
		    seqcolptr[j][k] = shadecolour[3];
	    }
	    else if(colourbyshade)
		seqcolptr[j][k] = shadecolour[3];
	    else
		seqcolptr[j][k] = BLACK;
	}

	if(consensus)
	{
	    if(iscons)
		res[0] = seqcharptr[highindex][k];
	    else
		res[0] = '-';
	    strcat(constr,res);
	}

	if(countforgap >= resbreak)
	{
	    gapcount++;
	    countforgap=0;
	}
    }


    startseq = 0;
    endseq=seqperpage;
    newILstart = newILend;
    newILend = k;
    while(startseq < numseq)
    {
	if(startseq)
	    ajGraphNewpage(graph, AJFALSE);

	/*ajDebug("Final loop: startseq: %d numseq: %d endseq: %d\n",
		startseq, numseq, endseq);*/
	if(endseq>numseq)
	    endseq = numseq;
	prettyplot_fillinboxes(numseq,ajSeqsetGetLen(seqset),
			       startseq,endseq,
			       newILstart,newILend,
			       numres,resbreak,
			       boxit,boxcol,consensus,
			       ystart,yincr,cvt);
	startseq = endseq;
	endseq += seqperpage;
    }


    ajGraphicsGetCharsize(&defheight,&currentscale);

    if(boxit && boxcol)
	oldfg = ajGraphicsSetFgcolour(oldfg);

    ajGraphicsCloseWin();
    ajGraphxyDel(&graph);

    ajStrDel(&sidentity);
    ajStrDel(&ssimilarity);
    ajStrDel(&sother);
    ajStrDel(&options);
    ajStrDel(&altstr);

    ajStrDel(&matcodes);

    for(i=0;i<numseq;i++)
    {
	ajStrDel(&seqnames[i]);
	AJFREE(seqcolptr[i]);
	if(seqboxptr)
            AJFREE(seqboxptr[i]);
    }
    AJFREE(seqcolptr);
    AJFREE(seqboxptr);

    AJFREE(seqnames);
    AJFREE(score);
    AJFREE(previous);
    AJFREE(seqcount);

    AJFREE(colmat);
    AJFREE(shadecolour);

    freeptr = (void *) seqcharptr;
    AJFREE(freeptr);

    AJFREE(identical);
    AJFREE(matching);
    AJFREE(colcheck);

    ajSeqsetDel(&seqset);
    ajMatrixDel(&cmpmatrix);
    ajStrDel(&shade);
    ajStrDel(&sboxcolval);
    ajStrDel(&sidentity);
    ajStrDel(&ssimilarity);
    ajStrDel(&sother);
    ajFloatDel(&pair);
    ajTimeDel(&ajtime);
    AJFREE(constr);

    embExit();

    return 0;
}
Exemplo n.º 16
0
int main(int argc, char **argv)
{
    /* Variable Declarations */
    AjPSeqset  seqset    = NULL;
    AjPMatrixf fmat      = NULL;
    float      thresh;
    float      threshlow;
    float      threshup;
    float      gapopen;
    float      gapextend;
    AjPSeqout  seqout    = NULL;
    AjPSeqout  seqoutred = NULL;
    AjPStr     mode      = NULL;
    ajint      moden;
    ajuint i;


    /* toggle "feature" from ACD not retrieved ... no need */

    const AjPSeq seq    = NULL;
    AjPList      list   = NULL;    /* List for redundancy removal.       */
    AjPUint      keep   = NULL;    /* 1: Sequence in list was non-redundant,
                                      0: redundant.    */
    ajuint       nseq   = 0;       /* No. seqs. in list.                 */
    ajint        nseqnr = 0;       /* No. non-redundant seqs. in list.   */

    /* ACD File Processing */
    embInit("skipredundant", argc, argv);
    seqset        = ajAcdGetSeqset("sequences");
    mode          = ajAcdGetListSingle("mode");
    fmat          = ajAcdGetMatrixf("datafile");
    thresh        = ajAcdGetFloat("threshold");
    threshlow     = ajAcdGetFloat("minthreshold");
    threshup      = ajAcdGetFloat("maxthreshold");
    gapopen       = ajAcdGetFloat("gapopen");
    gapextend     = ajAcdGetFloat("gapextend");
    seqout        = ajAcdGetSeqoutall("outseq");
    seqoutred     = ajAcdGetSeqoutall("redundantoutseq");



    /* Application logic */
    list    = ajListNew();
    skipredundant_SeqsetToList(list, seqset);
    keep = ajUintNew();  
    ajStrToInt(mode, &moden);


    if(moden == 1) 
      /* Remove redundancy at a single threshold % sequence similarity */
      {
	if((!embDmxSeqNR(list, &keep, &nseqnr, fmat, gapopen, 
			 gapextend, thresh, ajFalse)))
	  ajFatal("embDmxSeqNR unexpected failure!");
      }
    else if (moden == 2)
      /* 2: Remove redundancy outside a range of acceptable threshold % similarity */
      {
	if((!embDmxSeqNRRange(list, &keep, &nseqnr, fmat, gapopen, 
			      gapextend, threshlow, threshup, ajFalse)))
	  ajFatal("embDmxSeqNRRange unexpected failure!");
      }
    else 
      ajFatal("Invalid mode (not 1 or 2) which should never occur (check ACD file!)");

    nseq = ajSeqsetGetSize(seqset);
    for(i=0; i<nseq; i++)
      {
	seq = ajSeqsetGetseqSeq(seqset, i);

	if(ajUintGet(keep, i))
	  ajSeqoutWriteSeq(seqout, seq);
	else if(seqoutred)
	  ajSeqoutWriteSeq(seqoutred, seq);
      }

    /* Memory management and exit */
    ajSeqsetDel(&seqset);
    ajMatrixfDel(&fmat);
    ajStrDel(&mode);
    ajSeqoutClose(seqout);
    ajSeqoutDel(&seqout);
    if(seqoutred)
    {
	ajSeqoutClose(seqoutred);
	ajSeqoutDel(&seqoutred);
    }
    skipredundant_ClearList(list);

    ajListFree(&list);
    ajUintDel(&keep);

    embExit();

    return 0;
}
Exemplo n.º 17
0
int main(int argc, char **argv)
{
    AjPSeqall  seqall;
    AjPSeq  a;
    AjPStr  substr;
    AjPStr  rname;
    ajint be;
    ajint en;
    ajint len;

    AjBool unfavoured;
    AjBool overlap;
    AjBool allpartials;
    AjPStr menu;
    AjPStr rag;
    ajint  n = 0;
    ajint  r = 0;
    
    AjPFile  outf = NULL;
    AjPReport report    = NULL;
    AjPFeattable TabRpt = NULL;
    AjPStr tmpStr = NULL;
    AjPList  l;
    AjPList  pa;
    AjPFile mfptr   = NULL;

    AjBool nterm = ajFalse;
    AjBool cterm = ajFalse;
    AjBool dorag = ajFalse;

    ajint     ncomp;
    ajint     npart;

    EmbPPropMolwt *mwdata = NULL;
    AjBool mono;
    

    embInit("digest", argc, argv);

    seqall      = ajAcdGetSeqall("seqall");
    menu        = ajAcdGetListSingle("menu");
    dorag       = ajAcdGetBoolean("ragging");
    rag         = ajAcdGetListSingle("termini");
    unfavoured  = ajAcdGetBoolean("unfavoured");
    overlap     = ajAcdGetBoolean("overlap");
    allpartials = ajAcdGetBoolean("allpartials");
    report      = ajAcdGetReport("outfile");
    mfptr       = ajAcdGetDatafile("mwdata");
    mono        = ajAcdGetBoolean("mono");
    
    /* obsolete. Can be uncommented in acd file and here to reuse */

    /* outf      = ajAcdGetOutfile("originalfile"); */

    ajStrToInt(menu, &n);
    --n;

    ajStrToInt(rag, &r);

    if(r==2 || r==4)
	nterm = ajTrue;

    if(r==3 || r==4)
	cterm = ajTrue;


    mwdata = embPropEmolwtRead(mfptr);

    while(ajSeqallNext(seqall, &a))
    {
	substr = ajStrNew();
	be     = ajSeqGetBegin(a);
	en     = ajSeqGetEnd(a);
	ajStrAssignSubC(&substr,ajSeqGetSeqC(a),be-1,en-1);
        ajStrFmtUpper(&substr);

	len = en-be+1;

	l     = ajListNew();
	pa    = ajListNew();
	rname = ajStrNew();

	TabRpt = ajFeattableNewSeq(a);

	embPropCalcFragments(ajStrGetPtr(substr),n,&l,&pa,
			     unfavoured,overlap,
			     allpartials,&ncomp,&npart,&rname,
			     nterm, cterm, dorag, mwdata, mono);

	if(outf)
	    ajFmtPrintF(outf,"DIGEST of %s from %d to %d Molwt=%10.3f\n\n",
			ajSeqGetNameC(a),be,en,
			embPropCalcMolwt(ajSeqGetSeqC(a),0,len-1,mwdata,mono));
	if(!ncomp)
	{
	    if(outf)
		ajFmtPrintF(outf,
			    "Is not proteolytically digested using %s\n",
			    ajStrGetPtr(rname));
	}
	else
	{
	    if(outf)
	    {
		ajFmtPrintF(outf,"Complete digestion with %s "
			    "yields %d fragments:\n",
			    ajStrGetPtr(rname),ncomp);
		digest_print_hits(l,outf,be,ajStrGetPtr(substr));
	    }
	    ajFmtPrintS(&tmpStr,
			"Complete digestion with %S yields %d fragments",
			rname,ncomp);
	    ajReportSetHeaderS(report, tmpStr);
	    digest_report_hits(TabRpt,l,be, ajStrGetPtr(substr));
	    ajReportWrite(report, TabRpt, a);
	    ajFeattableClear(TabRpt);
	}

	if(overlap && !allpartials && npart)
	{
	    if(outf)
	    {
		ajFmtPrintF(outf,
			    "\n\nPartial digest with %s yields %d extras.\n",
			    ajStrGetPtr(rname),npart);
		ajFmtPrintF(outf,"Only overlapping partials shown:\n");
		digest_print_hits(pa,outf,be,ajStrGetPtr(substr));
	    }
	    ajFmtPrintS(&tmpStr,
			"\n\nPartial digest with %S yields %d extras.\n",
			rname,npart);
	    ajFmtPrintAppS(&tmpStr,"Only overlapping partials shown:\n");
	    ajReportSetHeaderS(report, tmpStr);
	    digest_report_hits(TabRpt, pa,be,ajStrGetPtr(substr));
	    ajReportWrite(report, TabRpt, a);
	    ajFeattableClear(TabRpt);
	}

	if(allpartials && npart)
	{
	    if(outf)
	    {
		ajFmtPrintF(outf,
			    "\n\nPartial digest with %s yields %d extras.\n",
			    ajStrGetPtr(rname),npart);
		ajFmtPrintF(outf,"All partials shown:\n");
		digest_print_hits(pa,outf,be,ajStrGetPtr(substr));
	    }
	    ajFmtPrintS(&tmpStr,
			"\n\nPartial digest with %S yields %d extras.\n",
			rname,npart);
	    ajFmtPrintAppS(&tmpStr,"All partials shown:\n");
	    ajReportSetHeaderS(report, tmpStr);
	    digest_report_hits(TabRpt, pa,be, ajStrGetPtr(substr));
	    ajReportWrite(report, TabRpt, a);
	    ajFeattableClear(TabRpt);
	}
    }


    embPropMolwtDel(&mwdata);

    ajReportDel(&report);

    ajFeattableDel(&TabRpt);
    
    ajSeqDel(&a);
    ajSeqallDel(&seqall);

    ajStrDel(&rname);
    ajStrDel(&substr);
    ajListFree(&pa);
    ajListFree(&l);
    ajStrDel(&menu);
    ajStrDel(&rag);

    if(outf)
	ajFileClose(&outf);
    ajFileClose(&mfptr);

    ajStrDel(&tmpStr);

    embExit();

    return 0;
}