示例#1
0
/*
** Select the ID-line regular expression for a named FASTA ID format.
**
** As a side effect the file-level fallback expression dbifastaGIdexp
** (same pattern as the "simple" format) is compiled if it is not set yet.
**
** @param [r] idformat [const AjPStr] Format name: "simple", "idacc",
**                                    "accid", "gcgid", "gcgidacc",
**                                    "gcgaccid", "ncbi" or "dbid".
** @param [w] type [ajuint*] Receives the matching FASTATYPE_* code;
**                           left untouched for an unknown format name.
** @return [AjPRegexp] Newly compiled expression, or NULL when idformat
**                     is not one of the names above.
**                     NOTE(review): the caller presumably owns and frees it.
*/
static AjPRegexp dbifasta_getExpr(const AjPStr idformat, ajuint *type)
{
    AjPRegexp retexp = NULL;

    /*
    ** Compile the fallback only once: recompiling on every call would
    ** overwrite (and leak) the previously compiled expression.
    */
    if(!dbifastaGIdexp)
        dbifastaGIdexp = ajRegCompC("^>([A-Za-z0-9_-]+:)?([.A-Za-z0-9_-]+)");

    if(ajStrMatchC(idformat,"simple"))
    {
        *type = FASTATYPE_SIMPLE;
        retexp = ajRegCompC("^>([A-Za-z0-9_-]+:)?([.A-Za-z0-9_-]+)");
    }
    else if(ajStrMatchC(idformat,"idacc"))
    {
        *type = FASTATYPE_IDACC;
        retexp = ajRegCompC(
                            "^>([.A-Za-z0-9_-]+)+[ \t]+\\(?([A-Za-z0-9_-]+)\\)?");
    }
    else if(ajStrMatchC(idformat,"accid"))
    {
        *type = FASTATYPE_ACCID;
        retexp = ajRegCompC("^>([A-Za-z0-9_-]+)+[ \t]+([A-Za-z0-9_-]+)");
    }
    else if(ajStrMatchC(idformat,"gcgid"))
    {
        *type = FASTATYPE_GCGID;
        retexp = ajRegCompC("^>[A-Za-z0-9_-]+:([A-Za-z0-9_-]+)");
    }
    else if(ajStrMatchC(idformat,"gcgidacc"))
    {
        *type = FASTATYPE_GCGIDACC;
        retexp = ajRegCompC(
                     "^>[A-Za-z0-9_-]+:([A-Za-z0-9_-]+)[ \t]+([A-Za-z0-9-]+)");
    }
    else if(ajStrMatchC(idformat,"gcgaccid"))
    {
        /* same pattern as gcgidacc; only the type code differs */
        *type = FASTATYPE_GCGACCID;
        retexp = ajRegCompC(
                     "^>[A-Za-z0-9_-]+:([A-Za-z0-9_-]+)[ \t]+([A-Za-z0-9-]+)");
    }
    else if(ajStrMatchC(idformat,"ncbi"))
    {
        *type = FASTATYPE_NCBI;
        retexp = ajRegCompC("^>([A-Za-z0-9_-]+)"); /* dummy regexp */
    }
    else if(ajStrMatchC(idformat,"dbid"))
    {
        *type = FASTATYPE_DBID;
        retexp = ajRegCompC("^>[A-Za-z0-9_-]+[ \t]+([A-Za-z0-9_-]+)");
    }
    else
        return NULL;

    return retexp;
}
示例#2
0
文件: ghttp.c 项目: ktnyt/GEMBASSY
/*
** Ask the G-language conversion service to convert a previous job's
** result and stream the converted data into *outf.
**
** The job id is taken as everything following "jobid=" in url.
**
** @param url       URL containing a "jobid=" component.
** @param outf      Output file handed to gHttpGetBinS.
** @param informat  Input format name placed in the request.
** @param outformat Output format name placed in the request.
** @return ajTrue on success; ajFalse when url has no "jobid=" part or
**         the download fails.
*/
AjBool gHttpConvertS(AjPStr url, AjPFile* outf, AjPStr informat, AjPStr outformat)
{
  AjPRegexp regexp  = NULL;
  AjPStr    jobid   = NULL;
  AjPStr    convert = NULL;
  AjBool    ret     = ajFalse;

  regexp = ajRegCompC("^.+jobid=");

  /* the text after the match is the job id */
  if(ajRegExec(regexp, url) && ajRegPost(regexp, &jobid))
    {
      convert = ajFmtStr("http://soap.g-language.org/WS/convert.cgi?"
                         "jobid=%S&informat=%S&outformat=%S",
                         jobid, informat, outformat);

      if(gHttpGetBinS(convert, outf))
        ret = ajTrue;

      ajStrDel(&convert);
    }

  /* release temporaries on every path (previously leaked) */
  ajStrDel(&jobid);
  ajRegFree(&regexp);

  return ret;
}
示例#3
0
文件: gfile.c 项目: ktnyt/GEMBASSY
/*
** Check an accession ID by requesting it from the G-language REST server.
**
** The ID is accepted when the URL can be fetched at all. An earlier
** check of the first response line against "^0" sat after an
** unconditional return and never ran; it has been removed so the code
** shows what actually happens.
**
** @param id  Accession ID appended to the REST base URL.
** @return ajTrue when the request succeeds, ajFalse otherwise.
*/
ajint gValID(AjPStr id){
  AjPFilebuff buff = NULL;
  AjPStr      url  = NULL;
  ajint       ret  = ajFalse;

  url = ajStrNew();
  ajFmtPrintS(&url, "http://rest.g-language.org/%S", id);

  if(gFilebuffURLS(url, &buff))
    ret = ajTrue;

  /* the response body is not inspected; release it and the URL */
  if(buff)
    ajFilebuffDel(&buff);

  ajStrDel(&url);

  return ret;
}
示例#4
0
文件: ghttp.c 项目: ktnyt/GEMBASSY
/*
** Download the body of an HTTP resource, following redirects, and copy
** it byte by byte into *outf.
**
** @param url   URL to fetch.
** @param outf  Output file; closed by this function on success.
** @return ajTrue when the body was copied, ajFalse when the connection
**         could not be opened.
*/
AjBool gHttpGetBinS(AjPStr url, AjPFile* outf)
{
  AjPFile   file = NULL;
  AjPStr    line = NULL;
  AjPStr    host = NULL;
  AjPStr    path = NULL;
  AjPStr    get  = NULL;
  AjPRegexp crlf = NULL;
  ajint     port = 80;
  FILE     *fp   = NULL;
  AjBool    ok   = ajTrue;

  char buf[8];

  AjOSysSocket sock;

  get = ajStrNew();

  ajHttpUrlDeconstruct(url, &port, &host, &path);

  /* first pass opens the URL; later passes follow a redirect */
  while(file==NULL || gHttpRedirect(file, &host, &port, &path))
    {
      /* drop the redirected connection before opening the new one */
      if(file)
        ajFileClose(&file);

      if(ajStrGetCharFirst(path) != '/')
        ajStrInsertK(&path, 0, '/');

      ajFmtPrintS(&get, "GET http://%S:%d%S HTTP/1.1\r\n", host, port, path);

      fp = ajHttpOpen(NULL, host, port, get, &sock);

      /* do not wrap a failed connection in an AJAX file object */
      if(!fp)
        {
          ok = ajFalse;
          break;
        }

      file = ajFileNewFromCfile(fp);

      if(!file)
        {
          ok = ajFalse;
          break;
        }
    }

  ajStrDel(&get);
  ajStrDel(&host);
  ajStrDel(&path);

  if(!ok)
    return ajFalse;

  /* skip the remaining header lines up to the blank separator line */
  crlf = ajRegCompC("^\r?\n$");

  while(ajReadline(file, &line))
    {
      if(ajRegExec(crlf, line))
        break;
    }

  /* copy the body one byte at a time */
  while(ajReadbinBinary(file, 1, 1, buf))
    {
      ajWritebinBinary(*outf, 1, 1, buf);
    }

  ajFileClose(outf);
  ajFileClose(&file);

  ajStrDel(&line);
  ajRegFree(&crlf);

  return ajTrue;
}
示例#5
0
/*
** Extract the entry ID (the leading run of non-space characters) from a
** description line and store it, upper-cased, in *myid.
**
** Calling with fields == NULL only re-arms the internal reset flag and
** returns ajFalse. The index-related arguments are accepted for
** signature compatibility and are not used.
**
** @return ajTrue when an ID was found, ajFalse otherwise.
*/
static AjBool dbiblast_parseId(const AjPStr line, AjPFile * alistfile,
			       AjBool systemsort, AjPStr const * fields,
			       ajint* maxFieldLen,
			       ajuint* countfield,
			       AjPStr* myid,
			       AjPList * myfdl)
{
    static AjPRegexp idexp = NULL;
    static AjBool reset = AJTRUE;

    (void) alistfile;
    (void) systemsort;
    (void) maxFieldLen;
    (void) countfield;
    (void) myfdl;

    if(fields == NULL)
    {
        reset = ajTrue;

        return ajFalse;
    }

    /* nothing to initialise for this parser: just clear the flag */
    reset = ajFalse;

    if(idexp == NULL)
        idexp = ajRegCompC("^([^ ]+)");

    if(ajRegExec(idexp, line))
    {
        ajRegSubI(idexp, 1, myid);
        ajStrFmtUpper(myid);

        ajDebug("parseId '%S'\n", *myid);

        return ajTrue;
    }

    return ajFalse;
}
示例#6
0
/*
** Build a sequence pattern list from a pattern specification.
**
** When patspec starts with '@' the rest is a file name. The file is read
** either as one pattern per line (format 1) or as FASTA-like records
** whose '>' header may carry a "<mismatch=N>" tag (any other format).
** With no explicit format the first line decides: a leading '>' selects
** the FASTA-like format. Otherwise patspec itself is the single pattern.
**
** @param [r] patspec [const AjPStr] Pattern, or "@filename".
** @param [r] patname [const AjPStr] Default pattern name ("pattern" when
**                                   empty).
** @param [r] fmt [const AjPStr] Pattern file format name.
** @param [r] protein [AjBool] Passed to ajPatlistSeqNewType.
** @param [r] mismatches [ajuint] Default number of mismatches.
** @return [AjPPatlistSeq] New pattern list, or NULL when the pattern file
**                         cannot be opened.
*/
AjPPatlistSeq ajPatlistSeqRead (const AjPStr patspec,
				const AjPStr patname,
				const AjPStr fmt,
				AjBool protein, ajuint mismatches)
{
    AjPPatlistSeq patlist = NULL;
    AjPStr line = NULL;
    AjPStr name = NULL;
    AjPFilebuff infile = NULL;
    AjPRegexp mismreg = NULL;
    AjPStr patstr = NULL;
    AjPStr pat = NULL;
    /* start from the caller's default so the first record uses it too */
    ajuint mismatch = mismatches;
    ajint ifmt = 0;
    ajuint npat = 0;
    AjPStr namestr = NULL;

    ajStrAssignS(&namestr, patname);
    ajStrAssignEmptyC(&namestr, "pattern");

    ajStrAssignS(&patstr, patspec);

    ifmt = patternSeqFormat(fmt);

    ajDebug("ajPatlistSeqRead patspec: '%S' patname: '%S' "
	    "protein: %B mismatches: %d\n",
	    patspec, patname, protein, mismatches);

    if(ajStrGetCharFirst(patstr) == '@')
    {
	ajStrCutStart(&patstr, 1);
	infile = ajFilebuffNewNameS(patstr);

	if(!infile)
	{
	    ajErr("Unable to open pattern file '%S'", patstr);

	    /* nothing but the two working strings exists yet */
	    ajStrDel(&namestr);
	    ajStrDel(&patstr);

	    return NULL;
	}

	/* created only once the file is open, so failure leaks nothing */
	patlist = ajPatlistSeqNewType(protein);

	line = ajStrNew();
	name = ajStrNew();

	if(!ifmt)
	{
	    /* sniff the format from the first line, then rewind */
	    ajBuffreadLineTrim(infile,&line);

	    if(ajStrPrefixC(line, ">"))
		ifmt = 2;
	    else
		ifmt = 1;
	    ajFilebuffReset(infile);
	}

	switch(ifmt)
	{
	case 1:
	    /* one pattern per line, named <namestr><counter> */
	    while (ajBuffreadLineTrim(infile,&line))
	    {
		npat++;
		ajStrAppendS (&pat,line);
		ajFmtPrintS(&name, "%S%u", namestr, npat);
		ajPatternSeqNewList(patlist,name,pat,mismatches);
		ajStrSetClear(&pat);
	    }
	    break;
	default:
	    mismreg = ajRegCompC("<mismatch=(\\d+)>");

	    while (ajBuffreadLineTrim(infile,&line))
	    {
		if (ajStrGetCharFirst(line) == '>')
		{
		    /* a new header closes the previous record */
		    if (ajStrGetLen(name))
		    {
			ajPatternSeqNewList(patlist,name,pat,
					    mismatch);
			ajStrSetClear(&name);
			ajStrSetClear(&pat);
			mismatch=mismatches;
		    }

		    ajStrCutStart(&line,1);

		    if (ajRegExec(mismreg,line))
		    {
			/* name is used as scratch space for the number */
			ajRegSubI(mismreg,1,&name);
			ajStrToUint(name,&mismatch);
			ajStrTruncateLen(&line,ajRegOffset(mismreg));
			ajStrTrimWhiteEnd(&line);
		    }
		    ajStrAssignS (&name,line);
		    ajStrAssignEmptyS(&name, patname);
		}
		else
		    ajStrAppendS (&pat,line);
	    }

	    /* flush the last record */
	    ajStrAssignEmptyS(&name, patname);
	    ajPatternSeqNewList(patlist,name,pat,mismatch);
	    ajRegFree(&mismreg);
	    break;
	}

	ajFilebuffDel(&infile);
    }
    else
    {
	patlist = ajPatlistSeqNewType(protein);

        ajStrAssignS(&name, namestr);
	ajPatternSeqNewList(patlist,name,patstr,mismatches);
    }

    ajStrDel(&name);
    ajStrDel(&line);
    ajStrDel(&pat);
    ajStrDel(&namestr);
    ajStrDel(&patstr);

    return patlist;
}
示例#7
0
文件: ajreg.c 项目: ICO2S/emboss
/*
** Compile a regular expression held in an AJAX string.
**
** Thin wrapper: the character data of rexp is handed to ajRegCompC.
**
** @param [r] rexp [const AjPStr] Regular expression text.
** @return [AjPRegexp] Whatever ajRegCompC returns for that text.
*/
AjPRegexp ajRegComp(const AjPStr rexp)
{
    const char *pattern = ajStrGetPtr(rexp);

    return ajRegCompC(pattern);
}
示例#8
0
文件: ghttp.c 项目: ktnyt/GEMBASSY
/*
** Inspect the start of an HTTP response and report whether it is a
** redirect.
**
** The first line is read from file. If it is a status line with code
** 301, 302 or 307, header lines are read until a blank line or a
** "Location:" header. On a Location header the new URL is split into
** *port, *host (only when a host is present) and *path.
**
** @param file  Open response stream; lines are consumed from it.
** @param host  Updated with the redirect host when one is given.
** @param port  Updated by ajHttpUrlDeconstruct for the redirect URL.
** @param path  Updated with the redirect path.
** @return ajTrue when a redirect target was found, ajFalse otherwise.
*/
AjBool gHttpRedirect(AjPFile file, AjPStr* host, ajint* port, AjPStr* path)
{
  AjPRegexp statusre   = NULL;
  AjPRegexp blankre    = NULL;
  AjPRegexp locationre = NULL;

  AjPStr status  = NULL;
  AjPStr target  = NULL;
  AjPStr tgthost = NULL;
  AjPStr hdrline = NULL;

  ajuint code       = 0;
  AjBool redirected = ajFalse;

  statusre = ajRegCompC("^HTTP/\\S+\\s+(\\d+)");

  ajReadline(file, &hdrline);

  ajDebug("gHttpRedirect: First line: '%S'\n", hdrline);

  if(ajRegExec(statusre, hdrline))
    {
      ajRegSubI(statusre, 1, &status);
      ajStrToUint(status, &code);
      ajDebug("Header: codestr '%S' code '%u'\n", status, code);
      ajStrDel(&status);

      switch(code)
        {
        case 301:
        case 302:
        case 307:
          locationre = ajRegCompC("^Location: (\\S+)");
          blankre    = ajRegCompC("^\r?\n?$");

          while(ajReadline(file, &hdrline))
            {
              /* blank line: end of headers, no Location seen */
              if(ajRegExec(blankre, hdrline))
                break;

              ajDebug("gHttpRedirect: header line: '%S'\n", hdrline);

              if(!ajRegExec(locationre, hdrline))
                continue;

              ajRegSubI(locationre, 1, &target);
              ajHttpUrlDeconstruct(target, port, &tgthost, path);

              /* keep the current host when the new URL names none */
              if(ajStrGetLen(tgthost))
                ajStrAssignS(host, tgthost);

              ajStrDel(&target);
              ajStrDel(&tgthost);
              redirected = ajTrue;
              break;
            }

          ajRegFree(&locationre);
          ajRegFree(&blankre);
          break;

        default:
          break;
        }
    }

  ajRegFree(&statusre);
  ajStrDel(&hdrline);

  return redirected;
}
示例#9
0
/*
** Parse a "simple" description line of the form "ID ACCESSION ...".
**
** The ID goes to *myid and the accession to an internal string, both
** upper-cased. When an "acc" field is being indexed, the accession is
** either written to the matching alistfile (systemsort) or appended to
** the matching myfdl list as a C string.
**
** Calling with fields == NULL re-arms the field scan and returns ajFalse.
**
** @return ajTrue when the line matched, ajFalse otherwise.
*/
static AjBool dbiblast_parseSimple(const AjPStr line,
				   AjPFile * alistfile,
				   AjBool systemsort, AjPStr const * fields,
				   ajint* maxFieldLen,
				   ajuint* countfield,
				   AjPStr* myid,
				   AjPList* myfdl)
{
    static AjPRegexp idexp = NULL;
    static AjPStr mytmpac    = NULL;
    static ajint numFields;
    static ajint accfield = -1;
    static AjBool reset = AJTRUE;
    char* accopy;

    if(fields == NULL)
    {
        reset = ajTrue;
        accfield = -1;

        return ajFalse;
    }

    if(reset)
    {
        /* locate the "acc" field; "sv" and "des" are tolerated silently */
        for(numFields = 0; fields[numFields]; numFields++)
        {
            if(ajStrMatchCaseC(fields[numFields], "acc"))
                accfield = numFields;
            else if(!(ajStrMatchCaseC(fields[numFields], "sv") ||
                      ajStrMatchCaseC(fields[numFields], "des")))
                ajWarn("Simple ID parsing unknown field '%S' ignored",
                       fields[numFields]);
        }

        reset = ajFalse;
    }

    if(idexp == NULL)
        idexp = ajRegCompC("^([^ ]+)( +([A-Za-z][A-Za-z0-9]+[0-9]))");

    if(!ajRegExec(idexp, line))
        return ajFalse;

    ajRegSubI(idexp, 1, myid);
    ajRegSubI(idexp, 3, &mytmpac);
    ajStrFmtUpper(myid);
    ajStrFmtUpper(&mytmpac); /* GCG mixes case on new SwissProt acnums */

    if(accfield >= 0)
    {
        embDbiMaxlen(&mytmpac, &maxFieldLen[accfield]);
        countfield[accfield]++;

        if(!systemsort)
        {
            accopy = ajCharNewS(mytmpac);
            ajListPushAppend(myfdl[accfield], accopy);
        }
        else
            ajFmtPrintF(alistfile[accfield], "%S %S\n", *myid, mytmpac);
    }

    ajDebug("parseSimple '%S' '%S'\n", *myid, mytmpac);

    return ajTrue;
}
示例#10
0
/*
** Parse an NCBI-style description line and record its index fields.
**
** The line is prefixed with '>' and handed to ajSeqParseNcbi, which
** fills the ID, accession, sequence version, GI, database and
** description strings. For each requested field ("acc", "sv", "des")
** the value is either written to the matching alistfile (systemsort) or
** appended to the matching fdlist as a C string; countfield is bumped
** for every value recorded. Description text is split into
** alphanumeric words.
**
** Calling with fields == NULL re-arms the field scan and returns ajFalse.
**
** @return ajTrue on success, ajFalse when the line cannot be parsed.
*/
static AjBool dbiblast_parseNcbi(const AjPStr line, AjPFile * alistfile,
				 AjBool systemsort, AjPStr const * fields,
				 ajint* maxFieldLen,
				 ajuint* countfield,
				 AjPStr* myid,
				 AjPList* fdlist)
{
    char* fd;

    static ajint numFields;
    static ajint accfield = -1;
    static ajint desfield = -1;
    static ajint svnfield = -1;
    static AjBool reset = AJTRUE;

    if(!fields)
    {
	reset = ajTrue;
	accfield = svnfield = desfield = -1;
	return ajFalse;
    }

    if(reset)
    {
	/* map field names to their index in the per-field arrays */
	numFields = 0;
	while(fields[numFields])
	{
	    if(ajStrMatchCaseC(fields[numFields], "acc"))
		accfield=numFields;
	    else if(ajStrMatchCaseC(fields[numFields], "sv"))
		svnfield=numFields;
	    else if(ajStrMatchCaseC(fields[numFields], "des"))
		desfield=numFields;
	    else
		ajWarn("EMBL parsing unknown field '%S' ignored",
		       fields[numFields]);
	    numFields++;
	}
	reset = ajFalse;
    }

    if(!wrdexp)
	wrdexp = ajRegCompC("([A-Za-z0-9]+)");

    ajStrAssignC(&tmpdes,"");
    ajStrAssignC(&t,"");
    ajStrAssignC(&tmpac,"");
    ajStrAssignC(&tmpsv,"");
    ajStrAssignC(&tmpgi,"");
    ajStrAssignC(&tmpdb,"");

    /* the line is passed to the NCBI parser with a leading '>' */
    ajFmtPrintS(&t,">%S",line);

    if(!ajSeqParseNcbi(t,myid,&tmpac,&tmpsv,&tmpgi,&tmpdb,&tmpdes))
	return ajFalse;

    if(ajStrGetLen(tmpac))
	ajStrFmtUpper(&tmpac);

    if(accfield >= 0)
	embDbiMaxlen(&tmpac, &maxFieldLen[accfield]);

    if(svnfield >= 0)
    {
	/* sequence version and GI share the "sv" field */
	embDbiMaxlen(&tmpsv, &maxFieldLen[svnfield]);
	embDbiMaxlen(&tmpgi, &maxFieldLen[svnfield]);
    }


    ajStrFmtUpper(myid);

    /* ajDebug("parseNCBI success\n"); */

    if(systemsort)
    {
	if(accfield >= 0 && ajStrGetLen(tmpac))
	{
	    countfield[accfield]++;
	    ajFmtPrintF(alistfile[accfield], "%S %S\n", *myid, tmpac);
	}
	if(svnfield >= 0 && ajStrGetLen(tmpsv))
	{
	    countfield[svnfield]++;
	    ajFmtPrintF(alistfile[svnfield], "%S %S\n", *myid, tmpsv);
	}
	if(svnfield >= 0 && ajStrGetLen(tmpgi))
	{
	    countfield[svnfield]++;
	    ajFmtPrintF(alistfile[svnfield], "%S %S\n", *myid, tmpgi);
	}
	if(desfield >= 0 && ajStrGetLen(tmpdes))
	    while(ajRegExec(wrdexp, tmpdes))
	    {
		ajRegSubI(wrdexp, 1, &tmpfd);
		embDbiMaxlen(&tmpfd, &maxFieldLen[desfield]);
		ajStrFmtUpper(&tmpfd);
		ajDebug("++des '%S'\n", tmpfd);
		countfield[desfield]++;
		ajFmtPrintF(alistfile[desfield], "%S %S\n", *myid, tmpfd);
		/* continue with the text after this word */
		ajRegPost(wrdexp, &tmpdes);
	    }
    }
    else
    {
        if(accfield >= 0 && ajStrGetLen(tmpac))
	{
	    fd = ajCharNewS(tmpac);
	    countfield[accfield]++;
	    ajListPushAppend(fdlist[accfield], fd);
	}

        if(svnfield >= 0 && ajStrGetLen(tmpsv))
	{
	    fd = ajCharNewS(tmpsv);
	    countfield[svnfield]++;
	    ajListPushAppend(fdlist[svnfield], fd);
	}

        if(svnfield >= 0 && ajStrGetLen(tmpgi))
	{
	    fd = ajCharNewS(tmpgi);
	    /* count the GI too, as the system-sort branch does */
	    countfield[svnfield]++;
	    ajListPushAppend(fdlist[svnfield], fd);
	}

        if(desfield >= 0 && ajStrGetLen(tmpdes))
	{
	    while(ajRegExec(wrdexp, tmpdes))
	    {
		ajRegSubI(wrdexp, 1, &tmpfd);
		embDbiMaxlen(&tmpfd, &maxFieldLen[desfield]);
		ajStrFmtUpper(&tmpfd);
		ajDebug("++des '%S'\n", tmpfd);
		fd = ajCharNewS(tmpfd);
		countfield[desfield]++;
		ajListPushAppend(fdlist[desfield], fd);
		ajRegPost(wrdexp, &tmpdes);
	    }
	}
    }

    /* ajDebug("parseNCBI '%S' '%S'\n", *myid, tmpac); */

    return ajTrue;
}
示例#11
0
/*
** Parse one FASTA entry from libr and record its index fields.
**
** The current file position is stored in *dpos, the ID line is read and
** matched against idexp. If that fails the line is retried with the
** file-level expression dbifastaGIdexp as FASTATYPE_SIMPLE (with a
** warning); if that fails too ajFatal is called. Depending on the type,
** ID, accession, sequence version, GI and description are extracted
** into the file-level dbifastaGTmp* strings. For each requested field
** ("acc", "sv", "des") the value is written to the matching alistfile
** (systemsort) or appended to the matching dbifastaGFdl list, and
** countfield / maxFieldLen are updated. Finally the file is advanced to
** the start of the next line beginning with '>' (or to end of file).
**
** Calling with fields == NULL re-arms the field scan and returns ajFalse.
**
** Returns ajFalse when the ID line is empty, when NCBI parsing fails or
** when the type is unknown; ajTrue otherwise.
*/
static AjBool dbifasta_ParseFasta(AjPFile libr, ajint* dpos,
				  ajint* maxFieldLen, ajuint* countfield,
				  AjPRegexp idexp,
				  ajuint usertype, AjPFile* alistfile,
				  AjBool systemsort, AjPStr const * fields)
{
    char* fd;
    ajlong ipos;
    static AjPStr tstr = NULL;
    static ajint numFields;
    static ajint accfield = -1;
    static ajint desfield = -1;
    static ajint svnfield = -1;
    static AjBool reset = AJTRUE;

    ajuint type = usertype;

    if(!fields)
    {
	reset = ajTrue;
	accfield = svnfield = desfield = -1;
	return ajFalse;
    }

    if(reset)
    {
	/* map field names to their index in the per-field arrays */
	numFields = 0;
	while(fields[numFields])
	{
	    if(ajStrMatchCaseC(fields[numFields], "acc"))
		accfield=numFields;
	    else if(ajStrMatchCaseC(fields[numFields], "sv"))
		svnfield=numFields;
	    else if(ajStrMatchCaseC(fields[numFields], "des"))
		desfield=numFields;
	    else
		ajWarn("EMBL parsing unknown field '%S' ignored",
		       fields[numFields]);

	    numFields++;
	}
	reset = ajFalse;
    }

    if(!dbifastaGWrdexp)
	dbifastaGWrdexp = ajRegCompC("([A-Za-z0-9]+)");

    /* NOTE(review): tstr is allocated here but not used further in this
    ** function */
    if(!tstr)
	tstr = ajStrNew();

    *dpos = (ajint) ajFileResetPos(libr); /* Lossy cast */

    ajReadline(libr, &dbifastaGRline);

    if(!ajStrGetLen(dbifastaGRline))
        return ajFalse;

    if(!ajRegExec(idexp,dbifastaGRline))
    {
	/* selected format does not match: fall back to the simple format */
	ajStrDelStatic(&dbifastaGTmpAc);
        type = FASTATYPE_SIMPLE;
        idexp = dbifastaGIdexp;

        if(!ajRegExec(idexp, dbifastaGRline))
        {
            ajFatal("Unrecognised ID line format: %S", dbifastaGRline);
            return ajFalse;
        }

	ajWarn("Invalid ID line for selected format: %S", dbifastaGRline);
    }

    /*
    ** each case needs to set id, tmpac, tmpsv, tmpdes
    ** using empty values if they are not found
    */

    ajStrAssignC(&dbifastaGTmpSv, "");
    ajStrAssignC(&dbifastaGTmpGi, "");
    ajStrAssignC(&dbifastaGTmpDb, "");
    ajStrAssignC(&dbifastaGTmpDes, "");
    ajStrAssignC(&dbifastaGTmpAc, "");
    ajStrAssignC(&dbifastaGTmpId, "");

    /* in the regex cases the text after the match becomes the description */
    switch(type)
    {
    case FASTATYPE_SIMPLE:
	ajRegSubI(idexp,2,&dbifastaGTmpId);
	ajStrAssignS(&dbifastaGTmpAc,dbifastaGTmpId);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_DBID:
	ajRegSubI(idexp,1,&dbifastaGTmpId);
	ajStrAssignS(&dbifastaGTmpAc,dbifastaGTmpId);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_GCGID:
	ajRegSubI(idexp,1,&dbifastaGTmpId);
	ajStrAssignS(&dbifastaGTmpAc,dbifastaGTmpId);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_NCBI:
	if(!ajSeqParseNcbi(dbifastaGRline, &dbifastaGTmpId, &dbifastaGTmpAc,
			   &dbifastaGTmpSv, &dbifastaGTmpGi, &dbifastaGTmpDb,
                           &dbifastaGTmpDes))
	{
	    ajStrDelStatic(&dbifastaGTmpAc);
	    return ajFalse;
	}
	break;
    case FASTATYPE_GCGIDACC:
	ajRegSubI(idexp,1,&dbifastaGTmpId);
	ajRegSubI(idexp,2,&dbifastaGTmpAc);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_GCGACCID:
	ajRegSubI(idexp,1,&dbifastaGTmpAc);
	ajRegSubI(idexp,2,&dbifastaGTmpId);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_IDACC:
	ajRegSubI(idexp,1,&dbifastaGTmpId);
	ajRegSubI(idexp,2,&dbifastaGTmpAc);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_ACCID:
	ajRegSubI(idexp,1,&dbifastaGTmpAc);
	ajRegSubI(idexp,2,&dbifastaGTmpId);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    default:
	ajStrDelStatic(&dbifastaGTmpAc);
	return ajFalse;
    }

    ajStrFmtUpper(&dbifastaGTmpId);
    ajStrFmtUpper(&dbifastaGTmpAc);

    if(accfield >= 0)
	embDbiMaxlen(&dbifastaGTmpAc, &maxFieldLen[accfield]);
    if(svnfield >= 0)
    {
	/* sequence version and GI share the "sv" field */
	embDbiMaxlen(&dbifastaGTmpSv, &maxFieldLen[svnfield]);
	embDbiMaxlen(&dbifastaGTmpGi, &maxFieldLen[svnfield]);
    }

    if(systemsort)
    {
	/* write "ID value" lines to the per-field list files */
	if(accfield >= 0 && ajStrGetLen(dbifastaGTmpAc))
	{
	    countfield[accfield]++;
	    ajFmtPrintF(alistfile[accfield], "%S %S\n",
                        dbifastaGTmpId, dbifastaGTmpAc);
	}
	if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpSv))
	{
	    countfield[svnfield]++;
	    ajFmtPrintF(alistfile[svnfield], "%S %S\n",
                        dbifastaGTmpId, dbifastaGTmpSv);
	}
	if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpGi))
	{
	    countfield[svnfield]++;
	    ajFmtPrintF(alistfile[svnfield], "%S %S\n",
                        dbifastaGTmpId, dbifastaGTmpGi);
	}
	if(desfield >= 0 && ajStrGetLen(dbifastaGTmpDes))
	    while(ajRegExec(dbifastaGWrdexp, dbifastaGTmpDes))
	    {
		ajRegSubI(dbifastaGWrdexp, 1, &dbifastaGTmpFd);
		embDbiMaxlen(&dbifastaGTmpFd, &maxFieldLen[desfield]);
		ajStrFmtUpper(&dbifastaGTmpFd);
		ajDebug("++des '%S' tmpdes '%S\n",
			dbifastaGTmpFd, dbifastaGTmpDes);
		countfield[desfield]++;
		ajFmtPrintF(alistfile[desfield], "%S %S\n",
			    dbifastaGTmpId, dbifastaGTmpFd);
		/* continue with the text after this word */
		ajRegPost(dbifastaGWrdexp, &dbifastaGTmpDes);
	    }
    }
    else
    {
	/* append C-string copies to the per-field lists */
	if(accfield >= 0 && ajStrGetLen(dbifastaGTmpAc))
	{
	    fd = ajCharNewS(dbifastaGTmpAc);
	    ajListPushAppend(dbifastaGFdl[accfield],fd);
	    countfield[accfield]++;
	}

	if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpSv))
	{
	    fd = ajCharNewS(dbifastaGTmpSv);
	    ajListPushAppend(dbifastaGFdl[svnfield], fd);
	    countfield[svnfield]++;
	}

	if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpGi))
	{
	    fd = ajCharNewS(dbifastaGTmpGi);
	    ajListPushAppend(dbifastaGFdl[svnfield], fd);
	    countfield[svnfield]++;
	}

	if(desfield >= 0 && ajStrGetLen(dbifastaGTmpDes))
	    while(ajRegExec(dbifastaGWrdexp, dbifastaGTmpDes))
	    {
		ajRegSubI(dbifastaGWrdexp, 1, &dbifastaGTmpFd);
		embDbiMaxlen(&dbifastaGTmpFd, &maxFieldLen[desfield]);
		ajStrFmtUpper(&dbifastaGTmpFd);
		ajDebug("++des '%S' tmpdes: '%S'\n",
			dbifastaGTmpFd, dbifastaGTmpDes);
		fd = ajCharNewS(dbifastaGTmpFd);
		ajListPushAppend(dbifastaGFdl[desfield], fd);
		countfield[desfield]++;
		ajRegPost(dbifastaGWrdexp, &dbifastaGTmpDes);
	    }
    }

    /* skip the sequence lines; stop at the start of the next '>' line */
    ipos = ajFileResetPos(libr);

    while(ajReadline(libr, &dbifastaGRline))
    {
	if(ajStrGetCharFirst(dbifastaGRline) == '>')
	{
	    /* rewind so the next call reads this ID line */
	    ajFileSeek(libr, ipos, 0);
	    return ajTrue;
	}
	ipos = ajFileResetPos(libr);
    }

    ajFileSeek(libr, ipos, 0);		/* end of file reached */

    return ajTrue;
}
示例#12
0
文件: genret.c 项目: ktnyt/GEMBASSY
/*
** genret: retrieve per-gene information for each input sequence from
** the G-language REST service and write it to the output file.
**
** For every sequence the GenBank data is either uploaded (no -accid) or
** referenced by accession/name (-accid). The requested access method is
** called over HTTP and the response is filtered: a record is kept when
** its '>' header line matches one of the gene patterns. Sequence-type
** methods are written as FASTA, annotate/output responses are copied
** verbatim, everything else as "header,value" lines.
*/
int main(int argc, char *argv[])
{
  embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3");

  AjPSeqall seqall;
  AjPSeq seq      = NULL;
  AjPStr inseq    = NULL;
  AjPStr gene     = NULL;
  AjPStr access   = NULL;
  AjBool accid    = ajTrue;
  AjPStr argument = NULL;
  AjPFile outfile = NULL;

  AjPStr seqid  = NULL;
  AjPStr restid = NULL;

  AjBool valid = ajFalse;
  AjBool isseq = ajFalse;
  AjBool isgbk = ajFalse;

  AjPFilebuff buff = NULL;
  AjPFile  tmpfile = NULL;
  AjPStr   tmpname = NULL;

  AjPStr regexstr = NULL;
  AjPStr genecopy = NULL;
  AjPStrTok token = NULL;
  AjPRegexp regex = NULL;

  AjPStr url  = NULL;
  AjPStr base = NULL;
  AjPStr head = NULL;
  AjPStr line = NULL;

  seqall   = ajAcdGetSeqall("sequence");
  access   = ajAcdGetString("access");
  gene     = ajAcdGetString("gene");
  argument = ajAcdGetString("argument");
  accid    = ajAcdGetBoolean("accid");
  outfile  = ajAcdGetOutfile("outfile");

  /* classify the access method: sequence output, GenBank output, or CSV */
  if(
     ajStrMatchC(access, "translation") ||
     ajStrMatchC(access, "get_exon") ||
     ajStrMatchC(access, "get_exons") ||
     ajStrMatchC(access, "get_cdsseq") ||
     ajStrMatchC(access, "get_gbkseq") ||
     ajStrMatchC(access, "get_geneseq") ||
     ajStrMatchC(access, "get_intron") ||
     ajStrMatchC(access, "getseq") ||
     ajStrMatchC(access, "seq") ||
     ajStrMatchC(access, "around_startcodon") ||
     ajStrMatchC(access, "around_stopcodon") ||
     ajStrMatchC(access, "before_startcodon") ||
     ajStrMatchC(access, "before_stopcodon") ||
     ajStrMatchC(access, "after_startcodon") ||
     ajStrMatchC(access, "after_stopcodon")
     )
    {
      isseq = ajTrue;
    }
  else if(ajStrMatchC(access, "annotate") ||
          ajStrMatchC(access, "output"))
    {
      isgbk = ajTrue;
    }
  else
    {
      /* CSV header line */
      ajFmtPrintF(outfile, "gene,%S\n", access);
    }

  base = ajStrNewC("rest.g-language.org");

  /* arguments become path components of the REST URL */
  ajStrExchangeCC(&argument, " ", "/");
  ajStrExchangeCC(&argument, ",", "/");
  ajStrExchangeCC(&argument, "\t", "/");
  ajStrExchangeCC(&argument, "\r", "/");
  ajStrExchangeCC(&argument, "\n", "/");

  /* a bare "*" is turned into the regular expression ".*" */
  if(ajStrMatchC(gene, "*"))
    {
      ajStrInsertK(&gene, 0, '.');
    }

  /* "@file" or "list::file": read the gene patterns from a file */
  if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::"))
    {
      ajStrExchangeCC(&gene, "@", "");
      ajStrExchangeCC(&gene, "list::", "");
      ajStrAssignS(&tmpname, gene);

      tmpfile = ajFileNewInNameS(tmpname);

      if(!tmpfile)
        {
          ajDie("List file (%S) open error\n", tmpname);
        }

      gene = ajStrNew();

      while(ajReadline(tmpfile, &line))
        {
          ajStrAppendS(&gene, line);
        }

      ajFileClose(&tmpfile);
      ajStrDel(&tmpname);
      ajStrDel(&line);
    }

  tmpname = ajStrNew();
  gAssignUniqueName(&tmpname);

  while(ajSeqallNext(seqall, &seq))
    {
      inseq = ajStrNew();

      if(!accid)
        {
          if(gFormatGenbank(seq, &inseq))
            {
              /* upload the GenBank text and get a server-side id back */
              tmpfile = ajFileNewOutNameS(tmpname);

              if(!tmpfile)
                {
                  ajDie("Output file (%S) open error\n", tmpname);
                }

              ajFmtPrintF(tmpfile, "%S", inseq);

              ajFileClose(&tmpfile);

              ajFmtPrintS(&url, "http://%S/upload/upl.pl", base);

              gFilePostSS(url, tmpname, &restid);

              ajStrDel(&url);

              ajSysFileUnlinkS(tmpname);
            }
          else
            {
              ajWarn("Sequence does not have features\n"
                     "Proceeding with sequence accession ID\n");
              accid = ajTrue;
            }
        }


      ajStrAssignS(&seqid, ajSeqGetAccS(seq));

      if(ajStrGetLen(seqid) == 0)
        {
          ajStrAssignS(&seqid, ajSeqGetNameS(seq));
        }

      if(ajStrGetLen(seqid) == 0)
        {
          ajWarn("No valid header information\n");
        }

      if(accid)
        {
          ajStrAssignS(&restid, seqid);
          if(ajStrGetLen(seqid) == 0)
            {
              ajDie("Cannot proceed without header with -accid\n");
            }

          if(!gValID(seqid))
            {
              ajDie("Invalid accession ID:%S, exiting\n", seqid);
            }
        }

      url = ajStrNew();

      if(isgbk)
        {
          ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access);
        }
      else
        {
          ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument);
        }

      if(!gFilebuffURLS(url, &buff))
        {
          ajDie("GET error from %S\n", url);
        }

      while(ajBuffreadLine(buff, &line))
        {
          if(isgbk){
            ajFmtPrintF(outfile, "%S", line);
            continue;
          }

          ajStrRemoveLastNewline(&line);

          /* header line: decide whether the following value is wanted */
          if(ajStrPrefixC(line, ">"))
            {
              ajStrAssignS(&head, line);
              ajStrTrimStartC(&head, ">");

              valid = ajFalse;

              /* tokenise a copy: gene itself may be edited in the loop */
              genecopy = ajStrNewS(gene);
              token = ajStrTokenNewC(genecopy, " ,\t\r\n");

              while(ajStrTokenNextParse(token, &regexstr))
                {
                  if(ajStrGetLen(regexstr))
                    {
                      regex = ajRegComp(regexstr);

                      if(ajRegExec(regex, line))
                        {
                          valid = ajTrue;
                          /* a plain name is consumed once matched */
                          if(ajStrIsAlnum(regexstr))
                            {
                              ajStrExchangeSC(&gene, regexstr, "");
                            }
                        }

                      ajRegFree(&regex);
                    }
                }

              /* NOTE(review): token itself is not released here; an
              ** ajStrTokenDel call would be the natural place - confirm */
              ajStrDel(&genecopy);
            }
          else
            {
              if(valid)
                {
                  if(isseq)
                    {
                      ajStrFmtWrap(&line, 60);
                      ajFmtPrintF(outfile, ">%S\n%S\n", head, line);
                    }
                  else
                    {
                      ajFmtPrintF(outfile, "%S,%S\n", head, line);
                    }

                  valid = ajFalse;
                }
            }
        }

      if(buff)
        ajFilebuffDel(&buff);

      ajStrDel(&url);
      ajStrDel(&restid);
      ajStrDel(&seqid);
      ajStrDel(&inseq);
    }

  /* closed only after all sequences are done, so every one is written */
  ajFileClose(&outfile);

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&access);
  ajStrDel(&gene);
  ajStrDel(&head);
  ajStrDel(&line);
  ajStrDel(&regexstr);
  ajStrDel(&tmpname);
  ajStrDel(&base);

  embExit();

  return 0;
}
示例#13
0
/* @funcstatic domainalign_stamp **********************************************
**
** Run the STAMP programs on a domain set and post-process the result.
**
** The sequence is: stamp (scan), sorttrans, stamp (alignment, output
** captured in the log file), transform, poststamp and ver2hor. The
** number of lines starting with "Cluster:" in the log file selects the
** final STAMP output file. The clustal-format result is handed to
** domainalign_ProcessStampFile and the intermediate STAMP files are
** removed afterwards.
**
** @param [r] prevdomain [AjPDomain] Previous domain.
** @param [r] domain [AjPDomain] This domain (not used here).
** @param [r] daf [AjPDirout] Domain alignment files (not used here).
** @param [r] super [AjPDirout] Superimposition files (not used here).
** @param [r] singlets [AjPDirout] Singlet files (not used here).
** @param [r] align [AjPStr] Align.
** @param [r] alignc [AjPStr] Alignc.
** @param [r] dom [AjPStr] Dom.
** @param [r] name [AjPStr] Name, used as prefix of the STAMP files.
** @param [r] set [AjPStr] Name of set file.
** @param [r] scan [AjPStr] Name of scan file.
** @param [r] sort [AjPStr] Name of sort file.
** @param [r] log [AjPStr] Log file name.
** @param [r] out [AjPStr] Out file name.
** @param [r] keepsinglets [AjBool] Keep singlet sequences or not
**                                  (not used here).
** @param [r] moden [ajint] Mode number (not used here).
** @param [r] noden [ajint] Node number.
** @param [r] nset [ajint] Number in set (not used here).
** @param [r] logf [AjPFile] Log file.
**
** @return [void]
** @@
****************************************************************************/
static void domainalign_stamp(AjPDomain prevdomain,
			      AjPDomain domain, 
			      AjPDirout daf, 
			      AjPDirout super,
			      AjPDirout singlets, 
			      AjPStr    align, 
			      AjPStr    alignc, 
			      AjPStr    dom, 
			      AjPStr    name, 
			      AjPStr    set, 
			      AjPStr    scan, 
			      AjPStr    sort, 
			      AjPStr    log, 
			      AjPStr    out, 
			      AjBool    keepsinglets, 
			      ajint     moden, 
			      ajint     noden,
			      ajint     nset, 
			      AjPFile   logf)
{
    AjPStr    cmd       = NULL;   /* Command line to be executed.          */
    AjPStr    logline   = NULL;   /* One line of the STAMP log file.       */
    AjPStr    tmpname   = NULL;   /* Name of a temporary file to remove.   */
    AjPFile   logfile   = NULL;   /* STAMP log file, opened for reading.   */
    AjPRegexp clusterre = NULL;   /* Matches "Cluster:" lines in the log.  */
    ajint     ncluster  = 0;      /* Number of clusters found in the log.  */
    ajint     i         = 0;      /* Loop counter.                         */

    cmd       = ajStrNew();
    logline   = ajStrNew();
    tmpname   = ajStrNew();
    clusterre = ajRegCompC("^(Cluster:)");

    ajDebug("domainalign_stamp name: '%S'\n", name);

    /* STAMP scan of the domain set. */
    ajFmtPrintS(&cmd, "%S -l %S -s -n 2 -slide 5 -prefix %S -d %S",
                ajAcdGetpathC("stamp"), dom, name, set);
    ajFmtPrint("\n%S\n\n", cmd);
    ajSysExecS(cmd);

    /* Sort the scan results; output goes to the sort file. */
    ajFmtPrintS(&cmd, "%S -f %S -s Sc 2.5",
                ajAcdGetpathC("sorttrans"), scan);
    ajFmtPrint("\n%S > %S\n\n", cmd, sort);
    ajSysExecOutnameS(cmd, sort);

    /* STAMP alignment; output goes to the log file. */
    ajFmtPrintS(&cmd, "%S -l %S -prefix %S",
                ajAcdGetpathC("stamp"), sort, name);
    ajFmtPrint("\n%S > %S\n\n", cmd, log);
    ajSysExecOutnameS(cmd, log);

    ajFmtPrintS(&cmd, "%S -f %S -g  -o %S",
                ajAcdGetpathC("transform"), sort, alignc);
    ajFmtPrint("\n%S\n\n", cmd);
    ajSysExecS(cmd);

    /* Count the clusters reported in the log file. */
    logfile = ajFileNewInNameS(log);

    if(!logfile)
        ajFatal("Could not open log file '%S'\n", log);

    ncluster = 0;

    while(ajReadlineTrim(logfile, &logline))
    {
        if(ajRegExec(clusterre, logline))
            ncluster++;
    }

    ajFileClose(&logfile);

    ajDebug("ncluster: %d\n", ncluster);

    /* Call STAMP ... calculate two fields for structural equivalence using
       threshold Pij value of 0.5, see stamp manual v4.1 pg 27. */
    ajFmtPrintS(&cmd, "%S -f %S.%d -min 0.5",
                ajAcdGetpathC("poststamp"), name, ncluster);
    ajFmtPrint("%S\n\n", cmd);
    ajSysExecS(cmd);

    /* Call STAMP ... convert block format alignment into clustal format. */
    ajFmtPrintS(&cmd, "%S -f %S.%d.post",
                ajAcdGetpathC("ver2hor"), name, ncluster);
    ajFmtPrint("%S > %S\n\n", cmd, out);
    ajSysExecOutnameS(cmd, out);

    /* Process STAMP alignment file and generate alignment file for output. */
    domainalign_ProcessStampFile(out, align, prevdomain, noden, logf);

    /* Remove the per-cluster files name.1 .. name.ncluster ... */
    for(i = 1; i <= ncluster; i++)
    {
        ajFmtPrintS(&tmpname, "%S.%d", name, i);
        ajSysFileUnlinkS(tmpname);
    }

    /* ... and the poststamp output. */
    ajFmtPrintS(&tmpname, "%S.%d.post", name, ncluster);
    ajSysFileUnlinkS(tmpname);

    ajStrDel(&cmd);
    ajStrDel(&logline);
    ajStrDel(&tmpname);
    ajRegFree(&clusterre);

    return;
}   
示例#14
0
/*
** Read one FASTQ record from inf and fill the index entry.
**
** The '@' header line gives the entry ID (first word) and description.
** When a description field is being indexed, each description word is
** pushed onto desfield->data. The sequence lines up to the '+' line are
** counted, then quality lines are consumed until at least as many
** quality characters as sequence characters have been read.
**
** @param [u] entry [EmbPBtreeEntry] Receives file position and ID.
** @param [u] inf [AjPFile] Input file positioned at a record start.
** @return [AjBool] ajTrue when a record was read; ajFalse at end of
**                  file, when the line does not start with '@', or when
**                  no '+' line is found.
*/
static AjBool dbxflat_ParseFastq(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line = NULL;
    ajlong pos  = 0L;
    ajuint seqlen = 0;
    ajuint qlen = 0;
    AjPStr tmpfd  = NULL;
    AjPStr str = NULL;
    AjPStr de = NULL;
    AjBool ok;
    AjBool ret = ajFalse;

    if(!dbxflat_wrdexp)
	dbxflat_wrdexp = ajRegCompC("([A-Za-z0-9.:=]+)");

    line = ajStrNewC("");
    
    pos = ajFileResetPos(inf);

    if(!ajReadlineTrim(inf,&line))
    {
        ajStrDel(&line);
        return ajFalse;
    }

    /* first line of entry */

    if(!ajStrPrefixC(line,"@"))
    {
        /* release the line buffer here too (previously leaked) */
        ajStrDel(&line);
        return ajFalse;
    }

    entry->fpos = pos;
    ajStrCutStart(&line, 1);
    ajStrExtractFirst(line, &de, &entry->id);

    if(desfield && ajStrGetLen(de))
    {
	while(ajRegExec(dbxflat_wrdexp,de))
	{
	    ajRegSubI(dbxflat_wrdexp, 1, &tmpfd);
	    /* each word is pushed as its own string onto the list */
	    str = ajStrNew();
	    ajStrAssignS(&str,tmpfd);
	    ajListPush(desfield->data,(void *)str);
	    ajRegPost(dbxflat_wrdexp, &de);
	}
    }

/* now read sequence */
    ok = ajReadlineTrim(inf,&line);
    while(ok && !ajStrPrefixC(line, "+"))
    {
        ajStrRemoveWhite(&line);
        seqlen += MAJSTRGETLEN(line);
        ok = ajReadlineTrim(inf,&line);
    }

    if(ok)
    {
        /* quality lines: stop once they cover the sequence length */
        ok = ajReadlineTrim(inf,&line);
        while(ok)
        {
            qlen += MAJSTRGETLEN(line);
            if(qlen < seqlen)
                ok = ajReadlineTrim(inf,&line);
            else
                ok = ajFalse;
        }

        ret = ajTrue;
    }

    /* common cleanup, also for a record with no '+' line */
    ajStrDel(&de);
    ajStrDel(&tmpfd);
    ajStrDel(&line);
    
    return ret;
}
示例#15
0
/*
** greporiter: query the G-language REST service for the replication
** origin and terminus of each input sequence and print them.
**
** Without -accid the sequence is uploaded first (as GenBank when it has
** features, otherwise as FASTA); with -accid the accession or name is
** used directly.
*/
int main(int argc, char *argv[])
{
  embInitPV("greporiter", argc, argv, "GEMBASSY", "1.0.3");

  AjPSeqall seqall;
  AjPSeq    seq    = NULL;   /* must start NULL for ajSeqallNext */
  AjPStr    inseq  = NULL;

  AjBool accid  = ajFalse;
  AjPStr restid = NULL;
  AjPStr seqid  = NULL;

  AjPStr base = NULL;
  AjPStr url  = NULL;

  AjBool oriloc = 0;
  AjBool gcskew = 0;
  AjBool dbonly = 0;
  ajint	 difthreshold = 0;

  AjPFile outf = NULL;

  AjPFile     tmpfile = NULL;
  AjPStr      tmpname = NULL;
  AjPStr      fstname = NULL;
  AjPStr      fstfmt  = NULL;
  AjPFilebuff tmp     = NULL;
  AjPStr      line    = NULL;
  AjPSeqout   tmpout  = NULL;

  AjPRegexp regex = NULL;

  AjPStr    ori    = NULL;
  AjPStr    ter    = NULL;

  seqall = ajAcdGetSeqall("sequence");
  difthreshold = ajAcdGetInt("difthreshold");
  oriloc = ajAcdGetBoolean("oriloc");
  gcskew = ajAcdGetBoolean("gcskew");
  dbonly = ajAcdGetBoolean("dbonly");
  accid  = ajAcdGetBoolean("accid");
  outf   = ajAcdGetOutfile("outfile");

  base = ajStrNewC("rest.g-language.org");

  gAssignUniqueName(&tmpname);
  gAssignUniqueName(&fstname);
  ajStrAppendC(&fstname, ".fasta");

  /* compiled once for all sequences and released after the loop */
  regex = ajRegCompC("([0-9]+),([0-9]+)");

  while(ajSeqallNext(seqall, &seq))
    {
      inseq = ajStrNew();

      if(!accid)
        {
          if(gFormatGenbank(seq, &inseq))
            {
              tmpfile = ajFileNewOutNameS(tmpname);
              if(!tmpfile)
                {
                  ajDie("Output file (%S) open error\n", tmpname);
                }
              ajFmtPrintF(tmpfile, "%S", inseq);
              ajFileClose(&tmpfile);
              ajFmtPrintS(&url, "http://%S/upload/upl.pl", base);
              gFilePostSS(url, tmpname, &restid);
              ajStrDel(&url);
              ajSysFileUnlinkS(tmpname);
            }
          else
            {
              /* no features: upload the plain sequence as FASTA instead */
              tmpout = ajSeqoutNew();

              if(!ajSeqoutOpenFilename(tmpout, fstname))
                {
                  embExitBad();
                }

              fstfmt = ajStrNewC("fasta");
              ajSeqoutSetFormatS(tmpout, fstfmt);
              ajStrDel(&fstfmt);

              ajSeqoutWriteSeq(tmpout, seq);
              ajSeqoutClose(tmpout);
              ajSeqoutDel(&tmpout);
              ajFmtPrintS(&url, "http://%S/upload/upl.pl", base);
              gFilePostSS(url, fstname, &restid);
              ajStrDel(&url);
              ajSysFileUnlinkS(fstname);
            }
        }

      ajStrAssignS(&seqid, ajSeqGetAccS(seq));

      if(ajStrGetLen(seqid) == 0)
        {
          ajStrAssignS(&seqid, ajSeqGetNameS(seq));
        }

      if(ajStrGetLen(seqid) == 0)
        {
          ajWarn("No valid header information\n");
        }

      if(accid)
        {
          ajStrAssignS(&restid, seqid);
          if(ajStrGetLen(seqid) == 0)
            {
              ajDie("Cannot proceed without header with -accid\n");
            }

          if(!gValID(seqid))
            {
              ajDie("Invalid accession ID:%S, exiting\n", seqid);
            }
        }

      url = ajStrNew();

      ajFmtPrintS(&url, "http://%S/%S/rep_ori_ter/oriloc=%d/gcskew=%d/"
                  "difthreshold=%d/dbonly=%d/",  base, restid, oriloc, gcskew,
                  difthreshold, dbonly);

      if(!gFilebuffURLS(url, &tmp))
        {
          ajDie("Failed to download result from:\n%S\n", url);
        }

      /* the first response line carries "origin,terminus" */
      ajBuffreadLine(tmp, &line);

      if(ajRegExec(regex, line)) {
        /* both captures are required (was a comma expression that
           ignored the first result) */
        if(ajRegSubI(regex, 1, &ori) && ajRegSubI(regex, 2, &ter)) {
          /* NOTE(review): the result goes to standard output while outf
             is opened but never written - confirm this is intended */
          ajFmtPrint("%S Origin: %S Terminus %S\n", seqid, ori, ter);
        }
      }

      if(tmp)
        ajFilebuffDel(&tmp);

      ajStrDel(&url);
      ajStrDel(&restid);
      ajStrDel(&seqid);
      ajStrDel(&inseq);
    }

  ajRegFree(&regex);

  ajFileClose(&outf);

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&base);
  ajStrDel(&line);
  ajStrDel(&ori);
  ajStrDel(&ter);
  ajStrDel(&tmpname);
  ajStrDel(&fstname);

  embExit();

  return 0;
}