Ejemplo n.º 1
0
AjBool gHttpConvertS(AjPStr url, AjPFile* outf, AjPStr informat, AjPStr outformat)
{
  AjPRegexp regexp = NULL;
  AjPStr jobid = NULL;
  AjPStr convert = NULL;

  regexp = ajRegCompC("^.+jobid=");

  if(!ajRegExec(regexp, url))
    {
      return ajFalse;
    }

  if(!ajRegPost(regexp, &jobid))
    {
      return ajFalse;
    }

  convert = ajFmtStr("http://soap.g-language.org/WS/convert.cgi?"
                     "jobid=%S&informat=%S&outformat=%S",
                     jobid, informat, outformat);

  if(!gHttpGetBinS(convert, outf)) {
    return ajFalse;
  }

  return ajTrue;
}
Ejemplo n.º 2
0
static AjBool dbiblast_parseNcbi(const AjPStr line, AjPFile * alistfile,
				 AjBool systemsort, AjPStr const * fields,
				 ajint* maxFieldLen,
				 ajuint* countfield,
				 AjPStr* myid,
				 AjPList* fdlist)
{
    char* fd;

    static ajint numFields;
    static ajint accfield = -1;
    static ajint desfield = -1;
    static ajint svnfield = -1;
    static AjBool reset = AJTRUE;

    if(!fields)
    {
	reset = ajTrue;
	accfield = svnfield = desfield = -1;
	return ajFalse;
    }

    if(reset)
    {
	numFields = 0;
	while(fields[numFields])
	{
	    if(ajStrMatchCaseC(fields[numFields], "acc"))
		accfield=numFields;
	    else if(ajStrMatchCaseC(fields[numFields], "sv"))
		svnfield=numFields;
	    else if(ajStrMatchCaseC(fields[numFields], "des"))
		desfield=numFields;
	    else
		ajWarn("EMBL parsing unknown field '%S' ignored",
		       fields[numFields]);
	    numFields++;
	}
	reset = ajFalse;
    }

    if(!wrdexp)
	wrdexp = ajRegCompC("([A-Za-z0-9]+)");

    ajStrAssignC(&tmpdes,"");
    ajStrAssignC(&t,"");
    ajStrAssignC(&tmpac,"");
    ajStrAssignC(&tmpsv,"");
    ajStrAssignC(&tmpgi,"");
    ajStrAssignC(&tmpdb,"");

    ajFmtPrintS(&t,">%S",line);

    if(!ajSeqParseNcbi(t,myid,&tmpac,&tmpsv,&tmpgi,&tmpdb,&tmpdes))
	return ajFalse;

    if(ajStrGetLen(tmpac))
	ajStrFmtUpper(&tmpac);

    if(accfield >= 0)
	embDbiMaxlen(&tmpac, &maxFieldLen[accfield]);

    if(svnfield >= 0)
    {
	embDbiMaxlen(&tmpsv, &maxFieldLen[svnfield]);
	embDbiMaxlen(&tmpgi, &maxFieldLen[svnfield]);
    }


    ajStrFmtUpper(myid);

    /* ajDebug("parseNCBI success\n"); */

    if(systemsort)
    {
	if(accfield >= 0 && ajStrGetLen(tmpac))
	{
	    countfield[accfield]++;
	    ajFmtPrintF(alistfile[accfield], "%S %S\n", *myid, tmpac);
	}
	if(svnfield >= 0 && ajStrGetLen(tmpsv))
	{
	    countfield[svnfield]++;
	    ajFmtPrintF(alistfile[svnfield], "%S %S\n", *myid, tmpsv);
	}
	if(svnfield >= 0 && ajStrGetLen(tmpgi))
	{
	    countfield[svnfield]++;
	    ajFmtPrintF(alistfile[svnfield], "%S %S\n", *myid, tmpgi);
	}
	if(desfield >= 0 && ajStrGetLen(tmpdes))
	    while(ajRegExec(wrdexp, tmpdes))
	    {
		ajRegSubI(wrdexp, 1, &tmpfd);
		embDbiMaxlen(&tmpfd, &maxFieldLen[desfield]);
		ajStrFmtUpper(&tmpfd);
		ajDebug("++des '%S'\n", tmpfd);
		countfield[desfield]++;
		ajFmtPrintF(alistfile[desfield], "%S %S\n", *myid, tmpfd);
		ajRegPost(wrdexp, &tmpdes);
	    }
    }
    else
    {
        if(accfield >= 0 && ajStrGetLen(tmpac))
	{
	    fd = ajCharNewS(tmpac);
	    countfield[accfield]++;
	    ajListPushAppend(fdlist[accfield], fd);
	}

        if(svnfield >= 0 && ajStrGetLen(tmpsv))
	{
	    fd = ajCharNewS(tmpsv);
	    countfield[svnfield]++;
	    ajListPushAppend(fdlist[svnfield], fd);
	}

        if(svnfield >= 0 && ajStrGetLen(tmpgi))
	{
	    fd = ajCharNewS(tmpgi);
	    ajListPushAppend(fdlist[svnfield], fd);
	}

        if(desfield >= 0 && ajStrGetLen(tmpdes))
	{
	    while(ajRegExec(wrdexp, tmpdes))
	    {
		ajRegSubI(wrdexp, 1, &tmpfd);
		embDbiMaxlen(&tmpfd, &maxFieldLen[desfield]);
		ajStrFmtUpper(&tmpfd);
		ajDebug("++des '%S'\n", tmpfd);
		fd = ajCharNewS(tmpfd);
		countfield[desfield]++;
		ajListPushAppend(fdlist[desfield], fd);
		ajRegPost(wrdexp, &tmpdes);
	    }
	}
    }

    /* ajDebug("parseNCBI '%S' '%S'\n", *myid, tmpac); */

    return ajTrue;
}
Ejemplo n.º 3
0
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
			    const AjPPatternRegex pat, AjBool reverse)
{
    ajint pos=0;
    ajint off;
    ajint len;
    AjPFeature sf    = NULL;
    AjPStr substr    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr tmpstr = NULL;
    AjPStr tmp       = ajStrNew();
    AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
    ajint adj;
    AjBool isreversed;
    AjPSeq revseq;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    pos = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n",
	   pos, adj, reverse, isreversed);*/
    /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n",
	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
        ajSeqstrReverse(&seqstr);
    }

    ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);

    ajStrFmtUpper(&seqstr);

    while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
    {
	off = ajRegOffset(patexp);
	len = ajRegLenI(patexp, 0);

	if(off || len)
	{
	    ajRegSubI(patexp, 0, &substr);
	    ajRegPost(patexp, &tmp);
	    ajStrAssignS(&seqstr, substr);
            ajStrAppendS(&seqstr, tmp);
	    pos += off;

	    /*ajDebug("match pos: %d adj: %d len: %d off:%d\n",
                    pos, adj, len, off);*/
            if (reverse)
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   adj - pos - len + 2,
                                   adj - pos + 1,
                                   0.0, '-', 0);
	    else
            {
                if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                    sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                       pos, pos + len - 1,
                                       0.0);
                else
                    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   pos, pos + len - 1,
                                   0.0, '.', 0);
            }
            
	    if(isreversed)
		ajFeatReverse(sf, seqlen);

	    ajFmtPrintS (&tmpstr,"*pat %S: %S",
			 ajPatternRegexGetName(pat),
                         ajPatternRegexGetPattern(pat));
	    ajFeatTagAdd (sf,NULL,tmpstr);
	    pos += 1;
	    ajStrCutStart(&seqstr, 1);
	}
	else
	{
	    pos++;
	    ajStrCutStart(&seqstr, 1);
	}
    }

    ajStrDel(&tmpstr);
    ajStrDel(&tmp);
    ajStrDel(&substr);
    ajStrDel(&seqstr);

    if(reverse)
	ajSeqDel(&revseq);

    return;
}
Ejemplo n.º 4
0
static AjBool dbifasta_ParseFasta(AjPFile libr, ajint* dpos,
				  ajint* maxFieldLen, ajuint* countfield,
				  AjPRegexp idexp,
				  ajuint usertype, AjPFile* alistfile,
				  AjBool systemsort, AjPStr const * fields)
{
    char* fd;
    ajlong ipos;
    static AjPStr tstr = NULL;
    static ajint numFields;
    static ajint accfield = -1;
    static ajint desfield = -1;
    static ajint svnfield = -1;
    static AjBool reset = AJTRUE;

    ajuint type = usertype;

    if(!fields)
    {
	reset = ajTrue;
	accfield = svnfield = desfield = -1;
	return ajFalse;
    }

    if(reset)
    {
	numFields = 0;
	while(fields[numFields])
	{
	    if(ajStrMatchCaseC(fields[numFields], "acc"))
		accfield=numFields;
	    else if(ajStrMatchCaseC(fields[numFields], "sv"))
		svnfield=numFields;
	    else if(ajStrMatchCaseC(fields[numFields], "des"))
		desfield=numFields;
	    else
		ajWarn("EMBL parsing unknown field '%S' ignored",
		       fields[numFields]);

	    numFields++;
	}
	reset = ajFalse;
    }

    if(!dbifastaGWrdexp)
	dbifastaGWrdexp = ajRegCompC("([A-Za-z0-9]+)");

    if(!tstr)
	tstr = ajStrNew();

    *dpos = (ajint) ajFileResetPos(libr); /* Lossy cast */

    ajReadline(libr, &dbifastaGRline);

    if(!ajStrGetLen(dbifastaGRline))
        return ajFalse;

    if(!ajRegExec(idexp,dbifastaGRline))
    {
	ajStrDelStatic(&dbifastaGTmpAc);
        type = FASTATYPE_SIMPLE;
        idexp = dbifastaGIdexp;

        if(!ajRegExec(idexp, dbifastaGRline))
        {
            ajFatal("Unrecognised ID line format: %S", dbifastaGRline);
            return ajFalse;
        }

	ajWarn("Invalid ID line for selected format: %S", dbifastaGRline);
    }

    /*
    ** each case needs to set id, tmpac, tmpsv, tmpdes
    ** using empty values if they are not found
    */

    ajStrAssignC(&dbifastaGTmpSv, "");
    ajStrAssignC(&dbifastaGTmpGi, "");
    ajStrAssignC(&dbifastaGTmpDb, "");
    ajStrAssignC(&dbifastaGTmpDes, "");
    ajStrAssignC(&dbifastaGTmpAc, "");
    ajStrAssignC(&dbifastaGTmpId, "");

    switch(type)
    {
    case FASTATYPE_SIMPLE:
	ajRegSubI(idexp,2,&dbifastaGTmpId);
	ajStrAssignS(&dbifastaGTmpAc,dbifastaGTmpId);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_DBID:
	ajRegSubI(idexp,1,&dbifastaGTmpId);
	ajStrAssignS(&dbifastaGTmpAc,dbifastaGTmpId);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_GCGID:
	ajRegSubI(idexp,1,&dbifastaGTmpId);
	ajStrAssignS(&dbifastaGTmpAc,dbifastaGTmpId);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_NCBI:
	if(!ajSeqParseNcbi(dbifastaGRline, &dbifastaGTmpId, &dbifastaGTmpAc,
			   &dbifastaGTmpSv, &dbifastaGTmpGi, &dbifastaGTmpDb,
                           &dbifastaGTmpDes))
	{
	    ajStrDelStatic(&dbifastaGTmpAc);
	    return ajFalse;
	}
	break;
    case FASTATYPE_GCGIDACC:
	ajRegSubI(idexp,1,&dbifastaGTmpId);
	ajRegSubI(idexp,2,&dbifastaGTmpAc);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_GCGACCID:
	ajRegSubI(idexp,1,&dbifastaGTmpAc);
	ajRegSubI(idexp,2,&dbifastaGTmpId);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_IDACC:
	ajRegSubI(idexp,1,&dbifastaGTmpId);
	ajRegSubI(idexp,2,&dbifastaGTmpAc);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    case FASTATYPE_ACCID:
	ajRegSubI(idexp,1,&dbifastaGTmpAc);
	ajRegSubI(idexp,2,&dbifastaGTmpId);
	ajRegPost(idexp, &dbifastaGTmpDes);
	break;
    default:
	ajStrDelStatic(&dbifastaGTmpAc);
	return ajFalse;
    }

    ajStrFmtUpper(&dbifastaGTmpId);
    ajStrFmtUpper(&dbifastaGTmpAc);

    if(accfield >= 0)
	embDbiMaxlen(&dbifastaGTmpAc, &maxFieldLen[accfield]);
    if(svnfield >= 0)
    {
	embDbiMaxlen(&dbifastaGTmpSv, &maxFieldLen[svnfield]);
	embDbiMaxlen(&dbifastaGTmpGi, &maxFieldLen[svnfield]);
    }

    if(systemsort)
    {
	if(accfield >= 0 && ajStrGetLen(dbifastaGTmpAc))
	{
	    countfield[accfield]++;
	    ajFmtPrintF(alistfile[accfield], "%S %S\n",
                        dbifastaGTmpId, dbifastaGTmpAc);
	}
	if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpSv))
	{
	    countfield[svnfield]++;
	    ajFmtPrintF(alistfile[svnfield], "%S %S\n",
                        dbifastaGTmpId, dbifastaGTmpSv);
	}
	if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpGi))
	{
	    countfield[svnfield]++;
	    ajFmtPrintF(alistfile[svnfield], "%S %S\n",
                        dbifastaGTmpId, dbifastaGTmpGi);
	}
	if(desfield >= 0 && ajStrGetLen(dbifastaGTmpDes))
	    while(ajRegExec(dbifastaGWrdexp, dbifastaGTmpDes))
	    {
		ajRegSubI(dbifastaGWrdexp, 1, &dbifastaGTmpFd);
		embDbiMaxlen(&dbifastaGTmpFd, &maxFieldLen[desfield]);
		ajStrFmtUpper(&dbifastaGTmpFd);
		ajDebug("++des '%S' tmpdes '%S\n",
			dbifastaGTmpFd, dbifastaGTmpDes);
		countfield[desfield]++;
		ajFmtPrintF(alistfile[desfield], "%S %S\n",
			    dbifastaGTmpId, dbifastaGTmpFd);
		ajRegPost(dbifastaGWrdexp, &dbifastaGTmpDes);
	    }
    }
    else
    {
	if(accfield >= 0 && ajStrGetLen(dbifastaGTmpAc))
	{
	    fd = ajCharNewS(dbifastaGTmpAc);
	    ajListPushAppend(dbifastaGFdl[accfield],fd);
	    countfield[accfield]++;
	}

	if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpSv))
	{
	    fd = ajCharNewS(dbifastaGTmpSv);
	    ajListPushAppend(dbifastaGFdl[svnfield], fd);
	    countfield[svnfield]++;
	}

	if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpGi))
	{
	    fd = ajCharNewS(dbifastaGTmpGi);
	    ajListPushAppend(dbifastaGFdl[svnfield], fd);
	    countfield[svnfield]++;
	}

	if(desfield >= 0 && ajStrGetLen(dbifastaGTmpDes))
	    while(ajRegExec(dbifastaGWrdexp, dbifastaGTmpDes))
	    {
		ajRegSubI(dbifastaGWrdexp, 1, &dbifastaGTmpFd);
		embDbiMaxlen(&dbifastaGTmpFd, &maxFieldLen[desfield]);
		ajStrFmtUpper(&dbifastaGTmpFd);
		ajDebug("++des '%S' tmpdes: '%S'\n",
			dbifastaGTmpFd, dbifastaGTmpDes);
		fd = ajCharNewS(dbifastaGTmpFd);
		ajListPushAppend(dbifastaGFdl[desfield], fd);
		countfield[desfield]++;
		ajRegPost(dbifastaGWrdexp, &dbifastaGTmpDes);
	    }
    }

    ipos = ajFileResetPos(libr);

    while(ajReadline(libr, &dbifastaGRline))
    {
	if(ajStrGetCharFirst(dbifastaGRline) == '>')
	{
	    ajFileSeek(libr, ipos, 0);
	    return ajTrue;
	}
	ipos = ajFileResetPos(libr);
    }

    ajFileSeek(libr, ipos, 0);		/* end of file reached */

    return ajTrue;
}
Ejemplo n.º 5
0
static AjBool dbxflat_ParseFastq(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line = NULL;
    ajlong pos  = 0L;
    ajuint seqlen = 0;
    ajuint qlen = 0;
    AjPStr tmpfd  = NULL;
    AjPStr str = NULL;
    AjPStr de = NULL;
    AjBool ok;

    if(!dbxflat_wrdexp)
	dbxflat_wrdexp = ajRegCompC("([A-Za-z0-9.:=]+)");

    line = ajStrNewC("");
    
    pos = ajFileResetPos(inf);

    if(!ajReadlineTrim(inf,&line))
    {
        ajStrDel(&line);
        return ajFalse;
    }

    /* first line of entry */

    if(!ajStrPrefixC(line,"@"))
        return ajFalse;

    entry->fpos = pos;
    ajStrCutStart(&line, 1);
    ajStrExtractFirst(line, &de, &entry->id);

    if(desfield && ajStrGetLen(de))
    {
	while(ajRegExec(dbxflat_wrdexp,de))
	{
	    ajRegSubI(dbxflat_wrdexp, 1, &tmpfd);
	    str = ajStrNew();
	    ajStrAssignS(&str,tmpfd);
	    ajListPush(desfield->data,(void *)str);
	    ajRegPost(dbxflat_wrdexp, &de);
	}
    }

/* now read sequence */
    ok = ajReadlineTrim(inf,&line);
    while(ok && !ajStrPrefixC(line, "+"))
    {
        ajStrRemoveWhite(&line);
        seqlen += MAJSTRGETLEN(line);
        ok = ajReadlineTrim(inf,&line);
    }

    if(!ok)
        return ajFalse;

    ok = ajReadlineTrim(inf,&line);
    while(ok)
    {
        qlen += MAJSTRGETLEN(line);
        if(qlen < seqlen)
            ok = ajReadlineTrim(inf,&line);
        else
            ok = ajFalse;
    }

    ajStrDel(&de);
    ajStrDel(&tmpfd);
    ajStrDel(&line);
    
    return ajTrue;
}