示例#1
0
/* dbiblast_parseNcbi
**
** Parses one NCBI-style header line and records its index tokens
** (accession, sequence version / GI, description words).
**
** The line is prefixed with '>' before being handed to ajSeqParseNcbi,
** so the caller passes the header text without the leading '>'.
**
** Calling with fields == NULL does no parsing: it only clears the cached
** field-index state so the next real call rescans the field list.
**
** line        header text to parse
** alistfile   per-field output files, used only when systemsort is true;
**             each token is written as "ID TOKEN\n"
** systemsort  true: write tokens to alistfile; false: append to fdlist
** fields      NULL-terminated list of field names ("acc", "sv", "des");
**             any other name is warned about and ignored
** maxFieldLen per-field length trackers updated through embDbiMaxlen
**             (presumably the longest token seen - confirm in embdbi)
** countfield  per-field token counters, incremented once per token
** myid        receives the entry ID, converted to upper case
** fdlist      per-field lists, used only when systemsort is false;
**             each entry is a C string allocated with ajCharNewS
**
** Returns ajFalse on the reset call or when ajSeqParseNcbi rejects the
** line, ajTrue otherwise.
**
** NOTE(review): the unknown-field warning says "EMBL" although this is
** the NCBI parser; the text is left unchanged here.
*/
static AjBool dbiblast_parseNcbi(const AjPStr line, AjPFile * alistfile,
				 AjBool systemsort, AjPStr const * fields,
				 ajint* maxFieldLen,
				 ajuint* countfield,
				 AjPStr* myid,
				 AjPList* fdlist)
{
    char* fd;

    /* Field indices are cached across calls until a reset is requested */
    static ajint numFields;
    static ajint accfield = -1;
    static ajint desfield = -1;
    static ajint svnfield = -1;
    static AjBool reset = AJTRUE;

    if(!fields)
    {
        reset = ajTrue;
        accfield = svnfield = desfield = -1;
        return ajFalse;
    }

    if(reset)
    {
        /* Map each requested field name to its position in the list */
        numFields = 0;
        while(fields[numFields])
        {
            if(ajStrMatchCaseC(fields[numFields], "acc"))
                accfield=numFields;
            else if(ajStrMatchCaseC(fields[numFields], "sv"))
                svnfield=numFields;
            else if(ajStrMatchCaseC(fields[numFields], "des"))
                desfield=numFields;
            else
                ajWarn("EMBL parsing unknown field '%S' ignored",
                       fields[numFields]);
            numFields++;
        }
        reset = ajFalse;
    }

    if(!wrdexp)
        wrdexp = ajRegCompC("([A-Za-z0-9]+)");

    /* Clear the work strings so values from a previous line cannot leak */
    ajStrAssignC(&tmpdes,"");
    ajStrAssignC(&t,"");
    ajStrAssignC(&tmpac,"");
    ajStrAssignC(&tmpsv,"");
    ajStrAssignC(&tmpgi,"");
    ajStrAssignC(&tmpdb,"");

    ajFmtPrintS(&t,">%S",line);

    if(!ajSeqParseNcbi(t,myid,&tmpac,&tmpsv,&tmpgi,&tmpdb,&tmpdes))
        return ajFalse;

    if(ajStrGetLen(tmpac))
        ajStrFmtUpper(&tmpac);

    if(accfield >= 0)
        embDbiMaxlen(&tmpac, &maxFieldLen[accfield]);

    /* Sequence version and GI share the "sv" field */
    if(svnfield >= 0)
    {
        embDbiMaxlen(&tmpsv, &maxFieldLen[svnfield]);
        embDbiMaxlen(&tmpgi, &maxFieldLen[svnfield]);
    }

    ajStrFmtUpper(myid);

    if(systemsort)
    {
        if(accfield >= 0 && ajStrGetLen(tmpac))
        {
            countfield[accfield]++;
            ajFmtPrintF(alistfile[accfield], "%S %S\n", *myid, tmpac);
        }

        if(svnfield >= 0 && ajStrGetLen(tmpsv))
        {
            countfield[svnfield]++;
            ajFmtPrintF(alistfile[svnfield], "%S %S\n", *myid, tmpsv);
        }

        if(svnfield >= 0 && ajStrGetLen(tmpgi))
        {
            countfield[svnfield]++;
            ajFmtPrintF(alistfile[svnfield], "%S %S\n", *myid, tmpgi);
        }

        if(desfield >= 0 && ajStrGetLen(tmpdes))
        {
            /* Consume the description one alphanumeric word at a time */
            while(ajRegExec(wrdexp, tmpdes))
            {
                ajRegSubI(wrdexp, 1, &tmpfd);
                embDbiMaxlen(&tmpfd, &maxFieldLen[desfield]);
                ajStrFmtUpper(&tmpfd);
                ajDebug("++des '%S'\n", tmpfd);
                countfield[desfield]++;
                ajFmtPrintF(alistfile[desfield], "%S %S\n", *myid, tmpfd);
                ajRegPost(wrdexp, &tmpdes);
            }
        }
    }
    else
    {
        if(accfield >= 0 && ajStrGetLen(tmpac))
        {
            fd = ajCharNewS(tmpac);
            countfield[accfield]++;
            ajListPushAppend(fdlist[accfield], fd);
        }

        if(svnfield >= 0 && ajStrGetLen(tmpsv))
        {
            fd = ajCharNewS(tmpsv);
            countfield[svnfield]++;
            ajListPushAppend(fdlist[svnfield], fd);
        }

        if(svnfield >= 0 && ajStrGetLen(tmpgi))
        {
            fd = ajCharNewS(tmpgi);
            /* Count the GI token too, as the systemsort branch does,
            ** so the counter matches the number of list entries */
            countfield[svnfield]++;
            ajListPushAppend(fdlist[svnfield], fd);
        }

        if(desfield >= 0 && ajStrGetLen(tmpdes))
        {
            /* Consume the description one alphanumeric word at a time */
            while(ajRegExec(wrdexp, tmpdes))
            {
                ajRegSubI(wrdexp, 1, &tmpfd);
                embDbiMaxlen(&tmpfd, &maxFieldLen[desfield]);
                ajStrFmtUpper(&tmpfd);
                ajDebug("++des '%S'\n", tmpfd);
                fd = ajCharNewS(tmpfd);
                countfield[desfield]++;
                ajListPushAppend(fdlist[desfield], fd);
                ajRegPost(wrdexp, &tmpdes);
            }
        }
    }

    return ajTrue;
}
示例#2
0
/* dbifasta_ParseFasta
**
** Reads the next FASTA header line from libr, extracts the ID and the
** index tokens (accession, sequence version / GI, description words)
** according to the header format, then advances the file to the start
** of the following entry.
**
** Calling with fields == NULL does no reading: it only clears the cached
** field-index state so the next real call rescans the field list.
**
** libr        input file; on success it is left positioned at the next
**             line starting with '>' or at end of file
** dpos        receives the file position taken before the header line is
**             read (narrowed to ajint, so large offsets are truncated)
** maxFieldLen per-field length trackers updated through embDbiMaxlen
**             (presumably the longest token seen - confirm in embdbi)
** countfield  per-field token counters, incremented once per token
** idexp       regular expression for the selected header format
** usertype    FASTATYPE_* code selecting how idexp groups are read
** alistfile   per-field output files, used only when systemsort is true;
**             each token is written as "ID TOKEN\n"
** systemsort  true: write tokens to alistfile; false: append C strings
**             allocated with ajCharNewS to the dbifastaGFdl lists
** fields      NULL-terminated list of field names ("acc", "sv", "des");
**             any other name is warned about and ignored
**
** Returns ajFalse on the reset call, on an empty line, when the NCBI
** parser rejects the line, or for an unknown type; ajTrue otherwise.
*/
static AjBool dbifasta_ParseFasta(AjPFile libr, ajint* dpos,
				  ajint* maxFieldLen, ajuint* countfield,
				  AjPRegexp idexp,
				  ajuint usertype, AjPFile* alistfile,
				  AjBool systemsort, AjPStr const * fields)
{
    char* fd;
    ajlong ipos;

    /* Field indices are cached across calls until a reset is requested */
    static ajint numFields;
    static ajint accfield = -1;
    static ajint desfield = -1;
    static ajint svnfield = -1;
    static AjBool reset = AJTRUE;

    ajuint type = usertype;

    if(!fields)
    {
        reset = ajTrue;
        accfield = svnfield = desfield = -1;
        return ajFalse;
    }

    if(reset)
    {
        /* Map each requested field name to its position in the list */
        numFields = 0;
        while(fields[numFields])
        {
            if(ajStrMatchCaseC(fields[numFields], "acc"))
                accfield=numFields;
            else if(ajStrMatchCaseC(fields[numFields], "sv"))
                svnfield=numFields;
            else if(ajStrMatchCaseC(fields[numFields], "des"))
                desfield=numFields;
            else
                ajWarn("EMBL parsing unknown field '%S' ignored",
                       fields[numFields]);

            numFields++;
        }
        reset = ajFalse;
    }

    if(!dbifastaGWrdexp)
        dbifastaGWrdexp = ajRegCompC("([A-Za-z0-9]+)");

    *dpos = (ajint) ajFileResetPos(libr); /* Lossy cast */

    ajReadline(libr, &dbifastaGRline);

    if(!ajStrGetLen(dbifastaGRline))
        return ajFalse;

    if(!ajRegExec(idexp,dbifastaGRline))
    {
        /* Selected format did not match: retry as a simple ID line */
        ajStrDelStatic(&dbifastaGTmpAc);
        type = FASTATYPE_SIMPLE;
        idexp = dbifastaGIdexp;

        if(!ajRegExec(idexp, dbifastaGRline))
        {
            /* NOTE(review): ajFatal presumably does not return; the
            ** return is kept as a safeguard */
            ajFatal("Unrecognised ID line format: %S", dbifastaGRline);
            return ajFalse;
        }

        ajWarn("Invalid ID line for selected format: %S", dbifastaGRline);
    }

    /*
    ** each case needs to set id, tmpac, tmpsv, tmpdes
    ** using empty values if they are not found
    */

    ajStrAssignC(&dbifastaGTmpSv, "");
    ajStrAssignC(&dbifastaGTmpGi, "");
    ajStrAssignC(&dbifastaGTmpDb, "");
    ajStrAssignC(&dbifastaGTmpDes, "");
    ajStrAssignC(&dbifastaGTmpAc, "");
    ajStrAssignC(&dbifastaGTmpId, "");

    switch(type)
    {
    case FASTATYPE_SIMPLE:
        /* ID is the second group; the accession is a copy of the ID */
        ajRegSubI(idexp,2,&dbifastaGTmpId);
        ajStrAssignS(&dbifastaGTmpAc,dbifastaGTmpId);
        ajRegPost(idexp, &dbifastaGTmpDes);
        break;
    case FASTATYPE_DBID:
    case FASTATYPE_GCGID:
        /* ID is the first group; the accession is a copy of the ID */
        ajRegSubI(idexp,1,&dbifastaGTmpId);
        ajStrAssignS(&dbifastaGTmpAc,dbifastaGTmpId);
        ajRegPost(idexp, &dbifastaGTmpDes);
        break;
    case FASTATYPE_NCBI:
        if(!ajSeqParseNcbi(dbifastaGRline, &dbifastaGTmpId, &dbifastaGTmpAc,
                           &dbifastaGTmpSv, &dbifastaGTmpGi, &dbifastaGTmpDb,
                           &dbifastaGTmpDes))
        {
            ajStrDelStatic(&dbifastaGTmpAc);
            return ajFalse;
        }
        break;
    case FASTATYPE_GCGIDACC:
    case FASTATYPE_IDACC:
        /* ID first, accession second */
        ajRegSubI(idexp,1,&dbifastaGTmpId);
        ajRegSubI(idexp,2,&dbifastaGTmpAc);
        ajRegPost(idexp, &dbifastaGTmpDes);
        break;
    case FASTATYPE_GCGACCID:
    case FASTATYPE_ACCID:
        /* Accession first, ID second */
        ajRegSubI(idexp,1,&dbifastaGTmpAc);
        ajRegSubI(idexp,2,&dbifastaGTmpId);
        ajRegPost(idexp, &dbifastaGTmpDes);
        break;
    default:
        ajStrDelStatic(&dbifastaGTmpAc);
        return ajFalse;
    }

    ajStrFmtUpper(&dbifastaGTmpId);
    ajStrFmtUpper(&dbifastaGTmpAc);

    if(accfield >= 0)
        embDbiMaxlen(&dbifastaGTmpAc, &maxFieldLen[accfield]);

    /* Sequence version and GI share the "sv" field */
    if(svnfield >= 0)
    {
        embDbiMaxlen(&dbifastaGTmpSv, &maxFieldLen[svnfield]);
        embDbiMaxlen(&dbifastaGTmpGi, &maxFieldLen[svnfield]);
    }

    if(systemsort)
    {
        if(accfield >= 0 && ajStrGetLen(dbifastaGTmpAc))
        {
            countfield[accfield]++;
            ajFmtPrintF(alistfile[accfield], "%S %S\n",
                        dbifastaGTmpId, dbifastaGTmpAc);
        }

        if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpSv))
        {
            countfield[svnfield]++;
            ajFmtPrintF(alistfile[svnfield], "%S %S\n",
                        dbifastaGTmpId, dbifastaGTmpSv);
        }

        if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpGi))
        {
            countfield[svnfield]++;
            ajFmtPrintF(alistfile[svnfield], "%S %S\n",
                        dbifastaGTmpId, dbifastaGTmpGi);
        }

        if(desfield >= 0 && ajStrGetLen(dbifastaGTmpDes))
        {
            /* Consume the description one alphanumeric word at a time */
            while(ajRegExec(dbifastaGWrdexp, dbifastaGTmpDes))
            {
                ajRegSubI(dbifastaGWrdexp, 1, &dbifastaGTmpFd);
                embDbiMaxlen(&dbifastaGTmpFd, &maxFieldLen[desfield]);
                ajStrFmtUpper(&dbifastaGTmpFd);
                ajDebug("++des '%S' tmpdes '%S'\n",
                        dbifastaGTmpFd, dbifastaGTmpDes);
                countfield[desfield]++;
                ajFmtPrintF(alistfile[desfield], "%S %S\n",
                            dbifastaGTmpId, dbifastaGTmpFd);
                ajRegPost(dbifastaGWrdexp, &dbifastaGTmpDes);
            }
        }
    }
    else
    {
        if(accfield >= 0 && ajStrGetLen(dbifastaGTmpAc))
        {
            fd = ajCharNewS(dbifastaGTmpAc);
            ajListPushAppend(dbifastaGFdl[accfield],fd);
            countfield[accfield]++;
        }

        if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpSv))
        {
            fd = ajCharNewS(dbifastaGTmpSv);
            ajListPushAppend(dbifastaGFdl[svnfield], fd);
            countfield[svnfield]++;
        }

        if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpGi))
        {
            fd = ajCharNewS(dbifastaGTmpGi);
            ajListPushAppend(dbifastaGFdl[svnfield], fd);
            countfield[svnfield]++;
        }

        if(desfield >= 0 && ajStrGetLen(dbifastaGTmpDes))
        {
            /* Consume the description one alphanumeric word at a time */
            while(ajRegExec(dbifastaGWrdexp, dbifastaGTmpDes))
            {
                ajRegSubI(dbifastaGWrdexp, 1, &dbifastaGTmpFd);
                embDbiMaxlen(&dbifastaGTmpFd, &maxFieldLen[desfield]);
                ajStrFmtUpper(&dbifastaGTmpFd);
                ajDebug("++des '%S' tmpdes: '%S'\n",
                        dbifastaGTmpFd, dbifastaGTmpDes);
                fd = ajCharNewS(dbifastaGTmpFd);
                ajListPushAppend(dbifastaGFdl[desfield], fd);
                countfield[desfield]++;
                ajRegPost(dbifastaGWrdexp, &dbifastaGTmpDes);
            }
        }
    }

    /*
    ** Skip the sequence lines. ipos always holds the position taken
    ** before the most recent read, so seeking to it un-reads the next
    ** header line once it has been seen.
    */
    ipos = ajFileResetPos(libr);

    while(ajReadline(libr, &dbifastaGRline))
    {
        if(ajStrGetCharFirst(dbifastaGRline) == '>')
        {
            ajFileSeek(libr, ipos, 0);
            return ajTrue;
        }
        ipos = ajFileResetPos(libr);
    }

    ajFileSeek(libr, ipos, 0);		/* end of file reached */

    return ajTrue;
}