Пример #1
0
// Count matches of `query` over the whole library.
// Forwards to the seven-argument countMatches overload, passing 0 and
// mols->size() as the second and third arguments (presumably the start and
// end of the index range to search - confirm against that overload).
unsigned int SubstructLibrary::countMatches(const ROMol &query,
                                            bool recursionPossible,
                                            bool useChirality,
                                            bool useQueryQueryMatches,
                                            int numThreads) {
  const auto librarySize = mols->size();
  return countMatches(query, 0, librarySize, recursionPossible, useChirality,
                      useQueryQueryMatches, numThreads);
}
Пример #2
0
static void hspToBlocks(struct psl *psl, int *pslSpace, struct block *blk, unsigned flags)
/* Turn one HSP into PSL blocks.  Each ungapped block found by
 * nextUngappedBlk() is appended to psl, after which the indel and
 * match counts of psl are updated. */
{
/* Keep pulling ungapped blocks until nextUngappedBlk() reports there are none left. */
for (;;)
    {
    if (!nextUngappedBlk(blk))
        break;
    addUngappedBlock(psl, pslSpace, blk, flags);
    countIndels(psl);
    countMatches(psl, blk, flags);
    }

/* Once the loop is done the block is expected to be empty on both query and target. */
assert(blk->qStart == blk->qEnd);
assert(blk->tStart == blk->tEnd);
// FIXME
//assert(blk->qStart == pslQEnd(psl, psl->blockCount-1));
//assert(blk->tStart == pslTEnd(psl, psl->blockCount-1));
}
Пример #3
0
/**
*
*   Convert a phoneme string into sound data.
*
*   Walks textp from left to right. At each position the vocabulary entry
*   (in s_phonemes) with the longest match is chosen, its sound bytes are
*   appended to the global 'sounds' array and a pitch/attenuation pair is
*   written to the global 'modifier' array. An optional digit directly after
*   a phoneme requests a pitch change.
*
*   Enter:  textp = phonemes string
*   Output: sounds   = string of sound data, followed by four 'z' bytes and
*                      zero padding up to the end of the array
*           modifier = pairs of bytes, padded with (-1,0) up to the end of
*                      the array
*   Return: TRUE on success.
*           FALSE if no vocabulary entry matches at the current position
*           (a message is logged) or if the sound data gets too long.
*
*/
static boolean phonemesToData(const char* textp){

	size_t phonemeOut = 0; // offset into the phonemes array
	size_t modifierOut = 0; // offset into the modifiers array
	uint8_t L81=0; // attenuate
	uint8_t previousL81=16; // attenuation of the previous phoneme (starts at 16)

#ifdef _WINDOWS_
	// Pre-fill the modifier array with 0xAA on Windows builds
	// (presumably a debugging aid to spot entries that were never written - confirm)
	memset(modifier,0xAA,sizeof(modifier));
#endif

	// One iteration per phoneme in the input text
	while(*textp){

		int numOut; // number of sound bytes copied for this phoneme

		// P20: Get next phoneme
		size_t longestMatch=0;                 // length of the best match found so far
		const char* vocabEntry = s_phonemes;   // cursor walking through the vocabulary table
		const char* bestPhoneme = null;        // position just past the matched text in the best entry
		uint8_t     bestPhonemeNo=0;           // index of the best entry


		// Get next phoneme, P2
		uint8_t phonemeNumber;

		// Scan every vocabulary entry; the table ends where getVocab(vocabEntry,0) returns 0
		for(phonemeNumber = 0; getVocab(vocabEntry,0);phonemeNumber++){

			// Find the exact matching characters
			size_t numChars = countMatches(textp,vocabEntry);

			// Only a strictly longer match replaces the current best,
			// so on a tie the earlier vocabulary entry wins
			if(numChars > longestMatch){
				// P7: we have matched the whole phoneme
				longestMatch = numChars;
				// Remember where the data following the matched text starts
				// (read back as the sound data in the copy loop below)
				bestPhoneme = vocabEntry + numChars;
				bestPhonemeNo = phonemeNumber;

			}

			// Move to next phoneme: advance to the byte after the next 0 terminator
			while(getVocab(vocabEntry++,0)!=0);		// Skip over sound data
		} // next phoneme

		// p13: nothing in the vocabulary matched at this position
		if(!bestPhoneme){
			loggerP(PSTR("Mistake in speech at "));
			logger(textp);
			loggerCRLF();
			return FALSE;
		}


		// Look up the attenuation for the chosen phoneme
	    L81 = pgm_read_byte(&s_attenuate[bestPhonemeNo]) /*+'0'*/;


		// Get char from text after the phoneme and test if it is a numeric
		if(textp[longestMatch]>='0' && textp[longestMatch]<='9'){
			// Pitch change requested
			// NOTE(review): the table index is digit - '1', so a '0' digit would
			// index PitchesP[-1] - confirm that '0' cannot occur here
			modifier[modifierOut] = pgm_read_byte(&PitchesP[textp[longestMatch]-'1'] );
			modifier[modifierOut+1] = L81;
			longestMatch++; // the digit is consumed together with the phoneme
		}else{
			// No digit: store -1 as the pitch and 0 as the second byte
			// (-1 is what the ">= 0" test below distinguishes from a real pitch;
			// this assumes modifier has a signed element type - confirm)
			modifier[modifierOut]=-1;
			modifier[modifierOut+1]=0;
		}

		// P10
		// NOTE(review): L81 is compared with the character '0' although the
		// "+'0'" in the lookup above is commented out - confirm this is intended
		if(L81!='0' && L81 != previousL81 && modifierOut!=0 && modifier[modifierOut]>=0){
			// Copy this pitch back into the previous pair and set that pair's second byte to '0'
			modifier[modifierOut - 2] = modifier[modifierOut];
			modifier[modifierOut - 1] = '0';
		}else{
			// P11
			// (c | 0x20) == 0x20 holds only for c == 0 and c == ' ';
			// the character tested is the last one consumed for this phoneme
			if( (textp[longestMatch-1] | 0x20) == 0x20){
				// end of input string or a space:
				// reuse the previous pair's pitch, or 16 for the very first pair
				modifier[modifierOut] = (modifierOut==0) ? 16 : modifier[modifierOut-2];
			}
		}

		// Copy phoneme data to sound data (up to the 0 terminator, top bit masked off)
		for(numOut=0; getVocab(bestPhoneme,numOut)!= 0; numOut++){
			sounds[phonemeOut++] = getVocab(bestPhoneme,numOut) & 0x7f;
		}

		// Give up when fewer than 16 bytes of 'sounds' remain.
		// NOTE(review): this check runs after the copy above has already written
		// into 'sounds' - it relies on the 16 byte margin being large enough
		if(phonemeOut > sizeof(sounds)-16){
			loggerP(PSTR("Line too long\n"));
			return FALSE;
		}

		// P16

		// Copy the modifier setting to each sound data element for this phoneme
		// NOTE(review): the loop bound is inclusive (count <= numOut) and writes up to
		// modifier[modifierOut + count + 3] - confirm the array is large enough
		if(numOut > 2){
			int count;
			for(count=0; count <= numOut; count+=2){
				modifier[modifierOut + count + 2] = modifier[modifierOut + count];
				modifier[modifierOut + count + 3] = modifier[modifierOut + count + 1];
			}
		}
		// Advance the modifier offset by the number of sound bytes just written
		modifierOut += numOut;

		//p21: step past the consumed text and remember this phoneme's attenuation
		textp += longestMatch;
		previousL81 = L81;
	}

	// Append four 'z' bytes after the last phoneme
	// (presumably a trailing pause/end marker - confirm)
	sounds[phonemeOut++]='z';
	sounds[phonemeOut++]='z';
	sounds[phonemeOut++]='z';
	sounds[phonemeOut++]='z';

	// Zero-fill the rest of the sound data
	while(phonemeOut < sizeof(sounds)){
		sounds[phonemeOut++]=0;
	}

	// Fill the rest of the modifier array with (-1,0) pairs
	while(modifierOut < sizeof(modifier)){
		modifier[modifierOut++]=-1;
		modifier[modifierOut++]=0;
	}

	return TRUE;
}
Пример #4
0
static void tabBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
	FILE *f, char *databaseName, int databaseSeqCount, 
	double databaseLetterCount, char *ourId, boolean withComment)
/* Write the alignments of abList to f in NCBI tabular blast format, one
 * tab-separated line per alignment.  When withComment is set a block of
 * '#' comment lines (version, query, database, field names) comes first. */
{
struct targetHits *hitList = NULL, *hit;
char *qName = abList->axtList->qName;
int qSize = abList->qSize;

if (withComment)
    {
    // use date from CVS, unless checked out with -kk, then ignore.
    char * rcsDate = "$Date: 2009/02/26 00:05:49 $";
    char dateStamp[11];
    if (strlen(rcsDate) <= 17)
        safecpy(dateStamp, sizeof(dateStamp), "");
    else
        safencpy(dateStamp, sizeof(dateStamp), rcsDate+7, 10);
    dateStamp[10] = 0;
    fprintf(f, "# BLAT %s [%s]\n", gfVersion, dateStamp);
    fprintf(f, "# Query: %s\n", qName);
    fprintf(f, "# Database: %s\n", databaseName);
    fprintf(f, "%s\n", 
    	"# Fields: Query id, Subject id, % identity, alignment length, "
	"mismatches, gap openings, q. start, q. end, s. start, s. end, "
	"e-value, bit score");
    }

/* One output line per alignment, grouped by target. */
hitList = bundleIntoTargets(abList);
hit = hitList;
while (hit != NULL)
    {
    struct axtRef *link = hit->axtList;
    while (link != NULL)
        {
        struct axt *aln = link->axt;
        int identical = countMatches(aln->qSym, aln->tSym, aln->symCount);
        int gapCols = countGaps(aln->qSym, aln->tSym, aln->symCount);
        int gapOpenings = countGapOpens(aln->qSym, aln->tSym, aln->symCount);
        double pctIdentity = 100.0 * identical/aln->symCount;
        int mismatches = aln->symCount - identical - gapCols;
        int qs = aln->qStart, qe = aln->qEnd;

        fprintf(f, "%s\t", aln->qName);
        fprintf(f, "%s\t", aln->tName);
        fprintf(f, "%.2f\t", pctIdentity);
        fprintf(f, "%d\t", aln->symCount);
        fprintf(f, "%d\t", mismatches);
        fprintf(f, "%d\t", gapOpenings);

        /* Minus strand query coordinates are flipped before printing;
         * the start is printed one-based in either case. */
        if (aln->qStrand == '-')
            reverseIntRange(&qs, &qe, qSize);
        fprintf(f, "%d\t", qs + 1);
        fprintf(f, "%d\t", qe);
        printAxtTargetBlastTab(f, aln, hit->size);

        fprintf(f, "%3.1e\t", blastzScoreToNcbiExpectation(aln->score));
        fprintf(f, "%d.0\n", blastzScoreToNcbiBits(aln->score));
        link = link->next;
        }
    hit = hit->next;
    }

/* Cleanup time. */
targetHitsFreeList(&hitList);
}
Пример #5
0
static void ncbiBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
	FILE *f, char *databaseName, int databaseSeqCount, 
	double databaseLetterCount, char *ourId, double minIdentity)
/* Do ncbiblast-like output at end of processing query.
 *   abList              - alignments for one query; query name, size and the
 *                         translated flag are taken from it
 *   isProt              - selects the protein or the DNA flavour of the
 *                         per-alignment statistics
 *   f                   - output file
 *   databaseName, databaseSeqCount, databaseLetterCount
 *                       - only echoed in the header/footer
 *   ourId               - program id for the first line; NULL means "axtBlastOut"
 *   minIdentity         - alignments whose rounded percent identity is below
 *                         this are left out of both summary and details
 * Side effects: for untranslated DNA alignments with query strand '-' and
 * target strand '+' the axt is reverse-complemented in place, and for
 * protein alignments the global answer_for_kg is set. */
{
char asciiNum[32];
struct targetHits *targetList = NULL, *target;
char *queryName;
int querySize = abList->qSize;
boolean isTranslated = (abList->axtList->frame != 0);

/* Print out stuff that doesn't depend on query or database. */
if (ourId == NULL)
    ourId = "axtBlastOut";
fprintf(f, "%s 2.2.11 [%s]\n", progType(isProt, abList, TRUE), ourId);
fprintf(f, "\n");
fprintf(f, "Reference:  Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
fprintf(f, "\n");

/* Print query and database info. */
queryName = abList->axtList->qName;
fprintf(f, "Query= %s\n", queryName);
fprintf(f, "         (%d letters)\n", abList->qSize);
fprintf(f, "\n");
fprintf(f, "Database: %s \n",  databaseName);
sprintLongWithCommas(asciiNum, databaseLetterCount);
fprintf(f, "           %d sequences; %s total letters\n",  databaseSeqCount, asciiNum);
fprintf(f, "\n");
fprintf(f, "Searching.done\n");

targetList = bundleIntoTargets(abList);

/* Print out summary of hits. */
fprintf(f, "                                                                 Score    E\n");
fprintf(f, "Sequences producing significant alignments:                      (bits) Value\n");
fprintf(f, "\n");
for (target = targetList; target != NULL; target = target->next)
    {
    struct axtRef *ref;
    struct axt *axt;
    int matches;
    double identity, expectation;
    int bit;
    
    for (ref = target->axtList; ref != NULL; ref = ref->next)
	{
	axt = ref->axt;
	
	matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
	identity = round(100.0 * matches / axt->symCount);
	/* skip output if minIdentity not reached */
	if (identity < minIdentity) continue;
    
    	bit = blastzScoreToNcbiBits(axt->score);
        expectation = blastzScoreToNcbiExpectation(axt->score);
    	fprintf(f, "%-67s  %4d   ", target->name, bit);
    	ncbiPrintE(f, expectation);
    	fprintf(f, "\n");
    	}
    }
fprintf(f, "\n");

/* Print out details on each target. */
for (target = targetList; target != NULL; target = target->next)
    {
    struct axtRef *ref;
    struct axt *axt;
    int matches, gaps;
    double identity;
    /* Name whose header has already been printed for this target.  This is a
     * borrowed pointer (a literal or target->name, which outlives the loop),
     * not a strdup'd copy, so there is nothing to free and nothing leaks. */
    char *oldName = "";

    for (ref = target->axtList; ref != NULL; ref = ref->next)
	{
	axt = ref->axt;
	
	matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
	identity = round(100.0 * matches / axt->symCount);
	
	/* skip output if minIdentity not reached */
	if (identity < minIdentity) continue;
        
	/* print target sequence name and length only once */ 
	if (!sameWord(oldName, target->name))
	    {
	    fprintf(f, "\n\n>%s \n", target->name);
	    fprintf(f, "          Length = %d\n", target->size);
	    oldName = target->name;
	    }

	fprintf(f, "\n");
	fprintf(f, " Score = %d bits (%d), Expect = ",
	     blastzScoreToNcbiBits(axt->score),
	     blastzScoreToNcbiScore(axt->score));
	ncbiPrintE(f, blastzScoreToNcbiExpectation(axt->score));
	fprintf(f, "\n");
	
	if (isProt)
	    {
	    int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
	    gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
	    fprintf(f, " Identities = %d/%d (%d%%),",
		 matches, axt->symCount, round(100.0 * matches / axt->symCount));
	    fprintf(f, " Positives = %d/%d (%d%%),",
		 positives, axt->symCount, round(100.0 * positives / axt->symCount));
	    fprintf(f, " Gaps = %d/%d (%d%%)\n",
		 gaps, axt->symCount, round(100.0 * gaps / axt->symCount));
	    if (axt->frame != 0) 
		fprintf(f, " Frame = %c%d\n", axt->tStrand, axt->frame);
	    /* set the special global variable, answer_for_kg.  
   	       This is needed for Known Genes track building.  Fan 1/21/03 */
            answer_for_kg=axt->symCount - matches;
	    }
	else
	    {
	    fprintf(f, " Identities = %d/%d (%d%%)\n",
		 matches, axt->symCount, round(100.0 * matches / axt->symCount));
	    /* blast displays dna searches as +- instead of blat's default -+,
	     * so flip such alignments (in place) before printing them */
	    if (!isTranslated && (axt->qStrand == '-') && (axt->tStrand == '+'))
		{
		reverseIntRange(&axt->qStart, &axt->qEnd, querySize);
		reverseIntRange(&axt->tStart, &axt->tEnd, target->size);
		reverseComplement(axt->qSym, axt->symCount);
		reverseComplement(axt->tSym, axt->symCount);
		axt->qStrand = '+';
		axt->tStrand = '-';
		}
	    fprintf(f, " Strand = %s / %s\n", nameForStrand(axt->qStrand),
		nameForStrand(axt->tStrand));
	    }
	fprintf(f, "\n");
	blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
	}
    }

fprintf(f, "  Database: %s\n", databaseName);

/* Cleanup time. */
targetHitsFreeList(&targetList);
}
Пример #6
0
static void wuBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
	FILE *f, 
	char *databaseName, int databaseSeqCount, double databaseLetterCount, 
	char *ourId)
/* Do wublast-like output at end of processing query.
 *   abList              - alignments for one query; query name, size and the
 *                         translated flag are taken from it
 *   queryIx             - record number echoed in the query header
 *   isProt              - protein output: no strand headings, optional frame
 *   f                   - output file
 *   databaseName, databaseSeqCount
 *                       - only echoed in the header
 *   databaseLetterCount - echoed in the header and passed to the expectation
 *                         calculation
 *   ourId               - program id for the first line; NULL means "axtBlastOut"
 * The output is a fixed banner, a per-target summary table and then, for each
 * target, its alignments grouped by query strand ('+' first, then '-'). */
{
char asciiNum[32];
struct targetHits *targetList = NULL, *target;
char *queryName;
int isRc;
int querySize = abList->qSize;
boolean isTranslated = (abList->axtList->frame != 0);

/* Print out stuff that doesn't depend on query or database. */
if (ourId == NULL)
    ourId = "axtBlastOut";
fprintf(f, "%s 2.0MP-WashU [%s]\n", progType(isProt, abList, TRUE), ourId);
fprintf(f, "\n");
fprintf(f, "Copyright (C) 2000-2002 Jim Kent\n");
fprintf(f, "All Rights Reserved\n");
fprintf(f, "\n");
fprintf(f, "Reference:  Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
fprintf(f, "\n");
if (!isProt)
    {
    fprintf(f, "Notice:  this program and its default parameter settings are optimized to find\n");
    fprintf(f, "nearly identical sequences very rapidly.  For slower but more sensitive\n");
    fprintf(f, "alignments please use other methods.\n");
    fprintf(f, "\n");
    }

/* Print query and database info. */
queryName = abList->axtList->qName;
fprintf(f, "Query=  %s\n", queryName);
fprintf(f, "        (%d letters; record %d)\n", abList->qSize, queryIx);
fprintf(f, "\n");
fprintf(f, "Database:  %s\n",  databaseName);
sprintLongWithCommas(asciiNum, databaseLetterCount);
fprintf(f, "           %d sequences; %s total letters\n",  databaseSeqCount, asciiNum);
fprintf(f, "Searching....10....20....30....40....50....60....70....80....90....100%% done\n");
fprintf(f, "\n");

targetList = bundleIntoTargets(abList);

/* Print out summary of hits. */
fprintf(f, "                                                                     Smallest\n");
fprintf(f, "                                                                       Sum\n");
fprintf(f, "                                                              High  Probability\n");
fprintf(f, "Sequences producing High-scoring Segment Pairs:              Score  P(N)      N\n");
fprintf(f, "\n");
for (target = targetList; target != NULL; target = target->next)
    {
    double expectation = blastzScoreToWuExpectation(target->score, databaseLetterCount);
    double p = expectationToProbability(expectation);
    fprintf(f, "%-61s %4d  %8.1e %2d\n", target->name, 
    	blastzToWublastScore(target->score), p, slCount(target->axtList));
    }

/* Print out details on each target. */
for (target = targetList; target != NULL; target = target->next)
    {
    fprintf(f, "\n\n>%s\n", target->name);
    fprintf(f, "        Length = %d\n", target->size);
    fprintf(f, "\n");
    /* Two passes over the target's alignments: plus strand, then minus strand. */
    for (isRc=0; isRc <= 1; ++isRc)
	{
	boolean saidStrand = FALSE;
	char strand = (isRc ? '-' : '+');
	char *strandName = nameForStrand(strand);
	struct axtRef *ref;
	struct axt *axt;
	for (ref = target->axtList; ref != NULL; ref = ref->next)
	    {
	    axt = ref->axt;
	    if (axt->qStrand == strand)
		{
		int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
		int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
		/* The "P =" field is the probability derived from the expectation,
		 * the same conversion the summary table above uses; previously the
		 * expectation itself was printed a second time. */
		double expectation = blastzScoreToWuExpectation(axt->score, databaseLetterCount);
		double probability = expectationToProbability(expectation);
		if (!saidStrand)
		    {
		    /* strand heading goes out once per strand, and only for DNA */
		    saidStrand = TRUE;
		    if (!isProt)
			fprintf(f, "  %s Strand HSPs:\n\n", strandName);
		    }
		fprintf(f, " Score = %d (%2.1f bits), Expect = %5.1e, P = %5.1e\n",
		     blastzToWublastScore(axt->score), 
		     blastzScoreToWuBits(axt->score, isProt),
		     expectation,
		     probability);
		fprintf(f, " Identities = %d/%d (%d%%), Positives = %d/%d (%d%%)",
		     matches, axt->symCount, round(100.0 * matches / axt->symCount),
		     positives, axt->symCount, round(100.0 * positives / axt->symCount));
		if (isProt)
		    {
		    if (axt->frame != 0)
		        fprintf(f, ", Frame = %c%d", axt->tStrand, axt->frame);
		    fprintf(f, "\n");
		    }
		else
		    fprintf(f, ", Strand = %s / Plus\n", strandName);
		fprintf(f, "\n");
		blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
		}
	    }
	}
    }

/* Cleanup time. */
targetHitsFreeList(&targetList);
}
Пример #7
0
// Filter the building blocks in `bbs` against the reactant templates of `rxn`.
//
// For every reactant template index, a reagent is dropped when
//   - countMatches() reports no match against the template, or
//   - the match count exceeds params.reagentMaxMatchCount, or
//   - params.sanePartialProducts is set and, for at least one product
//     template returned by rxn.runReactant(), none of the products can be
//     sanitized.
// The surviving reagents are returned in the same per-template layout as
// `bbs`; the number of removed reagents per template is written to rdInfoLog.
//
// Precondition: bbs has no more entries than rxn has reactant templates.
BBS removeNonmatchingReagents(const ChemicalReaction &rxn, BBS bbs,
                              const EnumerationParams &params) {
  PRECONDITION(bbs.size() <= rxn.getNumReactantTemplates(),
               "Number of Reagents not compatible with reaction templates");
  BBS result;
  result.resize(bbs.size());

  for (size_t reactant_idx = 0; reactant_idx < bbs.size(); ++reactant_idx) {
    size_t removedCount = 0;
    // INT_MAX is translated to 0 for countMatches (presumably "no limit" -
    // confirm against countMatches).
    const unsigned int maxMatches =
        (params.reagentMaxMatchCount == INT_MAX)
            ? 0
            : rdcast<unsigned int>(params.reagentMaxMatchCount);

    ROMOL_SPTR reactantTemplate = rxn.getReactants()[reactant_idx];
    for (size_t reagent_idx = 0; reagent_idx < bbs[reactant_idx].size();
         ++reagent_idx) {
      ROMOL_SPTR mol = bbs[reactant_idx][reagent_idx];
      size_t matches =
          countMatches(*mol.get(), *reactantTemplate.get(), maxMatches);

      bool removeReagent = false;
      if (!matches || matches > rdcast<size_t>(params.reagentMaxMatchCount)) {
        removeReagent = true;
      }

      if (!removeReagent && params.sanePartialProducts) {
        // see if we have any sane products in the results
        std::vector<MOL_SPTR_VECT> partialProducts =
            rxn.runReactant(mol, reactant_idx);
        for (size_t productTemplate_idx = 0;
             productTemplate_idx < partialProducts.size();
             ++productTemplate_idx) {
          int saneProducts = 0;
          for (size_t product_idx = 0;
               product_idx < partialProducts[productTemplate_idx].size();
               ++product_idx) {
            RWMol *m = dynamic_cast<RWMol *>(
                partialProducts[productTemplate_idx][product_idx].get());
            if (!m) {
              // A failed pointer dynamic_cast yields a null pointer;
              // dereferencing it would be undefined behaviour that the
              // catch below cannot intercept. Such a product simply does
              // not count as sane.
              continue;
            }
            try {
              MolOps::sanitizeMol(*m);
              saneProducts++;
            } catch (...) {
              // deliberately ignored: a product that fails sanitization is
              // just not counted
            }
          }

          if (!saneProducts) {
            // if any product template has no sane products, we bail
            removeReagent = true;
            break;
          }
        }
      }

      if (removeReagent)
        removedCount++;
      else
        result[reactant_idx].push_back(mol);
    }

    if (removedCount) {
      BOOST_LOG(rdInfoLog) << "Removed " << removedCount
                           << " non matching reagents at template "
                           << reactant_idx << std::endl;
    }
  }
  return result;
}