/// Count the molecules in the whole library that match \p query.
///
/// Convenience overload: forwards to the ranged countMatches overload with a
/// start index of 0 and an end index of mols->size(), passing every other
/// argument through unchanged.
unsigned int SubstructLibrary::countMatches(const ROMol &query,
                                            bool recursionPossible,
                                            bool useChirality,
                                            bool useQueryQueryMatches,
                                            int numThreads) {
  // End of the search range: the current number of molecules held in `mols`.
  const auto libraryEnd = mols->size();
  return countMatches(query, 0, libraryEnd, recursionPossible, useChirality,
                      useQueryQueryMatches, numThreads);
}
static void hspToBlocks(struct psl *psl, int *pslSpace, struct block *blk, unsigned flags)
/* Build PSL blocks from an HSP.  Each time nextUngappedBlk() reports another
 * ungapped block in blk, the block is appended to psl and the indel and match
 * counts of psl are updated. */
{
/* fill in ungapped blocks */
for (;;)
    {
    if (!nextUngappedBlk(blk))
        break;
    addUngappedBlock(psl, pslSpace, blk, flags);
    countIndels(psl);
    countMatches(psl, blk, flags);
    }

/* Once the walk is finished the query and target ranges of blk must both
 * be empty. */
assert(blk->qStart == blk->qEnd);
assert(blk->tStart == blk->tEnd);
// FIXME
//assert(blk->qStart == pslQEnd(psl, psl->blockCount-1));
//assert(blk->tStart == pslTEnd(psl, psl->blockCount-1));
}
/**
 *
 *   Convert phonemes to data string
 *
 *   Enter:  textp = phonemes string (NUL terminated)
 *   Return: TRUE on success.
 *           FALSE, after logging a message, when no vocabulary entry matches
 *           at the current text position ("Mistake in speech at ") or when
 *           the sound data would come within 16 bytes of the end of the
 *           sounds buffer ("Line too long").
 *
 *   Output is written to two buffers declared outside this function:
 *           sounds   = string of sound data, followed by four 'z' bytes and
 *                      then zero-filled to the end of the buffer
 *           modifier = 2 bytes per sound data; the unused tail is filled
 *                      with (-1, 0) pairs
 *
 *   The P2/P7/P10/... labels in the comments are kept from the original
 *   listing so the steps can be cross-referenced.
 *
 */
static boolean phonemesToData(const char* textp){

	size_t phonemeOut = 0; // offset into the phonemes array
	size_t modifierOut = 0; // offset into the modifiers array
	uint8_t L81=0; // attenuate
	uint8_t previousL81=16;

	// Windows builds only: pre-fill the modifier buffer with 0xAA.
	// presumably a debugging aid to make unwritten bytes stand out - TODO confirm
#ifdef _WINDOWS_
	memset(modifier,0xAA,sizeof(modifier));
#endif

	// Consume the input one phoneme at a time until the terminating NUL
	while(*textp){
		int numOut;

		// P20: Get next phoneme
		size_t longestMatch=0;
		const char* vocabEntry = s_phonemes;
		const char* bestPhoneme = null;
		uint8_t bestPhonemeNo=0;

		// Get next phoneme, P2
		// Walk every vocabulary entry; the table ends at an entry whose first byte is 0
		uint8_t phonemeNumber;
		for(phonemeNumber = 0; getVocab(vocabEntry,0);phonemeNumber++){
			// Find the exact matching characters
			// presumably countMatches returns 0 unless the whole phoneme text matched - verify
			size_t numChars = countMatches(textp,vocabEntry);

			// if not the longest match so far, or not a complete match, then ignore
			if(numChars > longestMatch){
				// P7: we have matched the whole phoneme
				longestMatch = numChars;
				// the sound data starts straight after the matched characters
				bestPhoneme = vocabEntry + numChars;
				bestPhonemeNo = phonemeNumber;
			}

			// Move to next phoneme
			// (advance to just past the 0 byte that terminates this entry)
			while(getVocab(vocabEntry++,0)!=0); // Skip over sound data
		} // next phoneme

		// p13
		// Nothing in the vocabulary matched at this position: report and give up
		if(!bestPhoneme){
			loggerP(PSTR("Mistake in speech at "));
			logger(textp);
			loggerCRLF();
			return FALSE;
		}

		// Attenuation value for the chosen phoneme (stored as a raw number, the +'0' is disabled)
		L81 = pgm_read_byte(&s_attenuate[bestPhonemeNo]) /*+'0'*/;

		// Get char from text after the phoneme and test if it is a numeric
		if(textp[longestMatch]>='0' && textp[longestMatch]<='9'){
			// Pitch change requested
			// NOTE(review): the table index is digit-'1', so a '0' digit reads PitchesP[-1] - confirm '0' cannot occur here
			modifier[modifierOut] = pgm_read_byte(&PitchesP[textp[longestMatch]-'1'] );
			modifier[modifierOut+1] = L81;
			longestMatch++; // the digit is consumed together with the phoneme
		}else{
			// No pitch digit: store the (-1, 0) pair
			modifier[modifierOut]=-1;
			modifier[modifierOut+1]=0;
		}

		// P10
		// NOTE(review): L81 is a raw table value (see the disabled +'0' above) yet it is compared
		// with, and the previous pair is patched with, the character '0' - confirm this is intended
		if(L81!='0' && L81 != previousL81 && modifierOut!=0 && modifier[modifierOut]>=0){
			// Copy this phoneme's first modifier byte back into the pair before it
			modifier[modifierOut - 2] = modifier[modifierOut];
			modifier[modifierOut - 1] = '0';
		}else{
			// P11
			// True when the last consumed character is 0x00 or 0x20
			if( (textp[longestMatch-1] | 0x20) == 0x20){
				// end of input string or a space
				// reuse the previous pair's first byte, or 16 when this is the first pair
				modifier[modifierOut] = (modifierOut==0) ? 16 : modifier[modifierOut-2];
			}
		}

		// Copy phoneme data to sound data
		// (bytes up to the 0 terminator, each masked to its low 7 bits)
		for(numOut=0; getVocab(bestPhoneme,numOut)!= 0; numOut++){
			sounds[phonemeOut++] = getVocab(bestPhoneme,numOut) & 0x7f;
		}

		// The check runs after the copy above, so it relies on keeping 16 bytes of
		// headroom at the end of sounds
		if(phonemeOut > sizeof(sounds)-16){
			loggerP(PSTR("Line too long\n"));
			return FALSE;
		}

		// P16
		// Copy the modifier setting to each sound data element for this phoneme
		if(numOut > 2){
			int count;
			// Each pass copies one 2-byte pair forward onto the next pair
			// NOTE(review): the bound is count <= numOut, so the last write is beyond
			// modifierOut+numOut - confirm the modifier buffer has room for it
			for(count=0; count <= numOut; count+=2){
				modifier[modifierOut + count + 2] = modifier[modifierOut + count];
				modifier[modifierOut + count + 3] = modifier[modifierOut + count + 1];
			}
		}
		modifierOut += numOut;

		//p21
		// Step past the consumed phoneme (and pitch digit, if any)
		textp += longestMatch;
		previousL81 = L81;
	}

	// Append four 'z' bytes after the last phoneme's sound data
	// presumably a trailing silence/end marker - TODO confirm
	sounds[phonemeOut++]='z';
	sounds[phonemeOut++]='z';
	sounds[phonemeOut++]='z';
	sounds[phonemeOut++]='z';

	// Zero-fill the rest of the sound buffer
	while(phonemeOut < sizeof(sounds)){
		sounds[phonemeOut++]=0;
	}

	// Fill the rest of the modifier buffer with (-1, 0) pairs
	while(modifierOut < sizeof(modifier)){
		modifier[modifierOut++]=-1;
		modifier[modifierOut++]=0;
	}

	return TRUE;
}
static void tabBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
	FILE *f, char *databaseName, int databaseSeqCount, 
	double databaseLetterCount, char *ourId, boolean withComment)
/* Do NCBI tabular blast output: one tab-separated line per alignment in
 * abList, written to f.  When withComment is set, a '#' comment header
 * (program version, query, database, field names) is written first. */
{
struct targetHits *hitList = NULL, *hit;
int qSize = abList->qSize;
char *qName = abList->axtList->qName;

if (withComment)
    {
    /* The date is taken from the CVS keyword.  A checkout made with -kk
     * leaves the keyword unexpanded (too short), in which case the date
     * stamp is left empty. */
    char *rcsDate = "$Date: 2009/02/26 00:05:49 $";
    char dateStamp[11];
    if (strlen(rcsDate) > 17)
        safencpy(dateStamp, sizeof(dateStamp), rcsDate+7, 10);
    else
        safecpy(dateStamp, sizeof(dateStamp), "");
    dateStamp[10] = 0;
    fprintf(f, "# BLAT %s [%s]\n", gfVersion, dateStamp);
    fprintf(f, "# Query: %s\n", qName);
    fprintf(f, "# Database: %s\n", databaseName);
    fprintf(f, "%s\n", 
        "# Fields: Query id, Subject id, % identity, alignment length, "
        "mismatches, gap openings, q. start, q. end, s. start, s. end, "
        "e-value, bit score");
    }

/* One output line per alignment, grouped by target. */
hitList = bundleIntoTargets(abList);
for (hit = hitList; hit != NULL; hit = hit->next)
    {
    struct axtRef *axtRef;
    for (axtRef = hit->axtList; axtRef != NULL; axtRef = axtRef->next)
        {
        struct axt *axt = axtRef->axt;
        int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
        int gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
        int gapOpens = countGapOpens(axt->qSym, axt->tSym, axt->symCount);
        /* Query coordinates to report; flipped below for the minus strand
         * without touching the axt itself. */
        int qStart = axt->qStart, qEnd = axt->qEnd;

        fprintf(f, "%s\t", axt->qName);
        fprintf(f, "%s\t", axt->tName);
        fprintf(f, "%.2f\t", 100.0 * matches/axt->symCount);
        fprintf(f, "%d\t", axt->symCount);
        /* Mismatches are the columns that are neither matches nor gaps. */
        fprintf(f, "%d\t", axt->symCount - matches - gaps);
        fprintf(f, "%d\t", gapOpens);

        if (axt->qStrand == '-')
            reverseIntRange(&qStart, &qEnd, qSize);
        /* The start is printed one higher than the stored value. */
        fprintf(f, "%d\t", qStart + 1);
        fprintf(f, "%d\t", qEnd);
        printAxtTargetBlastTab(f, axt, hit->size);

        fprintf(f, "%3.1e\t", blastzScoreToNcbiExpectation(axt->score));
        fprintf(f, "%d.0\n", blastzScoreToNcbiBits(axt->score));
        }
    }

/* Cleanup time. */
targetHitsFreeList(&hitList);
}
static void ncbiBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
	FILE *f, char *databaseName, int databaseSeqCount, 
	double databaseLetterCount, char *ourId, double minIdentity)
/* Do ncbiblast-like output at end of processing query.
 *
 * Writes to f, in order: a program/reference banner, the query and database
 * description, a one-line-per-alignment summary table and then the detailed
 * alignments grouped by target.  Alignments whose rounded percent identity is
 * below minIdentity are left out of both the summary and the details.
 *
 * ourId defaults to "axtBlastOut" when NULL.
 *
 * Side effects beyond the output:
 *  - for protein output the global answer_for_kg is set to the number of
 *    non-matching columns of each alignment printed;
 *  - for untranslated DNA output an alignment on query '-' / target '+' is
 *    flipped IN PLACE (coordinates, symbols and strands) to '+' / '-'.
 *
 * NOTE(review): round() results are printed with %d below; this relies on
 * round being an int-valued project macro rather than C99 round() - confirm. */
{
char asciiNum[32];
struct targetHits *targetList = NULL, *target;
char *queryName;
int querySize = abList->qSize;
boolean isTranslated = (abList->axtList->frame != 0);

/* Print out stuff that doesn't depend on query or database. */
if (ourId == NULL)
    ourId = "axtBlastOut";
fprintf(f, "%s 2.2.11 [%s]\n", progType(isProt, abList, TRUE), ourId);
fprintf(f, "\n");
fprintf(f, "Reference: Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
fprintf(f, "\n");

/* Print query and database info. */
queryName = abList->axtList->qName;
fprintf(f, "Query= %s\n", queryName);
fprintf(f, " (%d letters)\n", abList->qSize);
fprintf(f, "\n");
fprintf(f, "Database: %s \n", databaseName);
sprintLongWithCommas(asciiNum, databaseLetterCount);
fprintf(f, " %d sequences; %s total letters\n", databaseSeqCount, asciiNum);
fprintf(f, "\n");
fprintf(f, "Searching.done\n");

targetList = bundleIntoTargets(abList);

/* Print out summary of hits. */
fprintf(f, " Score E\n");
fprintf(f, "Sequences producing significant alignments: (bits) Value\n");
fprintf(f, "\n");
for (target = targetList; target != NULL; target = target->next)
    {
    struct axtRef *ref;
    struct axt *axt;
    int matches;
    double identity, expectation;
    int bit;

    for (ref = target->axtList; ref != NULL; ref = ref->next)
	{
	axt = ref->axt;

	matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
	identity = round(100.0 * matches / axt->symCount);
	/* skip output if minIdentity not reached */
	if (identity < minIdentity)
	    continue;

	bit = blastzScoreToNcbiBits(axt->score);
	expectation = blastzScoreToNcbiExpectation(axt->score);
	fprintf(f, "%-67s %4d ", target->name, bit);
	ncbiPrintE(f, expectation);
	fprintf(f, "\n");
	}
    }
fprintf(f, "\n");

/* Print out details on each target. */
for (target = targetList; target != NULL; target = target->next)
    {
    struct axtRef *ref;
    struct axt *axt;
    int matches, gaps;
    double identity;
    /* Name of the target whose ">name / Length" header has already been
     * printed, or "" while none has.  This is a borrowed pointer, not a
     * copy: target->name stays valid until targetHitsFreeList() at the end
     * of the function, so nothing is allocated (the former strdup() copies
     * were never freed). */
    char *oldName = "";

    for (ref = target->axtList; ref != NULL; ref = ref->next)
	{
	axt = ref->axt;

	matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
	identity = round(100.0 * matches / axt->symCount);
	/* skip output if minIdentity not reached */
	if (identity < minIdentity)
	    continue;

	/* print target sequence name and length only once */
	if (!sameWord(oldName, target->name))
	    {
	    fprintf(f, "\n\n>%s \n", target->name);
	    fprintf(f, " Length = %d\n", target->size);
	    oldName = target->name;
	    }

	fprintf(f, "\n");
	fprintf(f, " Score = %d bits (%d), Expect = ",
	     blastzScoreToNcbiBits(axt->score),
	     blastzScoreToNcbiScore(axt->score));
	ncbiPrintE(f, blastzScoreToNcbiExpectation(axt->score));
	fprintf(f, "\n");

	if (isProt)
	    {
	    int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
	    gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
	    fprintf(f, " Identities = %d/%d (%d%%),",
		 matches, axt->symCount, round(100.0 * matches / axt->symCount));
	    fprintf(f, " Positives = %d/%d (%d%%),",
		 positives, axt->symCount, round(100.0 * positives / axt->symCount));
	    fprintf(f, " Gaps = %d/%d (%d%%)\n",
		 gaps, axt->symCount, round(100.0 * gaps / axt->symCount));
	    if (axt->frame != 0) 
		fprintf(f, " Frame = %c%d\n", axt->tStrand, axt->frame);
	    /* set the special global variable, answer_for_kg.
	       This is needed for Known Genes track building.  Fan 1/21/03 */
	    answer_for_kg=axt->symCount - matches;
	    }
	else
	    {
	    fprintf(f, " Identities = %d/%d (%d%%)\n",
		 matches, axt->symCount, round(100.0 * matches / axt->symCount));
	    /* blast displays dna searches as +- instead of blat's default -+ */
	    if (!isTranslated)
		if ((axt->qStrand == '-') && (axt->tStrand == '+'))
		    {
		    reverseIntRange(&axt->qStart, &axt->qEnd, querySize);
		    reverseIntRange(&axt->tStart, &axt->tEnd, target->size);
		    reverseComplement(axt->qSym, axt->symCount);
		    reverseComplement(axt->tSym, axt->symCount);
		    axt->qStrand = '+';
		    axt->tStrand = '-';
		    }
	    fprintf(f, " Strand = %s / %s\n",
		 nameForStrand(axt->qStrand), nameForStrand(axt->tStrand));
	    }
	fprintf(f, "\n");
	blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
	}
    }

fprintf(f, " Database: %s\n", databaseName);

/* Cleanup time. */
targetHitsFreeList(&targetList);
}
static void wuBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
	FILE *f, char *databaseName, int databaseSeqCount, 
	double databaseLetterCount, char *ourId)
/* Do wublast-like output at end of processing query.
 *
 * Writes to f, in order: a program/copyright/reference banner (plus a
 * sensitivity notice for DNA), the query and database description, a summary
 * table with one line per target, and then for every target its alignments,
 * those on the query '+' strand first and those on the '-' strand after.
 *
 * ourId defaults to "axtBlastOut" when NULL.
 *
 * The "P =" field of each alignment is the probability derived from that
 * alignment's expectation with expectationToProbability(), the same
 * conversion the summary table uses for its P(N) column.  (It used to repeat
 * the expectation value itself.)
 *
 * NOTE(review): round() results are printed with %d below; this relies on
 * round being an int-valued project macro rather than C99 round() - confirm. */
{
char asciiNum[32];
struct targetHits *targetList = NULL, *target;
char *queryName;
int isRc;
int querySize = abList->qSize;
boolean isTranslated = (abList->axtList->frame != 0);

/* Print out stuff that doesn't depend on query or database. */
if (ourId == NULL)
    ourId = "axtBlastOut";
fprintf(f, "%s 2.0MP-WashU [%s]\n", progType(isProt, abList, TRUE), ourId);
fprintf(f, "\n");
fprintf(f, "Copyright (C) 2000-2002 Jim Kent\n");
fprintf(f, "All Rights Reserved\n");
fprintf(f, "\n");
fprintf(f, "Reference: Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
fprintf(f, "\n");
if (!isProt)
    {
    fprintf(f, "Notice: this program and its default parameter settings are optimized to find\n");
    fprintf(f, "nearly identical sequences very rapidly. For slower but more sensitive\n");
    fprintf(f, "alignments please use other methods.\n");
    fprintf(f, "\n");
    }

/* Print query and database info. */
queryName = abList->axtList->qName;
fprintf(f, "Query= %s\n", queryName);
fprintf(f, " (%d letters; record %d)\n", abList->qSize, queryIx);
fprintf(f, "\n");
fprintf(f, "Database: %s\n", databaseName);
sprintLongWithCommas(asciiNum, databaseLetterCount);
fprintf(f, " %d sequences; %s total letters\n", databaseSeqCount, asciiNum);
fprintf(f, "Searching....10....20....30....40....50....60....70....80....90....100%% done\n");
fprintf(f, "\n");

targetList = bundleIntoTargets(abList);

/* Print out summary of hits. */
fprintf(f, " Smallest\n");
fprintf(f, " Sum\n");
fprintf(f, " High Probability\n");
fprintf(f, "Sequences producing High-scoring Segment Pairs: Score P(N) N\n");
fprintf(f, "\n");
for (target = targetList; target != NULL; target = target->next)
    {
    double expectation = blastzScoreToWuExpectation(target->score, databaseLetterCount);
    double p = expectationToProbability(expectation);
    fprintf(f, "%-61s %4d %8.1e %2d\n", target->name, 
	blastzToWublastScore(target->score), p, slCount(target->axtList));
    }

/* Print out details on each target. */
for (target = targetList; target != NULL; target = target->next)
    {
    fprintf(f, "\n\n>%s\n", target->name);
    fprintf(f, " Length = %d\n", target->size);
    fprintf(f, "\n");
    /* Two passes over the target's alignments: '+' query strand, then '-'. */
    for (isRc=0; isRc <= 1; ++isRc)
	{
	boolean saidStrand = FALSE;
	char strand = (isRc ? '-' : '+');
	char *strandName = nameForStrand(strand);
	struct axtRef *ref;
	struct axt *axt;
	for (ref = target->axtList; ref != NULL; ref = ref->next)
	    {
	    axt = ref->axt;
	    if (axt->qStrand == strand)
		{
		int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
		int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
		/* Computed once; P is derived from it rather than repeating it. */
		double expectation = blastzScoreToWuExpectation(axt->score, databaseLetterCount);
		double p = expectationToProbability(expectation);

		/* The strand heading is printed once per pass, DNA only. */
		if (!saidStrand)
		    {
		    saidStrand = TRUE;
		    if (!isProt)
			fprintf(f, " %s Strand HSPs:\n\n", strandName);
		    }
		fprintf(f, " Score = %d (%2.1f bits), Expect = %5.1e, P = %5.1e\n",
		     blastzToWublastScore(axt->score), 
		     blastzScoreToWuBits(axt->score, isProt),
		     expectation,
		     p);
		fprintf(f, " Identities = %d/%d (%d%%), Positives = %d/%d (%d%%)",
		     matches, axt->symCount, round(100.0 * matches / axt->symCount),
		     positives, axt->symCount, round(100.0 * positives / axt->symCount));
		if (isProt)
		    {
		    if (axt->frame != 0)
			fprintf(f, ", Frame = %c%d", axt->tStrand, axt->frame);
		    fprintf(f, "\n");
		    }
		else
		    fprintf(f, ", Strand = %s / Plus\n", strandName);
		fprintf(f, "\n");
		blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
		}
	    }
	}
    }

/* Cleanup time. */
targetHitsFreeList(&targetList);
}
/// Filter the building blocks of a reaction, keeping only usable reagents.
///
/// For each reactant position, a reagent from \p bbs is dropped when any of
/// the following holds:
///  - countMatches() against that position's reactant template returns 0;
///  - it returns more than params.reagentMaxMatchCount;
///  - params.sanePartialProducts is set and, for at least one product set
///    returned by rxn.runReactant(), no product could be sanitized.
///
/// \param rxn     reaction supplying the reactant templates
/// \param bbs     reagents, one list per reactant position; must not have
///                more lists than the reaction has reactant templates
///                (enforced by PRECONDITION)
/// \param params  filtering options (reagentMaxMatchCount,
///                sanePartialProducts)
/// \return a BBS with the same number of lists as \p bbs, each holding the
///         surviving reagents in their original order
///
/// When reagents were removed at a position, the count is reported on
/// rdInfoLog.
BBS removeNonmatchingReagents(const ChemicalReaction &rxn, BBS bbs,
                              const EnumerationParams &params) {
  PRECONDITION(bbs.size() <= rxn.getNumReactantTemplates(),
               "Number of Reagents not compatible with reaction templates");
  BBS result;
  result.resize(bbs.size());

  for (size_t reactant_idx = 0; reactant_idx < bbs.size(); ++reactant_idx) {
    size_t removedCount = 0;
    // An INT_MAX limit is handed to countMatches as 0.
    // presumably 0 means "do not cap the match count" - verify against
    // countMatches
    const unsigned int maxMatches =
        (params.reagentMaxMatchCount == INT_MAX)
            ? 0
            : rdcast<unsigned int>(params.reagentMaxMatchCount);

    ROMOL_SPTR reactantTemplate = rxn.getReactants()[reactant_idx];
    for (size_t reagent_idx = 0; reagent_idx < bbs[reactant_idx].size();
         ++reagent_idx) {
      ROMOL_SPTR mol = bbs[reactant_idx][reagent_idx];
      size_t matches = countMatches(*mol, *reactantTemplate, maxMatches);

      bool removeReagent = false;
      if (!matches ||
          matches > rdcast<size_t>(params.reagentMaxMatchCount)) {
        removeReagent = true;
      }

      if (!removeReagent && params.sanePartialProducts) {
        // see if we have any sane products in the results
        std::vector<MOL_SPTR_VECT> partialProducts =
            rxn.runReactant(mol, reactant_idx);
        for (size_t productTemplate_idx = 0;
             productTemplate_idx < partialProducts.size();
             ++productTemplate_idx) {
          int saneProducts = 0;
          for (size_t product_idx = 0;
               product_idx < partialProducts[productTemplate_idx].size();
               ++product_idx) {
            RWMol *m = dynamic_cast<RWMol *>(
                partialProducts[productTemplate_idx][product_idx].get());
            if (!m) {
              // Not an RWMol (or an empty pointer): it cannot be sanitized,
              // so it does not count as sane. Dereferencing it would be
              // undefined behaviour, which the catch below cannot intercept.
              continue;
            }
            try {
              MolOps::sanitizeMol(*m);
              saneProducts++;
            } catch (...) {
              // Deliberately ignored: a product that fails to sanitize is
              // simply not counted.
            }
          }

          if (!saneProducts) {
            // if any product template has no sane products, we bail
            removeReagent = true;
            break;
          }
        }
      }

      if (removeReagent)
        removedCount++;
      else
        result[reactant_idx].push_back(mol);
    }

    if (removedCount) {
      BOOST_LOG(rdInfoLog) << "Removed " << removedCount
                           << " non matching reagents at template "
                           << reactant_idx << std::endl;
    }
  }
  return result;
}