// Resets the cis-trans parity of every bond incident to an R-site of the
// query, then repeats the same cleanup for each fragment of each R-group.
void MangoSubstructure::_correctQueryStereo (QueryMolecule &query)
{
   // Stereo marks on bonds that lead to an R-site are dropped
   for (int atom_idx = query.vertexBegin(); atom_idx != query.vertexEnd(); atom_idx = query.vertexNext(atom_idx))
   {
      if (query.isRSite(atom_idx))
      {
         const Vertex &rsite = query.getVertex(atom_idx);

         for (int k = rsite.neiBegin(); k != rsite.neiEnd(); k = rsite.neiNext(k))
         {
            const int bond_idx = rsite.neiEdge(k);

            if (query.cis_trans.getParity(bond_idx) != 0)
               query.cis_trans.setParity(bond_idx, 0);
         }
      }
   }

   // R-group fragments are query molecules themselves: process them recursively.
   // R-group numbering starts from 1.
   MoleculeRGroups &groups = query.rgroups;
   const int group_count = groups.getRGroupCount();

   for (int rg = 1; rg <= group_count; rg++)
   {
      PtrPool<BaseMolecule> &members = groups.getRGroup(rg).fragments;

      for (int f = members.begin(); f != members.end(); f = members.next(f))
         _correctQueryStereo(members[f]->asQueryMolecule());
   }
}
// Walks over the query atoms and, for each one mapped onto a target atom that
// belongs to a pi-system, makes sure that pi-system is extracted and flagged
// as mapped. The first time a pi-system is flagged, all fixed constraints of
// its localizer are released with unfixAll().
//
//   query   - query molecule whose vertices are enumerated
//   mapping - query-vertex -> target-vertex map; negative entries are skipped
void MoleculePiSystemsMatcher::_markMappedPiSystems (QueryMolecule &query, const int *mapping)
{
   for (int q_atom = query.vertexBegin(); q_atom != query.vertexEnd(); q_atom = query.vertexNext(q_atom))
   {
      const int t_atom = mapping[q_atom];

      // Negative mapping value: this query vertex is ignored
      if (t_atom >= 0)
      {
         const int system_idx = _atom_pi_system_idx[t_atom];

         if (system_idx != _NOT_IN_PI_SYSTEM)
         {
            // Extraction is done on demand
            if (!_pi_systems[system_idx].initialized)
               _extractPiSystem(system_idx);

            _Pi_System &system = _pi_systems[system_idx];

            if (!system.pi_system_mapped)
            {
               system.pi_system_mapped = true;
               system.localizer->unfixAll();
            }
         }
      }
   }
}
// Tells whether aromaticity matching is needed for the given query.
//
// Returns true as soon as a bond is found that is marked as "can be aromatic"
// and whose order constraint also admits a single or a double bond. R-group
// fragments are checked recursively. Returns false if no such bond exists.
bool AromaticityMatcher::isNecessary (QueryMolecule &query)
{
   for (int bond_idx = query.edgeBegin(); bond_idx < query.edgeEnd(); bond_idx = query.edgeNext(bond_idx))
   {
      if (query.aromaticity.canBeAromatic(bond_idx))
      {
         QueryMolecule::Bond &qbond = query.getBond(bond_idx);

         // The bond can be aromatic, but it can also be a plain single or double bond
         if (qbond.possibleValue(QueryMolecule::BOND_ORDER, BOND_SINGLE) ||
             qbond.possibleValue(QueryMolecule::BOND_ORDER, BOND_DOUBLE))
            return true;
      }
   }

   // Same question for every fragment of every R-group (numbered from 1)
   MoleculeRGroups &groups = query.rgroups;
   const int group_count = groups.getRGroupCount();

   for (int rg = 1; rg <= group_count; rg++)
   {
      PtrPool<BaseMolecule> &members = groups.getRGroup(rg).fragments;

      for (int f = members.begin(); f != members.end(); f = members.next(f))
      {
         QueryMolecule &fragment = members[f]->asQueryMolecule();

         if (AromaticityMatcher::isNecessary(fragment))
            return true;
      }
   }

   return false;
}
// Prepares a SMARTS query for matching.
//
//   query_in  - source query; it is only read here
//   query_out - receives a submolecule of query_in whose vertices follow the
//               order produced by MoleculeSubstructureMatcher::makeTransposition
//
// After the copy, the neighbourhood counters are computed for query_out and
// query_out.optimize() is called.
void MangoSubstructure::_initSmartsQuery (QueryMolecule &query_in, QueryMolecule &query_out)
{
   QS_DEF(Array<int>, vertex_order);

   // Vertex order to be used for the output query
   MoleculeSubstructureMatcher::makeTransposition(query_in, vertex_order);

   // Build the reordered copy
   query_out.makeSubmolecule(query_in, vertex_order, 0);

   // Counters are taken before optimize() is applied
   _nei_query_counters.calculate(query_out);
   query_out.optimize();
}
// Creates an aromatizer for the given query molecule.
//
//   molecule - query molecule passed to the AromatizerBase constructor; the
//              pi-label array is sized by its vertexEnd()
//   options  - aromaticity options, copied into _options
//
// The initial mode is FUZZY and cycle collecting is switched off.
QueryMoleculeAromatizer::QueryMoleculeAromatizer (QueryMolecule &molecule, const AromaticityOptions &options) :
   AromatizerBase(molecule),
   TL_CP_GET(_pi_labels),
   TL_CP_GET(_aromatic_cycles)
{
   // Plain state first
   _options = options;
   _mode = FUZZY;
   _collecting = false;

   // One pi-label slot per possible vertex index
   _pi_labels.clear_resize(molecule.vertexEnd());

   // Start with an empty cycle list and some preallocated room
   _aromatic_cycles.clear();
   _aromatic_cycles.reserve(100);
}
// Runs the aromatizer in FUZZY mode and rebuilds mol.aromaticity from scratch.
//
// A bond is flagged with setCanBeAromatic(..., true) when its order constraint
// admits BOND_AROMATIC or when the aromatizer reports the bond as aromatic.
//
// Returns true if at least one bond was flagged.
bool QueryMoleculeAromatizer::_aromatizeBondsFuzzy (QueryMolecule &mol, const AromaticityOptions &options)
{
   QueryMoleculeAromatizer fuzzy(mol, options);

   fuzzy.setMode(QueryMoleculeAromatizer::FUZZY);
   fuzzy.precalculatePiLabels();
   fuzzy.aromatize();

   // Previous marks are discarded
   mol.aromaticity.clear();

   bool any_marked = false;

   for (int bond_idx = mol.edgeBegin(); bond_idx < mol.edgeEnd(); bond_idx = mol.edgeNext(bond_idx))
   {
      const bool allows_aromatic =
         mol.getBond(bond_idx).possibleValue(QueryMolecule::BOND_ORDER, BOND_AROMATIC);

      if (!allows_aromatic && !fuzzy.isBondAromatic(bond_idx))
         continue;

      mol.aromaticity.setCanBeAromatic(bond_idx, true);
      any_marked = true;
   }

   return any_marked;
}
// Prepares a regular (non-SMARTS) query for substructure matching.
//
//   query_in  - source query; it is modified in place (stereo correction and
//               aromatization)
//   query_out - receives a submolecule of query_in whose vertices follow the
//               order given by
//               _nei_query_counters.makeTranspositionForSubstructure
//
// On return _nei_query_counters holds the counters computed for query_out.
void MangoSubstructure::_initQuery (QueryMolecule &query_in, QueryMolecule &query_out)
{
   // Normalize the input query
   _correctQueryStereo(query_in);
   QueryMoleculeAromatizer::aromatizeBonds(query_in, AromaticityOptions::BASIC);

   // Counters of the input query are needed to choose the vertex order
   _nei_query_counters.calculate(query_in);

   QS_DEF(Array<int>, vertex_order);
   _nei_query_counters.makeTranspositionForSubstructure(query_in, vertex_order);

   // Build the reordered copy and recompute the counters for it
   query_out.makeSubmolecule(query_in, vertex_order, 0);
   _nei_query_counters.calculate(query_out);
}
// Some cycles with query features can be aromatized bool QueryMoleculeAromatizer::_aromatizeBondsExact (QueryMolecule &qmol, const AromaticityOptions &options) { bool aromatized = false; QueryMoleculeAromatizer aromatizer(qmol, options); aromatizer.setMode(QueryMoleculeAromatizer::EXACT); aromatizer.precalculatePiLabels(); aromatizer.aromatize(); for (int e_idx = qmol.edgeBegin(); e_idx < qmol.edgeEnd(); e_idx = qmol.edgeNext(e_idx)) if (aromatizer.isBondAromatic(e_idx)) { AutoPtr<QueryMolecule::Bond> bond(qmol.releaseBond(e_idx)); bond->removeConstraints(QueryMolecule::BOND_ORDER); AutoPtr<QueryMolecule::Bond> arom_bond( new QueryMolecule::Bond(QueryMolecule::BOND_ORDER, BOND_AROMATIC)); qmol.resetBond(e_idx, QueryMolecule::Bond::und(bond.release(), arom_bond.release())); aromatized = true; } return aromatized; }
// Classifies the query atom `aid` of `qm`.
//
//   list - filled by collectAtomList with the collected element numbers
//
// Returns
//   QUERY_ATOM_A        - the atom expression left by stripKnownAttrs is "not hydrogen"
//   QUERY_ATOM_X        - plain list exactly equal to F, Cl, Br, I, At (in this order)
//   QUERY_ATOM_Q        - negated list made of exactly C and H (either order)
//   QUERY_ATOM_NOTLIST  - any other negated list
//   QUERY_ATOM_LIST     - any other plain list
//   -1                  - no list could be collected, or the list is empty
int QueryMolecule::parseQueryAtom (QueryMolecule& qm, int aid, Array<int>& list)
{
   QueryMolecule::Atom& atom = qm.getAtom(aid);
   QueryMolecule::Atom* core = stripKnownAttrs(atom);

   if (core != NULL && isNotAtom(*core, ELEM_H))
      return QUERY_ATOM_A;

   const bool top_is_not = (atom.type == QueryMolecule::OP_NOT);
   bool negated = false;

   // Try the atom itself first, then the operand of a top-level NOT
   bool collected = collectAtomList(atom, list, negated);

   if (!collected && top_is_not)
   {
      // A list that is itself negated under the NOT would be a double negation: rejected
      collected = collectAtomList(*atom.child(0), list, negated) && !negated;
   }

   if (!collected)
      return -1;

   const int count = list.size();

   if (count == 0)
      return -1;

   if (negated || top_is_not)
   {
      if (count == 2)
      {
         const bool c_then_h = (list[0] == ELEM_C && list[1] == ELEM_H);
         const bool h_then_c = (list[0] == ELEM_H && list[1] == ELEM_C);

         if (c_then_h || h_then_c)
            return QUERY_ATOM_Q;
      }
      return QUERY_ATOM_NOTLIST;
   }

   if (count == 5 && list[0] == ELEM_F && list[1] == ELEM_Cl && list[2] == ELEM_Br &&
       list[3] == ELEM_I && list[4] == ELEM_At)
      return QUERY_ATOM_X;

   return QUERY_ATOM_LIST;
}
// Aromatizes one R-group fragment.
//
// A temporary R-site atom is added to the fragment and bonded to every
// attachment point (once per distinct point), _aromatizeBonds is run with that
// atom passed as `additional_atom`, and finally the temporary atom is removed.
//
//   add_single_bonds - when true the temporary bonds are constrained to
//                      BOND_SINGLE, otherwise default-constructed bonds are used
//
// Returns the result of _aromatizeBonds for the fragment.
bool QueryMoleculeAromatizer::_aromatizeRGroupFragment (QueryMolecule &fragment,
   bool add_single_bonds, const AromaticityOptions &options)
{
   // Temporary atom that stands for the place where the fragment is attached
   const int helper_atom = fragment.addAtom(new QueryMolecule::Atom(QueryMolecule::ATOM_RSITE, 1));

   // Attachment point orders are numbered from 1
   const int order_count = fragment.attachmentPointCount();

   for (int order = 1; order <= order_count; order++)
   {
      int point;

      // getAttachmentPoint returns -1 when there are no more points of this order
      for (int idx = 0; (point = fragment.getAttachmentPoint(order, idx)) != -1; idx++)
      {
         // The same atom may be listed more than once: connect it only once
         if (fragment.findEdgeIndex(point, helper_atom) != -1)
            continue;

         AutoPtr<QueryMolecule::Bond> link(add_single_bonds ?
            new QueryMolecule::Bond(QueryMolecule::BOND_ORDER, BOND_SINGLE) :
            new QueryMolecule::Bond());

         fragment.addBond(point, helper_atom, link.release());
      }
   }

   const bool aromatized = _aromatizeBonds(fragment, helper_atom, options);

   // Drop the temporary atom again
   QS_DEF(Array<int>, to_remove);
   to_remove.clear();
   to_remove.push(helper_atom);
   fragment.removeAtoms(to_remove);

   return aromatized;
}
// Transfers charge and valence requirements of the mapped query atoms to the
// electron localizers of the pi-systems the corresponding target atoms belong to.
//
//   query   - query molecule whose atoms are enumerated
//   mapping - query-vertex -> target-vertex map; negative entries are skipped
//
// Returns false as soon as a localizer refuses a charge (fixAtomCharge) or a
// valence (fixAtomConnectivity); returns true when every requirement was accepted.
//
// Throws Error when an atom has a charge or valence constraint that cannot be
// expressed as a single value (getAtomCharge returns CHARGE_UNKNOWN, or
// getExplicitValence returns -1, while the constraint is present).
bool MoleculePiSystemsMatcher::_fixAtoms (QueryMolecule &query, const int *mapping)
{
   // Fix charges and valences
   for (int qv = query.vertexBegin(); qv != query.vertexEnd(); qv = query.vertexNext(qv))
   {
      int v = mapping[qv];
      if (v < 0)
         continue; // Such vertex must be ignored

      int pi_system_idx = _atom_pi_system_idx[v];
      if (pi_system_idx == _NOT_IN_PI_SYSTEM)
         continue;

      _Pi_System &pi_system = _pi_systems[pi_system_idx];
      QueryMolecule::Atom &qatom = query.getAtom(qv);

      // Index of the target atom inside the pi-system
      int pv = pi_system.inv_mapping[v];

      int charge = query.getAtomCharge(qv);
      if (charge != CHARGE_UNKNOWN)
      {
         bool ret = pi_system.localizer->fixAtomCharge(pv, charge);
         if (!ret)
            return false;
      }
      else if (qatom.hasConstraint(QueryMolecule::ATOM_CHARGE))
         throw Error("Unsupported atom charge specified");

      int valence = query.getExplicitValence(qv);
      if (valence != -1)
      {
         bool ret = pi_system.localizer->fixAtomConnectivity(pv, valence);
         if (!ret)
            return false;
      }
      else if (qatom.hasConstraint(QueryMolecule::ATOM_VALENCE))
         // This branch is about the valence constraint; the message used to
         // say "charge", copied from the branch above.
         throw Error("Unsupported atom valence specified");
   }

   return true;
}
// Returns true when the atom expression that stripKnownAttrs leaves for query
// atom `aid` exists and is a plain ATOM_NUMBER node; false otherwise.
bool QueryMolecule::queryAtomIsRegular (QueryMolecule& qm, int aid)
{
   QueryMolecule::Atom& atom = qm.getAtom(aid);
   QueryMolecule::Atom* core = stripKnownAttrs(atom);

   if (!core)
      return false;

   return core->type == QueryMolecule::ATOM_NUMBER;
}
bool QueryMoleculeAromatizer::_aromatizeBonds (QueryMolecule &mol, int additional_atom, const AromaticityOptions &options) { bool aromatized = false; // Mark edges that can be aromatic in some matching aromatized |= _aromatizeBondsFuzzy(mol, options); // Aromatize all aromatic cycles aromatized |= _aromatizeBondsExact(mol, options); MoleculeRGroups &rgroups = mol.rgroups; int n_rgroups = rgroups.getRGroupCount(); // Check if r-groups are attached with single bonds QS_DEF(Array<bool>, rgroups_attached_single); rgroups_attached_single.clear(); for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v)) { if (v == additional_atom) continue; if (mol.isRSite(v)) { // Check if neighbor bonds are single const Vertex &vertex = mol.getVertex(v); for (int nei = vertex.neiBegin(); nei != vertex.neiEnd(); nei = vertex.neiNext(nei)) { int edge = vertex.neiEdge(nei); QueryMolecule::Bond &bond = mol.getBond(edge); // DP TODO: implement smth. like Node::possibleOtherValueExcept() ... bool can_be_double = bond.possibleValue(QueryMolecule::BOND_ORDER, BOND_DOUBLE); bool can_be_triple = bond.possibleValue(QueryMolecule::BOND_ORDER, BOND_TRIPLE); bool can_be_arom = bond.possibleValue(QueryMolecule::BOND_ORDER, BOND_AROMATIC); if (can_be_double || can_be_triple || can_be_arom) { QS_DEF(Array<int>, sites); mol.getAllowedRGroups(v, sites); for (int j = 0; j < sites.size(); j++) { rgroups_attached_single.expandFill(sites[j] + 1, true); rgroups_attached_single[sites[j]] = false; } } } } } rgroups_attached_single.expandFill(n_rgroups + 1, true); for (int i = 1; i <= n_rgroups; i++) { PtrPool<BaseMolecule> &frags = rgroups.getRGroup(i).fragments; for (int j = frags.begin(); j != frags.end(); j = frags.next(j)) { QueryMolecule &fragment = frags[j]->asQueryMolecule(); aromatized |= _aromatizeRGroupFragment(fragment, rgroups_attached_single[i], options); } } return aromatized; }
QueryMoleculeAromatizer::QueryMoleculeAromatizer (QueryMolecule &molecule, const AromaticityOptions &options) : AromatizerBase(molecule), CP_INIT, TL_CP_GET(_pi_labels), TL_CP_GET(_aromatic_cycles) { _pi_labels.clear_resize(molecule.vertexEnd()); _aromatic_cycles.clear(); _aromatic_cycles.reserve(100); _mode = FUZZY; _collecting = false; _options = options; } void QueryMoleculeAromatizer::precalculatePiLabels () { for (int v_idx = _basemol.vertexBegin(); v_idx < _basemol.vertexEnd(); v_idx = _basemol.vertexNext(v_idx)) _pi_labels[v_idx] = _getPiLabel(v_idx); } bool QueryMoleculeAromatizer::_checkVertex (int v_idx) { return _pi_labels[v_idx].canBeAromatic(); } bool QueryMoleculeAromatizer::_isCycleAromatic (const int *cycle, int cycle_len) { QueryMolecule &query = (QueryMolecule &)_basemol; // Single/double bond can't be aromatic and Check if cycle wasn't aromatic bool all_aromatic = true; for (int i = 0; i < cycle_len; i++) { int a = cycle[i], b = cycle[(i + 1) % cycle_len]; int e_idx = _basemol.findEdgeIndex(a, b); if (!query.possibleBondOrder(e_idx, BOND_AROMATIC)) all_aromatic = false; } if (all_aromatic) return false; PiValue cycle_sum(0, 0); // Check Huckel's rule for (int i = 0; i < cycle_len; i++) { PiValue &cur = _pi_labels[cycle[i]]; if (cur.min == -1 || cur.max == -1) throw Error("interal error in _isCycleAromatic"); cycle_sum.max += cur.max; cycle_sum.min += cur.min; } // Check Huckel's rule if (_mode == EXACT) { if (cycle_sum.min != cycle_sum.max) return false; int sum = cycle_sum.min; // Check if cycle have pi-lables sum 4n+2 for drawn query if (sum % 4 != 2) return false; return true; } // // Fuzzy mode: check if circle can have 4n-2 value // if (cycle_sum.max - cycle_sum.min > 3) return true; int residue_min = (cycle_sum.min + 2) % 4; int residue_max = (cycle_sum.max + 2) % 4; if (residue_min == 0 || residue_min > residue_max) return true; return false; }
bool MoleculePiSystemsMatcher::_fixBonds (QueryMolecule &query, const int *mapping) { for (int e = query.edgeBegin(); e != query.edgeEnd(); e = query.edgeNext(e)) { const Edge &query_edge = query.getEdge(e); if (mapping[query_edge.beg] < 0 || mapping[query_edge.end] < 0) continue; // Edges connected with ignored vertices int target_edge = Graph::findMappedEdge(query, _target, e, mapping); const Edge &edge = _target.getEdge(target_edge); int p1_idx = _atom_pi_system_idx[edge.beg]; int p2_idx = _atom_pi_system_idx[edge.end]; if (p1_idx == _NOT_IN_PI_SYSTEM || p2_idx == _NOT_IN_PI_SYSTEM || p1_idx != p2_idx) continue; if (!_pi_systems[p1_idx].initialized) throw Error("pi-system must be initialized here"); _Pi_System &pi_system = _pi_systems[p1_idx]; int pi_sys_edge = Graph::findMappedEdge(_target, pi_system.pi_system, target_edge, pi_system.inv_mapping.ptr()); // Get target topology int topology = _target.getBondTopology(target_edge); QueryMolecule::Bond &qbond = query.getBond(e); bool can_be_single = qbond.possibleValuePair( QueryMolecule::BOND_ORDER, BOND_SINGLE, QueryMolecule::BOND_TOPOLOGY, topology); bool can_be_double = qbond.possibleValuePair( QueryMolecule::BOND_ORDER, BOND_DOUBLE, QueryMolecule::BOND_TOPOLOGY, topology); bool can_be_triple = qbond.possibleValuePair( QueryMolecule::BOND_ORDER, BOND_TRIPLE, QueryMolecule::BOND_TOPOLOGY, topology); if (!can_be_single && !can_be_double && !can_be_triple) return false; if (can_be_single && can_be_double && can_be_triple) continue; bool ret = false; // initializing to avoid compiler warning if (can_be_single && can_be_double) // Here can_be_triple = false because of previous check ret = pi_system.localizer->fixBondSingleDouble(pi_sys_edge); else { if (can_be_triple) { if (can_be_single) throw Error("Unsupported bond order specified (can be single or triple)"); else if (can_be_double) throw Error("Unsupported bond order specified (can be double or triple)"); ret = pi_system.localizer->fixBond(pi_sys_edge, 
BOND_TRIPLE); } if (can_be_single) ret = pi_system.localizer->fixBond(pi_sys_edge, BOND_SINGLE); if (can_be_double) ret = pi_system.localizer->fixBond(pi_sys_edge, BOND_DOUBLE); } if (!ret) return false; } return true; }
bool MoleculeSubstructureMatcher::matchQueryAtom (QueryMolecule::Atom *query, BaseMolecule &target, int super_idx, FragmentMatchCache *fmcache, dword flags) { int i; switch (query->type) { case QueryMolecule::OP_NONE: return true; case QueryMolecule::OP_AND: for (i = 0; i < query->children.size(); i++) if (!matchQueryAtom(query->child(i), target, super_idx, fmcache, flags)) return false; return true; case QueryMolecule::OP_OR: for (i = 0; i < query->children.size(); i++) if (matchQueryAtom(query->child(i), target, super_idx, fmcache, flags)) return true; return false; case QueryMolecule::OP_NOT: return !matchQueryAtom(query->child(0), target, super_idx, fmcache, flags ^ MATCH_DISABLED_AS_TRUE); case QueryMolecule::ATOM_NUMBER: return query->valueWithinRange(target.getAtomNumber(super_idx)); case QueryMolecule::ATOM_PSEUDO: return target.isPseudoAtom(super_idx) && strcmp(query->alias.ptr(), target.getPseudoAtom(super_idx)) == 0; case QueryMolecule::ATOM_RSITE: return true; case QueryMolecule::ATOM_ISOTOPE: return query->valueWithinRange(target.getAtomIsotope(super_idx)); case QueryMolecule::ATOM_CHARGE: { if (flags & MATCH_ATOM_CHARGE) return query->valueWithinRange(target.getAtomCharge(super_idx)); return (flags & MATCH_DISABLED_AS_TRUE) != 0; } case QueryMolecule::ATOM_RADICAL: { if (target.isPseudoAtom(super_idx) || target.isRSite(super_idx)) return false; return query->valueWithinRange(target.getAtomRadical(super_idx)); } case QueryMolecule::ATOM_VALENCE: { if (flags & MATCH_ATOM_VALENCE) { if (target.isPseudoAtom(super_idx) || target.isRSite(super_idx)) return false; return query->valueWithinRange(target.getAtomValence(super_idx)); } return (flags & MATCH_DISABLED_AS_TRUE) != 0; } case QueryMolecule::ATOM_CONNECTIVITY: { int conn = target.getVertex(super_idx).degree(); if (!target.isPseudoAtom(super_idx) && !target.isRSite(super_idx)) conn += target.asMolecule().getImplicitH(super_idx); return query->valueWithinRange(conn); } case 
QueryMolecule::ATOM_TOTAL_BOND_ORDER: { // TODO: target.isPseudoAtom(super_idx) || target.isRSite(super_idx) return query->valueWithinRange(target.asMolecule().getAtomConnectivity(super_idx)); } case QueryMolecule::ATOM_TOTAL_H: { if (target.isPseudoAtom(super_idx) || target.isRSite(super_idx)) return false; return query->valueWithinRange(target.getAtomTotalH(super_idx)); } case QueryMolecule::ATOM_SUBSTITUENTS: return query->valueWithinRange(target.getAtomSubstCount(super_idx)); case QueryMolecule::ATOM_SSSR_RINGS: return query->valueWithinRange(target.vertexCountSSSR(super_idx)); case QueryMolecule::ATOM_SMALLEST_RING_SIZE: return query->valueWithinRange(target.vertexSmallestRingSize(super_idx)); case QueryMolecule::ATOM_RING_BONDS: case QueryMolecule::ATOM_RING_BONDS_AS_DRAWN: return query->valueWithinRange(target.getAtomRingBondsCount(super_idx)); case QueryMolecule::ATOM_UNSATURATION: return !target.isSaturatedAtom(super_idx); case QueryMolecule::ATOM_FRAGMENT: { if (fmcache == 0) throw Error("unexpected 'fragment' constraint"); QueryMolecule *fragment = query->fragment.get(); const char *smarts = fragment->fragment_smarts.ptr(); if (fragment->vertexCount() == 0) throw Error("empty fragment"); if (smarts != 0 && strlen(smarts) > 0) { fmcache->expand(super_idx + 1); int *value = fmcache->at(super_idx).at2(smarts); if (value != 0) return *value != 0; } MoleculeSubstructureMatcher matcher(target.asMolecule()); matcher.not_ignore_first_atom = true; matcher.setQuery(*fragment); matcher.fmcache = fmcache; bool result = matcher.fix(fragment->vertexBegin(), super_idx); if (result) result = matcher.find(); if (smarts != 0 && strlen(smarts) > 0) { fmcache->expand(super_idx + 1); fmcache->at(super_idx).insert(smarts, result ? 
1 : 0); } return result; } case QueryMolecule::ATOM_AROMATICITY: return query->valueWithinRange(target.getAtomAromaticity(super_idx)); case QueryMolecule::HIGHLIGHTING: return query->valueWithinRange((int)target.isAtomHighlighted(super_idx)); default: throw Error("bad query atom type: %d", query->type); } }
// Decides whether hydrogens of the target have to be unfolded (made explicit)
// before matching the given query.
//
//   query                   - query molecule to inspect
//   is_fragment             - true when `query` is matched as a fragment; a
//                             possible hydrogen in the first vertex then forces
//                             unfolding
//   disable_folding_query_h - true when query hydrogens must not be ignored
//
// Returns true as soon as one atom of the query (or of any R-group fragment,
// checked recursively) requires unfolding; false otherwise.
bool MoleculeSubstructureMatcher::_shouldUnfoldTargetHydrogens (QueryMolecule &query,
   bool is_fragment, bool disable_folding_query_h)
{
   for (int atom_idx = query.vertexBegin(); atom_idx != query.vertexEnd(); atom_idx = query.vertexNext(atom_idx))
   {
      // R-atoms are skipped
      if (query.isRSite(atom_idx))
         continue;

      if (query.possibleAtomNumberAndIsotope(atom_idx, ELEM_H, 0))
      {
         const Vertex &atom_vertex = query.getVertex(atom_idx);
         const int degree = atom_vertex.degree();

         // Degree 2 or higher => definitely not a hydrogen
         if (degree > 1)
            continue;

         // A lone atom that can be a hydrogen
         if (degree == 0)
            return true;

         // Exactly one neighbour from here on
         const int first_nei = atom_vertex.neiBegin();
         const int bond_idx = atom_vertex.neiEdge(first_nei);

         // A double/triple bond or a ring bond => not a hydrogen
         if (query.getBondOrder(bond_idx) > 1 || query.getBondTopology(bond_idx) == TOPOLOGY_RING)
            continue;

         // Unfold when:
         //  - the atom can be something other than hydrogen;
         //  - it is the first atom of a fragment: the first atom must match
         //    some target atom, otherwise [$([#1][N])]C would not match NC;
         //  - all embeddings are wanted, so query hydrogens cannot be ignored:
         //    for example, the number of matches of N-[#1] in N should be 3
         //    instead of 1.
         if (query.getAtomNumber(atom_idx) == -1 ||
             (is_fragment && atom_idx == query.vertexBegin()) ||
             disable_folding_query_h)
            return true;

         // Does the hydrogen take part in a stereocenter or a cis-trans bond?
         const int heavy_idx = atom_vertex.neiVertex(first_nei);

         if (query.stereocenters.exists(heavy_idx))
            return true;

         // For example for this query hydrogens should be unfolded: [H]\\C=C/C
         const Vertex &heavy_vertex = query.getVertex(heavy_idx);

         for (int k = heavy_vertex.neiBegin(); k != heavy_vertex.neiEnd(); k = heavy_vertex.neiNext(k))
         {
            if (query.cis_trans.getParity(heavy_vertex.neiEdge(k)) != 0)
               return true;
         }
      }

      // Check on the atom expression itself
      if (_shouldUnfoldTargetHydrogens_A(&query.getAtom(atom_idx), is_fragment, disable_folding_query_h))
         return true;
   }

   // The same question for the fragments of every R-group (numbered from 1)
   MoleculeRGroups &groups = query.rgroups;
   const int group_count = groups.getRGroupCount();

   for (int rg = 1; rg <= group_count; rg++)
   {
      PtrPool<BaseMolecule> &members = groups.getRGroup(rg).fragments;

      for (int f = members.begin(); f != members.end(); f = members.next(f))
      {
         QueryMolecule &fragment = members[f]->asQueryMolecule();

         if (_shouldUnfoldTargetHydrogens(fragment, is_fragment, disable_folding_query_h))
            return true;
      }
   }

   return false;
}