bool ReactionExactMatcher::_prepare_ee (EmbeddingEnumerator &ee, BaseMolecule &submol, Molecule &supermol, void *context) { int i; ReactionExactMatcher &self = *(ReactionExactMatcher *)context; for (i = submol.vertexBegin(); i != submol.vertexEnd(); i = submol.vertexNext(i)) { const Vertex &vertex = submol.getVertex(i); if (submol.getAtomNumber(i) == ELEM_H && vertex.degree() == 1 && submol.getAtomNumber(vertex.neiVertex(vertex.neiBegin())) != ELEM_H) if (submol.getAtomIsotope(i) == 0 || !(self.flags & MoleculeExactMatcher::CONDITION_ISOTOPE)) ee.ignoreSubgraphVertex(i); } for (i = supermol.vertexBegin(); i != supermol.vertexEnd(); i = supermol.vertexNext(i)) { const Vertex &vertex = supermol.getVertex(i); if (supermol.getAtomNumber(i) == ELEM_H && vertex.degree() == 1 && supermol.getAtomNumber(vertex.neiVertex(vertex.neiBegin())) != ELEM_H) if (supermol.getAtomIsotope(i) == 0 || !(self.flags & MoleculeExactMatcher::CONDITION_ISOTOPE)) ee.ignoreSupergraphVertex(i); } if (ee.countUnmappedSubgraphVertices() != ee.countUnmappedSupergraphVertices()) return false; if (ee.countUnmappedSubgraphEdges() != ee.countUnmappedSupergraphEdges()) return false; return true; }
// Rewrites five-valent nitrogens into a charge-separated form.
//
// For every atom reported by Molecule::isNitrogenV5, the first double bond to
// an uncharged, non-radical O or S neighbour is turned into a single bond,
// the nitrogen gets charge +1 and the neighbour gets charge -1.
//
// Background: the structure C[C@H](O)[C@H](COC)/N=[N+](\[O-])/C=CCCCCCC
// is loaded via InChI as CCCCCCC=CN(=O)=N[C@@H](COC)[C@H](C)O,
// and the cis-trans configuration of the O=N=N-C bond cannot be restored.
void IndigoInchi::neutralizeV5Nitrogen (Molecule &mol)
{
   for (int atom = mol.vertexBegin(); atom != mol.vertexEnd(); atom = mol.vertexNext(atom))
   {
      if (!mol.isNitrogenV5(atom))
         continue;

      const Vertex &vtx = mol.getVertex(atom);

      for (int n = vtx.neiBegin(); n != vtx.neiEnd(); n = vtx.neiNext(n))
      {
         const int bond = vtx.neiEdge(n);

         if (mol.getBondOrder(bond) == BOND_DOUBLE)
         {
            const int other = vtx.neiVertex(n);
            const int other_number = mol.getAtomNumber(other);
            const int other_charge = mol.getAtomCharge(other);
            const int other_radical = mol.getAtomRadical(other);

            const bool chalcogen = (other_number == ELEM_O || other_number == ELEM_S);

            if (chalcogen && other_charge == 0 && other_radical == 0)
            {
               mol.setAtomCharge(atom, 1);
               mol.setAtomCharge(other, -1);
               mol.setBondOrder(bond, BOND_SINGLE);
               // Only one double bond per nitrogen is rewritten
               break;
            }
         }
      }
   }
}
// Builds `cano_mol` as a submolecule of `source_mol` whose vertices follow
// the canonical numbering produced by AutomorphismSearch.
//
// Hydrogens with exactly one neighbour are flagged in the ignore array that
// is handed to the search. If the debug callback
// dbg_handle_canonical_component_cb is set, it is invoked with the result.
void MoleculeInChICompoment::_getCanonicalMolecule (Molecule &source_mol, Molecule &cano_mol)
{
   QS_DEF(Array<int>, skip_flags);
   skip_flags.clear_resize(source_mol.vertexEnd());
   skip_flags.zerofill();

   for (int v = source_mol.vertexBegin(); v < source_mol.vertexEnd(); v = source_mol.vertexNext(v))
   {
      if (source_mol.getAtomNumber(v) != ELEM_H)
         continue;
      if (source_mol.getVertex(v).degree() == 1)
         skip_flags[v] = 1;
   }

   AutomorphismSearch search;

   // Search mode
   search.getcanon = true;
   search.compare_vertex_degree_first = false;
   search.refine_reverse_degree = true;
   search.refine_by_sorted_neighbourhood = true;
   search.ignored_vertices = skip_flags.ptr();

   // Callbacks and their context
   search.cb_vertex_cmp = _cmpVertex;
   search.cb_compare_mapped = _cmpMappings;
   search.cb_check_automorphism = _checkAutomorphism;
   search.context = (void *)this;

   search.process(source_mol);

   QS_DEF(Array<int>, numbering);
   search.getCanonicalNumbering(numbering);

   cano_mol.makeSubmolecule(source_mol, numbering, NULL);

   if (dbg_handle_canonical_component_cb != NULL)
      dbg_handle_canonical_component_cb(cano_mol);
}
// Returns an integer code for the atom `vertex_idx` of `mol`:
//   - pseudo-atom: CRC32 of its label;
//   - R-site:      ELEM_RSITE;
//   - otherwise:   the atom number.
int MangoExact::vertexCode (Molecule &mol, int vertex_idx)
{
   if (mol.isPseudoAtom(vertex_idx))
   {
      const char *label = mol.getPseudoAtom(vertex_idx);
      return CRC32::get(label);
   }

   if (!mol.isRSite(vertex_idx))
      return mol.getAtomNumber(vertex_idx);

   return ELEM_RSITE;
}
// Computes the multiset of connected-component hashes of `mol`.
//
// Atoms with atom number ELEM_H are dropped first; each connected component
// of the remaining molecule is hashed with SubgraphHash (vertex codes taken
// from atomCode(), iteration limit (edgeCount + 1) / 2). `hash` is cleared
// and then receives one element per distinct component hash together with
// the number of components that produced it.
void MangoExact::calculateHash (Molecule &mol, Hash &hash)
{
   hash.clear();

   QS_DEF(Molecule, mol_without_h);
   QS_DEF(Array<int>, vertices);

   // Collect every non-hydrogen atom
   vertices.clear();
   for (int atom = mol.vertexBegin(); atom != mol.vertexEnd(); atom = mol.vertexNext(atom))
   {
      if (mol.getAtomNumber(atom) == ELEM_H)
         continue;
      vertices.push(atom);
   }

   mol_without_h.makeSubmolecule(mol, vertices, 0);

   // Decompose into connected components
   const int components_count = mol_without_h.countComponents();

   QS_DEF(Molecule, component);
   QS_DEF(Array<int>, vertex_codes);

   for (int comp = 0; comp < components_count; comp++)
   {
      Filter comp_filter(mol_without_h.getDecomposition().ptr(), Filter::EQ, comp);
      component.makeSubmolecule(mol_without_h, comp_filter, 0, 0);

      SubgraphHash hasher(component);

      vertex_codes.clear_resize(component.vertexEnd());
      for (int v = component.vertexBegin(); v != component.vertexEnd(); v = component.vertexNext(v))
         vertex_codes[v] = component.atomCode(v);

      hasher.vertex_codes = &vertex_codes;
      hasher.max_iterations = (component.edgeCount() + 1) / 2;

      dword component_hash = hasher.getHash();

      // Look for an already registered element with the same hash
      int match = -1;
      for (int k = 0; k < hash.size() && match < 0; k++)
         if (hash[k].hash == component_hash)
            match = k;

      if (match >= 0)
         hash[match].count++;
      else
      {
         HashElement &fresh = hash.push();
         fresh.count = 1;
         fresh.hash = component_hash;
      }
   }
}
void MoleculeRGroupsComposition::decorate(const Array<int> &fs, Molecule &mol) const { mol.clone(_mol, nullptr, nullptr); for (int i = 0; i < fs.size(); i++) { BaseMolecule &fragment = _fragment(i, fs[i]); int rsite = _rsite2vertex.at(i); int apcount = fragment.attachmentPointCount(); int apoint = fragment.getAttachmentPoint(apcount, 0); Array<int> map; mol.mergeWithMolecule(fragment, &map); int atom = mol.getAtomNumber(map[apoint]); if (mol.mergeAtoms(rsite, map[apoint]) == rsite) { mol.resetAtom(rsite, atom); } } mol.removeAttachmentPoints(); mol.rgroups.clear(); }
void CmfSaver::_encodeAtom (Molecule &mol, int idx, const int *mapping) { int number = 0; if (mol.isPseudoAtom(idx)) { const char *str = mol.getPseudoAtom(idx); size_t len = strlen(str); if (len < 1) throw Error("empty pseudo-atom"); if (len > 255) throw Error("pseudo-atom labels %d characters long are not supported (255 is the limit)", len); _encode(CMF_PSEUDOATOM); _encode((byte)len); do { _encode(*str); } while (*(++str) != 0); } else if (mol.isRSite(idx)) { int bits = mol.getRSiteBits(idx); if (bits > 255) { _encode(CMF_RSITE_EXT); _output->writePackedUInt((unsigned int)bits); } else { _encode(CMF_RSITE); _encode(bits); } } else { number = mol.getAtomNumber(idx); if (number <= 0 || number >= ELEM_MAX) throw Error("unexpected atom label"); _encode(number); } int charge = mol.getAtomCharge(idx); if (charge != 0) { int charge2 = charge - CMF_MIN_CHARGE; if (charge2 < 0 || charge2 >= CMF_NUM_OF_CHARGES) { _encode(CMF_CHARGE_EXT); int charge3 = charge + 128; if (charge3 < 0 || charge >= 256) throw Error("unexpected atom charge: %d", charge); _encode(charge3); } else _encode(charge2 + CMF_CHARGES); } int isotope = mol.getAtomIsotope(idx); if (isotope > 0) { int deviation = isotope - Element::getDefaultIsotope(number); if (deviation == 0) _encode(CMF_ISOTOPE_ZERO); else if (deviation == 1) _encode(CMF_ISOTOPE_PLUS1); else if (deviation == 2) _encode(CMF_ISOTOPE_PLUS2); else if (deviation == -1) _encode(CMF_ISOTOPE_MINUS1); else if (deviation == -2) _encode(CMF_ISOTOPE_MINUS2); else { deviation += 100; if (deviation < 0 || deviation > 255) throw Error("unexpected %s isotope: %d", Element::toString(number), isotope); _encode(CMF_ISOTOPE_OTHER); _encode(deviation); } } int radical = 0; if (!mol.isPseudoAtom(idx) && !mol.isRSite(idx)) { try { radical = mol.getAtomRadical(idx); } catch (Element::Error) { } } if (radical > 0) { if (radical == RADICAL_SINGLET) _encode(CMF_RADICAL_SINGLET); else if (radical == RADICAL_DOUBLET) _encode(CMF_RADICAL_DOUBLET); else if (radical == 
RADICAL_TRIPLET) _encode(CMF_RADICAL_TRIPLET); else throw Error("bad radical value: %d", radical); } MoleculeStereocenters &stereo = mol.stereocenters; int stereo_type = stereo.getType(idx); if (stereo_type == MoleculeStereocenters::ATOM_ANY) _encode(CMF_STEREO_ANY); else if (stereo_type != 0) { bool rigid; int code; const int *pyramid = stereo.getPyramid(idx); if (pyramid[3] == -1) rigid = MoleculeStereocenters::isPyramidMappingRigid(pyramid, 3, mapping); else rigid = MoleculeStereocenters::isPyramidMappingRigid(pyramid, 4, mapping); if (stereo_type == MoleculeStereocenters::ATOM_ABS) code = CMF_STEREO_ABS_0; else { int group = stereo.getGroup(idx); if (group < 1 || group > CMF_MAX_STEREOGROUPS) throw Error("stereogroup number %d out of range", group); if (stereo_type == MoleculeStereocenters::ATOM_AND) code = CMF_STEREO_AND_0 + group - 1; else // stereo_type == MoleculeStereocenters::ATOM_OR code = CMF_STEREO_OR_0 + group - 1; } if (!rigid) // CMF_STEREO_*_0 -> CMF_STEREO_*_1 code += CMF_MAX_STEREOGROUPS * 2 + 1; _encode(code); } if (mol.allene_stereo.isCenter(idx)) { int left, right, parity, subst[4]; mol.allene_stereo.getByAtomIdx(idx, left, right, subst, parity); if (subst[1] != -1 && mapping[subst[1]] != -1 && mapping[subst[1]] < mapping[subst[0]]) parity = 3 - parity; if (subst[3] != -1 && mapping[subst[3]] != -1 && mapping[subst[3]] < mapping[subst[2]]) parity = 3 - parity; if (parity == 1) _encode(CMF_STEREO_ALLENE_0); else _encode(CMF_STEREO_ALLENE_1); } int impl_h = 0; if (!mol.isPseudoAtom(idx) && !mol.isRSite(idx) && Molecule::shouldWriteHCount(mol, idx)) { try { impl_h = mol.getImplicitH(idx); if (impl_h < 0 || impl_h > CMF_MAX_IMPLICIT_H) throw Error("implicit hydrogen count %d out of range", impl_h); _encode(CMF_IMPLICIT_H + impl_h); } catch (Element::Error) { } } if (!mol.isRSite(idx) && !mol.isPseudoAtom(idx)) { if (mol.getAtomAromaticity(idx) == ATOM_AROMATIC && (charge != 0 || (number != ELEM_C && number != ELEM_O))) { try { int valence = 
mol.getAtomValence(idx); if (valence < 0 || valence > CMF_MAX_VALENCE) { _encode(CMF_VALENCE_EXT); _output->writePackedUInt(valence); } else _encode(CMF_VALENCE + valence); } catch (Element::Error) { } } } int i; for (i = 1; i <= mol.attachmentPointCount(); i++) { int j, aidx; for (j = 0; (aidx = mol.getAttachmentPoint(i, j)) != -1; j++) if (aidx == idx) { _encode(CMF_ATTACHPT); _encode(i); } } if (atom_flags != 0) { int i, flags = atom_flags[idx]; for (i = 0; i < CMF_NUM_OF_ATOM_FLAGS; i++) if (flags & (1 << i)) _encode(CMF_ATOM_FLAGS + i); } if (save_highlighting) if (mol.isAtomHighlighted(idx)) _encode(CMF_HIGHLIGHTED); }
void IndigoInchi::generateInchiInput (Molecule &mol, inchi_Input &input, Array<inchi_Atom> &atoms, Array<inchi_Stereo0D> &stereo) { QS_DEF(Array<int>, mapping); mapping.clear_resize(mol.vertexEnd()); mapping.fffill(); int index = 0; for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v)) mapping[v] = index++; atoms.clear_resize(index); atoms.zerofill(); stereo.clear(); for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v)) { inchi_Atom &atom = atoms[mapping[v]]; int atom_number = mol.getAtomNumber(v); if (atom_number == ELEM_PSEUDO) throw IndigoError("Molecule with pseudoatom (%s) cannot be converted into InChI", mol.getPseudoAtom(v)); if (atom_number == ELEM_RSITE) throw IndigoError("Molecule with RGroups cannot be converted into InChI"); strncpy(atom.elname, Element::toString(atom_number), ATOM_EL_LEN); Vec3f &c = mol.getAtomXyz(v); atom.x = c.x; atom.y = c.y; atom.z = c.z; // connectivity const Vertex &vtx = mol.getVertex(v); int nei_idx = 0; for (int nei = vtx.neiBegin(); nei != vtx.neiEnd(); nei = vtx.neiNext(nei)) { int v_nei = vtx.neiVertex(nei); atom.neighbor[nei_idx] = mapping[v_nei]; int edge_idx = vtx.neiEdge(nei); atom.bond_type[nei_idx] = getInchiBondType(mol.getBondOrder(edge_idx)); int bond_stereo = INCHI_BOND_STEREO_NONE; if (mol.cis_trans.isIgnored(edge_idx)) bond_stereo = INCHI_BOND_STEREO_DOUBLE_EITHER; else { int dir = mol.getBondDirection2(v, v_nei); if (mol.getBondDirection2(v, v_nei) == BOND_EITHER) bond_stereo = INCHI_BOND_STEREO_SINGLE_1EITHER; else if (mol.getBondDirection2(v_nei, v) == BOND_EITHER) bond_stereo = INCHI_BOND_STEREO_SINGLE_2EITHER; } atom.bond_stereo[nei_idx] = bond_stereo; nei_idx++; } atom.num_bonds = vtx.degree(); // Other properties atom.isotopic_mass = mol.getAtomIsotope(v); atom.radical = mol.getAtomRadical(v); atom.charge = mol.getAtomCharge(v); // Hydrogens int hcount = -1; if (Molecule::shouldWriteHCount(mol, v) || mol.isExplicitValenceSet(v) || mol.isImplicitHSet(v)) { if 
(mol.getAtomAromaticity(v) == ATOM_AROMATIC && atom_number == ELEM_C && atom.charge == 0 && atom.radical == 0) { // Do not set number of implicit hydrogens here as InChI throws an exception on // the molecule B1=CB=c2cc3B=CC=c3cc12 ; } else // set -1 to tell InChI add implicit hydrogens automatically hcount = mol.getImplicitH_NoThrow(v, -1); } atom.num_iso_H[0] = hcount; } // Process cis-trans bonds for (int e = mol.edgeBegin(); e != mol.edgeEnd(); e = mol.edgeNext(e)) { if (mol.cis_trans.getParity(e) == 0) continue; int subst[4]; mol.cis_trans.getSubstituents_All(e, subst); const Edge &edge = mol.getEdge(e); inchi_Stereo0D &st = stereo.push(); // Write it as // #0 - #1 = #2 - #3 st.neighbor[0] = mapping[subst[0]]; st.neighbor[1] = mapping[edge.beg]; st.neighbor[2] = mapping[edge.end]; st.neighbor[3] = mapping[subst[2]]; if (mol.cis_trans.getParity(e) == MoleculeCisTrans::CIS) st.parity = INCHI_PARITY_ODD; else st.parity = INCHI_PARITY_EVEN; st.central_atom = NO_ATOM; st.type = INCHI_StereoType_DoubleBond; } // Process tetrahedral stereocenters for (int i = mol.stereocenters.begin(); i != mol.stereocenters.end(); i = mol.stereocenters.next(i)) { int v = mol.stereocenters.getAtomIndex(i); int type, group, pyramid[4]; mol.stereocenters.get(v, type, group, pyramid); if (type == MoleculeStereocenters::ATOM_ANY) continue; for (int i = 0; i < 4; i++) if (pyramid[i] != -1) pyramid[i] = mapping[pyramid[i]]; inchi_Stereo0D &st = stereo.push(); /* 4 neighbors X neighbor[4] : {#W, #X, #Y, #Z} | central_atom: #A W--A--Y type : INCHI_StereoType_Tetrahedral | Z parity: if (X,Y,Z) are clockwize when seen from W then parity is 'e' otherwise 'o' Example (see AXYZW above): if W is above the plane XYZ then parity = 'e' 3 neighbors Y Y neighbor[4] : {#A, #X, #Y, #Z} / / central_atom: #A X--A (e.g. 
O=S ) type : INCHI_StereoType_Tetrahedral \ \ Z Z */ int offset = 0; if (pyramid[3] == -1) offset = 1; st.neighbor[offset] = mapping[pyramid[0]]; st.neighbor[offset + 1] = mapping[pyramid[1]]; st.neighbor[offset + 2] = mapping[pyramid[2]]; if (offset == 0) st.neighbor[3] = mapping[pyramid[3]]; else st.neighbor[0] = mapping[v]; st.parity = INCHI_PARITY_ODD; if (offset != 0) st.parity = INCHI_PARITY_ODD; else st.parity = INCHI_PARITY_EVEN; st.central_atom = mapping[v]; st.type = INCHI_StereoType_Tetrahedral; } input.atom = atoms.ptr(); input.num_atoms = atoms.size(); input.stereo0D = stereo.ptr(); input.num_stereo0D = stereo.size(); input.szOptions = options.ptr(); }
// Fills the per-vertex arrays _hcount and _degree for `mol` and prepares
// _independent_component_index.
//
// _hcount[v] starts from the implicit hydrogen count (0 for R-sites,
// pseudo-atoms and template atoms) and is increased by one for every
// neighbour that is a hydrogen with isotope 0. _degree[v] counts the
// neighbours that are not listed in `ignored_vertices`. Ignored vertices
// keep degree 0 and get no neighbour hydrogens added.
//
// With `allow_undefined` unset, getImplicitH() is called for aromatic atoms
// whose hydrogen count cannot be determined, and its error propagates.
void MoleculeAutomorphismSearch::_calculateHydrogensAndDegree (Molecule &mol)
{
   _hcount.clear_resize(mol.vertexEnd());
   _degree.clear_resize(mol.vertexEnd());
   _degree.zerofill();

   for (int atom = mol.vertexBegin(); atom != mol.vertexEnd(); atom = mol.vertexNext(atom))
   {
      const bool special = mol.isRSite(atom) || mol.isPseudoAtom(atom) || mol.isTemplateAtom(atom);

      if (special)
         _hcount[atom] = 0;
      else
         _hcount[atom] = mol.getImplicitH_NoThrow(atom, -1);

      if (_hcount[atom] < 0)
      {
         if (mol.getAtomAromaticity(atom) != ATOM_AROMATIC)
         {
            // The number of hydrogens is undefined, but every other property
            // (connectivity, charge, etc.) is known, so such atoms can still
            // be compared. For example, this cis-trans bond is invalid even
            // though the hydrogen counts are undefined: CC=C(N(C)=O)N(C)=O
            //
            // Any big number works here. It can later grow by the neighbour
            // hydrogens, which is correct because the nitrogens in these
            // molecules are different: C[N](C)=O and [H][N]([H])(C)(C)=O
            _hcount[atom] = 100;
         }
         else if (mol.getAtomNumber(atom) == ELEM_C && mol.getAtomCharge(atom) == 0)
         {
            // Neutral aromatic carbon: derive the count from the degree.
            // Other degrees leave the value untouched.
            const int carbon_degree = mol.getVertex(atom).degree();

            if (carbon_degree == 3)
               _hcount[atom] = 0;
            else if (carbon_degree == 2)
               _hcount[atom] = 1;
         }
         else if (mol.getAtomNumber(atom) == ELEM_O && mol.getAtomCharge(atom) == 0)
         {
            _hcount[atom] = 0;
         }
         else if (allow_undefined)
         {
            // Make the number of hydrogens unique so that such atoms are unique
            _hcount[atom] = 101 + atom;
         }
         else
         {
            // This call will throw an error with a good explanation
            _hcount[atom] = mol.getImplicitH(atom);
         }
      }

      const Vertex &vertex = mol.getVertex(atom);

      _degree[atom] = 0;
      if (ignored_vertices != 0 && ignored_vertices[atom])
         continue;

      for (int nei = vertex.neiBegin(); nei != vertex.neiEnd(); nei = vertex.neiNext(nei))
      {
         const int nei_atom = vertex.neiVertex(nei);

         // Explicit non-isotopic hydrogens add to the hydrogen count
         if (mol.getAtomNumber(nei_atom) == ELEM_H && mol.getAtomIsotope(nei_atom) == 0)
            _hcount[atom]++;

         // Only neighbours that are not ignored contribute to the degree
         if (ignored_vertices == 0 || ignored_vertices[nei_atom] == 0)
            _degree[atom]++;
      }
   }

   // Compute independent components if the canonical ordering is not required
   _independent_component_index.clear_resize(mol.vertexEnd());
   if (find_canonical_ordering)
      _independent_component_index.fffill();
   else
   {
      // Different connected components can be marked as independent
      GraphDecomposer decomposer(mol);
      decomposer.decompose();
      _independent_component_index.copy(decomposer.getDecomposition());
   }
}