void IndigoInchi::generateInchiInput (Molecule &mol, inchi_Input &input, Array<inchi_Atom> &atoms, Array<inchi_Stereo0D> &stereo) { QS_DEF(Array<int>, mapping); mapping.clear_resize(mol.vertexEnd()); mapping.fffill(); int index = 0; for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v)) mapping[v] = index++; atoms.clear_resize(index); atoms.zerofill(); stereo.clear(); for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v)) { inchi_Atom &atom = atoms[mapping[v]]; int atom_number = mol.getAtomNumber(v); if (atom_number == ELEM_PSEUDO) throw IndigoError("Molecule with pseudoatom (%s) cannot be converted into InChI", mol.getPseudoAtom(v)); if (atom_number == ELEM_RSITE) throw IndigoError("Molecule with RGroups cannot be converted into InChI"); strncpy(atom.elname, Element::toString(atom_number), ATOM_EL_LEN); Vec3f &c = mol.getAtomXyz(v); atom.x = c.x; atom.y = c.y; atom.z = c.z; // connectivity const Vertex &vtx = mol.getVertex(v); int nei_idx = 0; for (int nei = vtx.neiBegin(); nei != vtx.neiEnd(); nei = vtx.neiNext(nei)) { int v_nei = vtx.neiVertex(nei); atom.neighbor[nei_idx] = mapping[v_nei]; int edge_idx = vtx.neiEdge(nei); atom.bond_type[nei_idx] = getInchiBondType(mol.getBondOrder(edge_idx)); int bond_stereo = INCHI_BOND_STEREO_NONE; if (mol.cis_trans.isIgnored(edge_idx)) bond_stereo = INCHI_BOND_STEREO_DOUBLE_EITHER; else { int dir = mol.getBondDirection2(v, v_nei); if (mol.getBondDirection2(v, v_nei) == BOND_EITHER) bond_stereo = INCHI_BOND_STEREO_SINGLE_1EITHER; else if (mol.getBondDirection2(v_nei, v) == BOND_EITHER) bond_stereo = INCHI_BOND_STEREO_SINGLE_2EITHER; } atom.bond_stereo[nei_idx] = bond_stereo; nei_idx++; } atom.num_bonds = vtx.degree(); // Other properties atom.isotopic_mass = mol.getAtomIsotope(v); atom.radical = mol.getAtomRadical(v); atom.charge = mol.getAtomCharge(v); // Hydrogens int hcount = -1; if (Molecule::shouldWriteHCount(mol, v) || mol.isExplicitValenceSet(v) || mol.isImplicitHSet(v)) { if 
(mol.getAtomAromaticity(v) == ATOM_AROMATIC && atom_number == ELEM_C && atom.charge == 0 && atom.radical == 0) { // Do not set number of implicit hydrogens here as InChI throws an exception on // the molecule B1=CB=c2cc3B=CC=c3cc12 ; } else // set -1 to tell InChI add implicit hydrogens automatically hcount = mol.getImplicitH_NoThrow(v, -1); } atom.num_iso_H[0] = hcount; } // Process cis-trans bonds for (int e = mol.edgeBegin(); e != mol.edgeEnd(); e = mol.edgeNext(e)) { if (mol.cis_trans.getParity(e) == 0) continue; int subst[4]; mol.cis_trans.getSubstituents_All(e, subst); const Edge &edge = mol.getEdge(e); inchi_Stereo0D &st = stereo.push(); // Write it as // #0 - #1 = #2 - #3 st.neighbor[0] = mapping[subst[0]]; st.neighbor[1] = mapping[edge.beg]; st.neighbor[2] = mapping[edge.end]; st.neighbor[3] = mapping[subst[2]]; if (mol.cis_trans.getParity(e) == MoleculeCisTrans::CIS) st.parity = INCHI_PARITY_ODD; else st.parity = INCHI_PARITY_EVEN; st.central_atom = NO_ATOM; st.type = INCHI_StereoType_DoubleBond; } // Process tetrahedral stereocenters for (int i = mol.stereocenters.begin(); i != mol.stereocenters.end(); i = mol.stereocenters.next(i)) { int v = mol.stereocenters.getAtomIndex(i); int type, group, pyramid[4]; mol.stereocenters.get(v, type, group, pyramid); if (type == MoleculeStereocenters::ATOM_ANY) continue; for (int i = 0; i < 4; i++) if (pyramid[i] != -1) pyramid[i] = mapping[pyramid[i]]; inchi_Stereo0D &st = stereo.push(); /* 4 neighbors X neighbor[4] : {#W, #X, #Y, #Z} | central_atom: #A W--A--Y type : INCHI_StereoType_Tetrahedral | Z parity: if (X,Y,Z) are clockwize when seen from W then parity is 'e' otherwise 'o' Example (see AXYZW above): if W is above the plane XYZ then parity = 'e' 3 neighbors Y Y neighbor[4] : {#A, #X, #Y, #Z} / / central_atom: #A X--A (e.g. 
O=S ) type : INCHI_StereoType_Tetrahedral \ \ Z Z */ int offset = 0; if (pyramid[3] == -1) offset = 1; st.neighbor[offset] = mapping[pyramid[0]]; st.neighbor[offset + 1] = mapping[pyramid[1]]; st.neighbor[offset + 2] = mapping[pyramid[2]]; if (offset == 0) st.neighbor[3] = mapping[pyramid[3]]; else st.neighbor[0] = mapping[v]; st.parity = INCHI_PARITY_ODD; if (offset != 0) st.parity = INCHI_PARITY_ODD; else st.parity = INCHI_PARITY_EVEN; st.central_atom = mapping[v]; st.type = INCHI_StereoType_Tetrahedral; } input.atom = atoms.ptr(); input.num_atoms = atoms.size(); input.stereo0D = stereo.ptr(); input.num_stereo0D = stereo.size(); input.szOptions = options.ptr(); }
// Writes the canonical SMILES of a molecule through SmilesSaver.
//
// Steps, in order:
//   1. Empty molecules are silently skipped; polymers are rejected.
//   2. If any ordinary atom has an undefined implicit hydrogen count,
//      hydrogens are restored on mol_ itself (the argument is modified).
//   3. A working copy is made; allene stereo is dropped, and ring cis-trans
//      parities that should not be saved are reset.
//   4. A canonical vertex ordering is computed, invalid stereo found by the
//      automorphism search is removed, and the molecule is saved using the
//      resulting ranks.
//
// Throws Error if the molecule contains repeating units.
void CanonicalSmilesSaver::saveMolecule (Molecule &mol_) const
{
   if (mol_.vertexCount() < 1)
      return;

   QS_DEF(Array<int>, ignored);
   QS_DEF(Array<int>, order);
   QS_DEF(Array<int>, ranks);
   QS_DEF(Molecule, mol);

   if (mol_.repeating_units.size() > 0)
      throw Error("can not canonicalize a polymer");

   // Detect hydrogens configuration if aromatic but not ambiguous
   bool needs_h_restore = false;
   for (int v = mol_.vertexBegin(); v != mol_.vertexEnd(); v = mol_.vertexNext(v))
   {
      const bool special_atom = mol_.isRSite(v) || mol_.isPseudoAtom(v);
      if (!special_atom && mol_.getImplicitH_NoThrow(v, -1) == -1)
         needs_h_restore = true;
   }
   if (needs_h_restore)
   {
      AromaticityOptions arom_options;
      arom_options.method = AromaticityOptions::GENERIC;
      arom_options.unique_dearomatization = true;
      MoleculeDearomatizer::restoreHydrogens(mol_, arom_options);
   }

   mol.clone(mol_, 0, 0);

   // TODO: canonicalize allenes properly
   mol.allene_stereo.clear();

   // Vertices that can be folded into implicit hydrogens are excluded from
   // the automorphism search.
   ignored.clear_resize(mol.vertexEnd());
   ignored.zerofill();
   for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v))
   {
      if (mol.convertableToImplicitHydrogen(v))
         ignored[v] = 1;
   }

   for (int e = mol.edgeBegin(); e != mol.edgeEnd(); e = mol.edgeNext(e))
   {
      // Only ring bonds that carry a cis-trans parity are of interest here
      if (mol.getBondTopology(e) != TOPOLOGY_RING || mol.cis_trans.getParity(e) == 0)
         continue;

      // we save cis/trans ring bonds into SMILES, but only those who
      // do not participate in bigger ring systems
      const Edge &edge = mol.getEdge(e);
      if (mol.getAtomRingBondsCount(edge.beg) != 2 ||
          mol.getAtomRingBondsCount(edge.end) != 2)
      {
         mol.cis_trans.setParity(e, 0);
         continue;
      }

      // also, discard the cis-trans bonds that have been converted to aromatic:
      // each end of the bond must still have at least one single bond
      const Vertex &beg_vertex = mol.getVertex(edge.beg);
      const Vertex &end_vertex = mol.getVertex(edge.end);

      bool beg_has_single = false;
      for (int n = beg_vertex.neiBegin(); n != beg_vertex.neiEnd(); n = beg_vertex.neiNext(n))
      {
         if (mol.getBondOrder(beg_vertex.neiEdge(n)) == BOND_SINGLE)
            beg_has_single = true;
      }

      bool end_has_single = false;
      for (int n = end_vertex.neiBegin(); n != end_vertex.neiEnd(); n = end_vertex.neiNext(n))
      {
         if (mol.getBondOrder(end_vertex.neiEdge(n)) == BOND_SINGLE)
            end_has_single = true;
      }

      if (!(beg_has_single && end_has_single))
         mol.cis_trans.setParity(e, 0);
   }

   // Canonical ordering via the automorphism search
   MoleculeAutomorphismSearch of;

   of.detect_invalid_cistrans_bonds = find_invalid_stereo;
   of.detect_invalid_stereocenters = find_invalid_stereo;
   of.find_canonical_ordering = true;
   of.ignored_vertices = ignored.ptr();
   of.process(mol);
   of.getCanonicalNumbering(order);

   // Drop the stereo information that the search reported as invalid
   for (int e = mol.edgeBegin(); e != mol.edgeEnd(); e = mol.edgeNext(e))
   {
      if (mol.cis_trans.getParity(e) != 0 && of.invalidCisTransBond(e))
         mol.cis_trans.setParity(e, 0);
   }

   for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v))
   {
      if (mol.stereocenters.getType(v) > MoleculeStereocenters::ATOM_ANY && of.invalidStereocenter(v))
         mol.stereocenters.remove(v);
   }

   // Invert the ordering: ranks[vertex] = position of the vertex in `order`.
   // Entries for vertices absent from `order` are left untouched.
   ranks.clear_resize(mol.vertexEnd());
   for (int pos = 0; pos < order.size(); pos++)
      ranks[order[pos]] = pos;

   SmilesSaver saver(_output);

   saver.vertex_ranks = ranks.ptr();
   saver.ignore_hydrogens = true;
   saver.canonize_chiralities = true;
   saver.ignore_invalid_hcount = false;
   saver.saveMolecule(mol);
}
// Fills the per-vertex arrays _hcount and _degree for the given molecule and
// initializes _independent_component_index.
//
// _hcount[v] is the implicit hydrogen count (or a substitute value when it is
// undefined, see below) plus the number of neighboring non-isotopic hydrogen
// atoms. _degree[v] is the number of neighbors that are not ignored. For an
// ignored vertex the degree is 0 and no neighbor hydrogens are added.
//
// May throw from mol.getImplicitH() when an aromatic atom has an undefined
// hydrogen count and allow_undefined is not set.
void MoleculeAutomorphismSearch::_calculateHydrogensAndDegree (Molecule &mol)
{
   _hcount.clear_resize(mol.vertexEnd());
   _degree.clear_resize(mol.vertexEnd());
   _degree.zerofill();

   for (int v = mol.vertexBegin(); v != mol.vertexEnd(); v = mol.vertexNext(v))
   {
      // R-sites, pseudoatoms and template atoms carry no implicit hydrogens
      if (mol.isRSite(v) || mol.isPseudoAtom(v) || mol.isTemplateAtom(v))
         _hcount[v] = 0;
      else
         _hcount[v] = mol.getImplicitH_NoThrow(v, -1);

      if (_hcount[v] < 0)
      {
         if (mol.getAtomAromaticity(v) != ATOM_AROMATIC)
         {
            // The number of hydrogens is undefined, but all the other
            // properties (connectivity, charge, etc.) are known, which means
            // that such atoms can still be compared with each other.
            // For example, this cis-trans bond is invalid even if the number
            // of hydrogens is undefined: CC=C(N(C)=O)N(C)=O
            //
            // Any big number will do. Later this number can be increased by
            // the neighbour hydrogens, and this is correct, because the
            // nitrogens in these molecules are different:
            // C[N](C)=O and [H][N]([H])(C)(C)=O
            _hcount[v] = 100;
         }
         else
         {
            const int atom_number = mol.getAtomNumber(v);

            if (atom_number == ELEM_C && mol.getAtomCharge(v) == 0)
            {
               // Neutral aromatic carbon: deduce hydrogens from the degree.
               // Other degrees leave the count as it is.
               switch (mol.getVertex(v).degree())
               {
               case 3:
                  _hcount[v] = 0;
                  break;
               case 2:
                  _hcount[v] = 1;
                  break;
               default:
                  break;
               }
            }
            else if (atom_number == ELEM_O && mol.getAtomCharge(v) == 0)
            {
               _hcount[v] = 0;
            }
            else if (allow_undefined)
            {
               // Make number of hydrogens unique in order to make such atoms unique
               _hcount[v] = 101 + v;
            }
            else
            {
               // This code will throw an error with a good explanation
               _hcount[v] = mol.getImplicitH(v);
            }
         }
      }

      _degree[v] = 0;
      if (ignored_vertices != 0 && ignored_vertices[v])
         continue;

      const Vertex &vertex = mol.getVertex(v);
      for (int n = vertex.neiBegin(); n != vertex.neiEnd(); n = vertex.neiNext(n))
      {
         const int neighbor = vertex.neiVertex(n);

         // Explicit non-isotopic hydrogens are counted together with implicit ones
         if (mol.getAtomNumber(neighbor) == ELEM_H && mol.getAtomIsotope(neighbor) == 0)
            _hcount[v]++;

         // Only neighbors that are not ignored contribute to the degree
         if (ignored_vertices == 0 || ignored_vertices[neighbor] == 0)
            _degree[v]++;
      }
   }

   // Compute independent components if the canonical ordering is not required
   _independent_component_index.clear_resize(mol.vertexEnd());
   if (find_canonical_ordering)
   {
      _independent_component_index.fffill();
   }
   else
   {
      // We can mark different connected components as independent
      GraphDecomposer decomposer(mol);
      decomposer.decompose();
      _independent_component_index.copy(decomposer.getDecomposition());
   }
}