// Fills the per-vertex tables used by the automorphism search:
//   _hcount[v] - hydrogen count of v (implicit hydrogens, or a substitute
//                value when they are undefined, plus non-isotopic hydrogen
//                neighbours);
//   _degree[v] - number of neighbours of v that are not ignored
//                (stays 0 for an ignored vertex).
// It also prepares _independent_component_index: the graph decomposition when
// canonical ordering is not requested, otherwise the result of fffill().
//
// May propagate whatever mol.getImplicitH() throws for an aromatic atom with
// undefined hydrogens when allow_undefined is false.
void MoleculeAutomorphismSearch::_calculateHydrogensAndDegree (Molecule &mol)
{
   const int vertex_end = mol.vertexEnd();

   _hcount.clear_resize(vertex_end);
   _degree.clear_resize(vertex_end);
   _degree.zerofill();

   for (int atom = mol.vertexBegin(); atom != vertex_end; atom = mol.vertexNext(atom))
   {
      // R-sites and pseudo-atoms carry no implicit hydrogens; for everything
      // else -1 marks "could not be determined".
      _hcount[atom] = (mol.isRSite(atom) || mol.isPseudoAtom(atom))
                         ? 0
                         : mol.getImplicitH_NoThrow(atom, -1);

      if (_hcount[atom] < 0)
      {
         if (mol.getAtomAromaticity(atom) != ATOM_AROMATIC)
         {
            // The hydrogen count is undefined, but connectivity, charge and
            // the other properties are known, so such atoms can still be
            // compared with each other. For example, the cis-trans bond in
            // CC=C(N(C)=O)N(C)=O is invalid even though the hydrogen count
            // of the nitrogens is undefined.
            //
            // Any big number works here. Neighbouring hydrogens may raise it
            // further below, which is correct because the nitrogens in
            // C[N](C)=O and [H][N]([H])(C)(C)=O are different.
            _hcount[atom] = 100;
         }
         else
         {
            const int number = mol.getAtomNumber(atom);

            if (number == ELEM_C && mol.getAtomCharge(atom) == 0)
            {
               // Uncharged aromatic carbon: deduce hydrogens from the degree.
               switch (mol.getVertex(atom).degree())
               {
                  case 3:
                     _hcount[atom] = 0;
                     break;
                  case 2:
                     _hcount[atom] = 1;
                     break;
                  default:
                     // Any other degree leaves the negative value untouched.
                     break;
               }
            }
            else if (number == ELEM_O && mol.getAtomCharge(atom) == 0)
            {
               _hcount[atom] = 0;
            }
            else if (allow_undefined)
            {
               // Give the atom its own hydrogen count so that it is unique.
               _hcount[atom] = 101 + atom;
            }
            else
            {
               // This call is expected to throw an error with a good explanation.
               _hcount[atom] = mol.getImplicitH(atom);
            }
         }
      }

      const Vertex &vertex = mol.getVertex(atom);

      _degree[atom] = 0;
      if (ignored_vertices != 0 && ignored_vertices[atom])
         continue;

      for (int nei = vertex.neiBegin(); nei != vertex.neiEnd(); nei = vertex.neiNext(nei))
      {
         const int other = vertex.neiVertex(nei);

         // Explicit hydrogens without an isotope label count as hydrogens
         // of this atom.
         if (mol.getAtomNumber(other) == ELEM_H && mol.getAtomIsotope(other) == 0)
            _hcount[atom]++;

         // Only neighbours that are not ignored contribute to the degree.
         if (ignored_vertices == 0 || ignored_vertices[other] == 0)
            _degree[atom]++;
      }
   }

   // Independent components are only computed when the canonical ordering
   // is not required.
   _independent_component_index.clear_resize(vertex_end);

   if (find_canonical_ordering)
   {
      _independent_component_index.fffill();
      return;
   }

   // Different connected components can be marked as independent.
   GraphDecomposer decomposer(mol);
   decomposer.decompose();
   _independent_component_index.copy(decomposer.getDecomposition());
}
bool AbbreviationExpander::tryExpandToken (TokenChain &tokens, size_t &offset, Molecule &m, AttPoint &attach_to) { Token &cur = tokens[offset]; if (cur.multiplier != 1) return false; Array<int> connection_points; if (cur.type == Token::Element) { if (cur.index == ELEM_H) { offset++; attach_to = AttPoint(-1, 0); return true; } int added = m.addAtom(cur.index); // Get the number of bonds to connect int valence, hyd; int conn = attach_to.order; if (offset + 1 < tokens.size()) { Token &next = tokens[offset + 1]; conn += next.multiplier; } if (!Element::calcValence(cur.index, 0, 0, conn, valence, hyd, false)) { // Ignore next atom // Appear in the OH3C case when H3 is belong to C conn = attach_to.order; if (!Element::calcValence(cur.index, 0, 0, conn, valence, hyd, false)) return false; } for (int i = 0; i < hyd + conn; i++) connection_points.push(added); } else if (cur.type == Token::Pattern) { // Add pattern BufferScanner scanner(abbreviations[cur.index].expansion.c_str()); SmilesLoader loader(scanner); Molecule abbr; loader.loadMolecule(abbr); Array<int> mapping; Array<int> rsites; m.mergeWithMolecule(abbr, &mapping); for (int v = abbr.vertexBegin(); v != abbr.vertexEnd(); v = abbr.vertexNext(v)) { int mapped = mapping[v]; if (m.isRSite(mapped)) { dword bits = m.getRSiteBits(mapped); int id1 = bitGetOneHOIndex(bits); int id2 = bitGetOneHOIndex(bits); if (id1 != id2) throw Exception("Invalid abbreviations specification: %s", abbreviations[cur.index].expansion.c_str()); if (id1 != 0) id1--; // R == R1 const Vertex &vertex = m.getVertex(mapped); int nei = vertex.neiBegin(); connection_points.expandFill(id1 + 1, -1); connection_points[id1] = vertex.neiVertex(nei); // Point connected to the RSite rsites.push(mapped); } } m.removeAtoms(rsites); } else return false; bool rollback = false; int atom_bound = m.vertexCount(); size_t offset2 = offset + 1; attachBond(m, attach_to, connection_points[0]); int i = attach_to.order; while (i < connection_points.size() - 1 && 
!rollback) { if (offset2 >= tokens.size()) { // If we are at the end then there can be an implicit double bond // Example: -CH2CH= // When we read C H there are no more tokens break; } Token &next = tokens[offset2]; for (int j = 0; j < next.multiplier; j++) { if (i >= connection_points.size()) { rollback = true; break; } if (next.type == Token::Branch) { AttPoint point(connection_points[i], 1); if (!expandParsedTokensWithRev(next.branch, m, point) || point.index != -1) { rollback = true; break; } } else { TokenChain chain; chain.push_back(next); chain[0].multiplier = 1; size_t local_offset = 0; AttPoint point(connection_points[i], 1); if (!tryExpandToken(chain, local_offset, m, point) || point.index != -1) { rollback = true; break; } } i++; } offset2++; } if (i > connection_points.size()) rollback = true; if (!rollback) { if (i == connection_points.size()) { // This is terminal attach_to = AttPoint(-1, 0); } else if (i == connection_points.size() - 1) attach_to = AttPoint(connection_points[i], 1); // Last attachment point else { // Number of tokens are incomlete means that there are double bonds after attach_to = AttPoint(connection_points[i], connection_points.size() - i); } } if (rollback) { // Rollback Array<int> new_atoms; for (int v = m.vertexBegin(); v != m.vertexEnd(); v = m.vertexNext(v)) if (v >= atom_bound) new_atoms.push(v); m.removeAtoms(new_atoms); return false; } offset = offset2; return true; }
void convertMolfile (char *path, char *filename, FileOutput &cpp_file) { FileScanner molfile("%s\\%s", path, filename); MolfileLoader mf_loader(molfile); Molecule mol; QS_DEF(Array<int>, edges); printf("%s\n", filename); mf_loader.loadMolecule(mol, true); BiconnectedDecomposer bd(mol); if (bd.decompose() != 1) { printf("Error: %s is not biconnected\n", filename); return; } int i, j; edges.clear_reserve(mol.edgeCount()); for (i = mol.edgeBegin() ; i < mol.edgeEnd(); i = mol.edgeNext(i)) edges.push(i); edges.qsort(edge_cmp, &mol); const Edge &edge = mol.getEdge(edges[edges.size() / 2]); Vec3f v1 = mol.getAtomPos(edge.beg); Vec3f v2 = mol.getAtomPos(edge.end); v1.z = 0.f; v2.z = 0.f; float scale = Vec3f::dist(v1, v2); if (scale < 0.0001f) { printf("Error: %s has zero bond\n", filename); return; } scale = 1.f / scale; int first_idx = mol.vertexBegin(); Vec3f pos = mol.getAtomPos(first_idx); for (i = mol.vertexNext(first_idx); i < mol.vertexEnd(); i = mol.vertexNext(i)) { if (mol.getAtomPos(i).y < pos.y) { pos = mol.getAtomPos(i); first_idx = i; } } for (i = mol.vertexBegin() ; i < mol.vertexEnd(); i = mol.vertexNext(i)) { mol.getAtom2(i).pos.sub(pos); mol.getAtom2(i).pos.scale(scale); } char buf[1024]; sprintf_s(buf, "BEGIN_PATTERN(\"%s\")", filename); cpp_file.writeStringCR(buf); for (i = mol.vertexBegin(); i < mol.vertexEnd(); i = mol.vertexNext(i)) { sprintf_s(buf, " ADD_ATOM(%d, %ff, %ff)", i, mol.getAtomPos(i).x, mol.getAtomPos(i).y); cpp_file.writeStringCR(buf); } for (i = mol.edgeBegin(); i < mol.edgeEnd(); i = mol.edgeNext(i)) { const Edge &edge = mol.getEdge(i); int type = mol.getBond(i).type; int qtype = mol.getQueryBond(i).type; sprintf_s(buf, " ADD_BOND(%d, %d, %d)", edge.beg, edge.end, qtype != 0 ? 
qtype : type); cpp_file.writeStringCR(buf); } Vec2f v, inter; Vec2f pos_i; int idx = mol.vertexCount(); i = first_idx; float max_angle, cur_angle; float i_angle = 0; int next_nei = 0; int point_idx = 0; pos_i.set(mol.getAtomPos(i).x, mol.getAtomPos(i).y); while (true) { const Vertex &vert = mol.getVertex(i); if (i != first_idx) { v.set(pos_i.x, pos_i.y); pos_i.set(mol.getAtomPos(i).x, mol.getAtomPos(i).y); v.sub(pos_i); i_angle = v.tiltAngle2(); } else if (point_idx > 0) break; sprintf_s(buf, " OUTLINE_POINT(%d, %ff, %ff)", point_idx++, pos_i.x, pos_i.y); cpp_file.writeStringCR(buf); max_angle = 0.f; for (j = vert.neiBegin(); j < vert.neiEnd(); j = vert.neiNext(j)) { const Vec3f &pos_nei = mol.getAtomPos(vert.neiVertex(j)); v.set(pos_nei.x - pos_i.x, pos_nei.y - pos_i.y); cur_angle = v.tiltAngle2() - i_angle; if (cur_angle < 0.f) cur_angle += 2 * PI; if (max_angle < cur_angle) { max_angle = cur_angle; next_nei = j; } } i = vert.neiVertex(next_nei); float dist, min_dist = 0.f; int int_edge; Vec2f cur_v1 = pos_i; Vec2f cur_v2(mol.getAtomPos(i).x, mol.getAtomPos(i).y); while (min_dist < 10000.f) { min_dist = 10001.f; for (j = mol.edgeBegin(); j < mol.edgeEnd(); j = mol.edgeNext(j)) { const Edge &edge = mol.getEdge(j); Vec2f cur_v3(mol.getAtomPos(edge.beg).x, mol.getAtomPos(edge.beg).y); Vec2f cur_v4(mol.getAtomPos(edge.end).x, mol.getAtomPos(edge.end).y); if (Vec2f::intersection(cur_v1, cur_v2, cur_v3, cur_v4, v)) if ((dist = Vec2f::dist(cur_v1, v)) < min_dist) { inter = v; min_dist = dist; int_edge = j; } } if (min_dist < 10000.f) { sprintf_s(buf, " OUTLINE_POINT(%d, %ff, %ff)", point_idx++, v.x, v.y); cpp_file.writeStringCR(buf); const Edge &edge = mol.getEdge(int_edge); Vec2f cur_v3(mol.getAtomPos(edge.beg).x, mol.getAtomPos(edge.beg).y); Vec2f cur_v4(mol.getAtomPos(edge.end).x, mol.getAtomPos(edge.end).y); Vec2f cur_v1v; Vec2f cur_v3v; Vec2f cur_v4v; cur_v1v.diff(cur_v1, inter); cur_v3v.diff(cur_v3, inter); cur_v4v.diff(cur_v4, inter); float angle1 = 
cur_v1v.tiltAngle2(); float angle3 = cur_v3v.tiltAngle2() - angle1; float angle4 = cur_v4v.tiltAngle2() - angle1; if (angle3 < 0) angle3 += 2 * PI; if (angle4 < 0) angle4 += 2 * PI; cur_v1 = inter; if (angle3 > angle4) { cur_v2 = cur_v3; i = edge.beg; } else { cur_v2 = cur_v4; i = edge.end; } } } } cpp_file.writeStringCR("END_PATTERN()"); }