void CmfSaver::_encodeAtom (Molecule &mol, int idx, const int *mapping) { int number = 0; if (mol.isPseudoAtom(idx)) { const char *str = mol.getPseudoAtom(idx); size_t len = strlen(str); if (len < 1) throw Error("empty pseudo-atom"); if (len > 255) throw Error("pseudo-atom labels %d characters long are not supported (255 is the limit)", len); _encode(CMF_PSEUDOATOM); _encode((byte)len); do { _encode(*str); } while (*(++str) != 0); } else if (mol.isRSite(idx)) { int bits = mol.getRSiteBits(idx); if (bits > 255) { _encode(CMF_RSITE_EXT); _output->writePackedUInt((unsigned int)bits); } else { _encode(CMF_RSITE); _encode(bits); } } else { number = mol.getAtomNumber(idx); if (number <= 0 || number >= ELEM_MAX) throw Error("unexpected atom label"); _encode(number); } int charge = mol.getAtomCharge(idx); if (charge != 0) { int charge2 = charge - CMF_MIN_CHARGE; if (charge2 < 0 || charge2 >= CMF_NUM_OF_CHARGES) { _encode(CMF_CHARGE_EXT); int charge3 = charge + 128; if (charge3 < 0 || charge >= 256) throw Error("unexpected atom charge: %d", charge); _encode(charge3); } else _encode(charge2 + CMF_CHARGES); } int isotope = mol.getAtomIsotope(idx); if (isotope > 0) { int deviation = isotope - Element::getDefaultIsotope(number); if (deviation == 0) _encode(CMF_ISOTOPE_ZERO); else if (deviation == 1) _encode(CMF_ISOTOPE_PLUS1); else if (deviation == 2) _encode(CMF_ISOTOPE_PLUS2); else if (deviation == -1) _encode(CMF_ISOTOPE_MINUS1); else if (deviation == -2) _encode(CMF_ISOTOPE_MINUS2); else { deviation += 100; if (deviation < 0 || deviation > 255) throw Error("unexpected %s isotope: %d", Element::toString(number), isotope); _encode(CMF_ISOTOPE_OTHER); _encode(deviation); } } int radical = 0; if (!mol.isPseudoAtom(idx) && !mol.isRSite(idx)) { try { radical = mol.getAtomRadical(idx); } catch (Element::Error) { } } if (radical > 0) { if (radical == RADICAL_SINGLET) _encode(CMF_RADICAL_SINGLET); else if (radical == RADICAL_DOUBLET) _encode(CMF_RADICAL_DOUBLET); else if (radical == RADICAL_TRIPLET) _encode(CMF_RADICAL_TRIPLET); else throw Error("bad radical value: %d", radical); } MoleculeStereocenters &stereo = mol.stereocenters; int stereo_type = stereo.getType(idx); if (stereo_type == MoleculeStereocenters::ATOM_ANY) _encode(CMF_STEREO_ANY); else if (stereo_type != 0) { bool rigid; int code; const int *pyramid = stereo.getPyramid(idx); if (pyramid[3] == -1) rigid = MoleculeStereocenters::isPyramidMappingRigid(pyramid, 3, mapping); else rigid = MoleculeStereocenters::isPyramidMappingRigid(pyramid, 4, mapping); if (stereo_type == MoleculeStereocenters::ATOM_ABS) code = CMF_STEREO_ABS_0; else { int group = stereo.getGroup(idx); if (group < 1 || group > CMF_MAX_STEREOGROUPS) throw Error("stereogroup number %d out of range", group); if (stereo_type == MoleculeStereocenters::ATOM_AND) code = CMF_STEREO_AND_0 + group - 1; else // stereo_type == MoleculeStereocenters::ATOM_OR code = CMF_STEREO_OR_0 + group - 1; } if (!rigid) // CMF_STEREO_*_0 -> CMF_STEREO_*_1 code += CMF_MAX_STEREOGROUPS * 2 + 1; _encode(code); } if (mol.allene_stereo.isCenter(idx)) { int left, right, parity, subst[4]; mol.allene_stereo.getByAtomIdx(idx, left, right, subst, parity); if (subst[1] != -1 && mapping[subst[1]] != -1 && mapping[subst[1]] < mapping[subst[0]]) parity = 3 - parity; if (subst[3] != -1 && mapping[subst[3]] != -1 && mapping[subst[3]] < mapping[subst[2]]) parity = 3 - parity; if (parity == 1) _encode(CMF_STEREO_ALLENE_0); else _encode(CMF_STEREO_ALLENE_1); } int impl_h = 0; if (!mol.isPseudoAtom(idx) && !mol.isRSite(idx) && Molecule::shouldWriteHCount(mol, idx)) { try { impl_h = mol.getImplicitH(idx); if (impl_h < 0 || impl_h > CMF_MAX_IMPLICIT_H) throw Error("implicit hydrogen count %d out of range", impl_h); _encode(CMF_IMPLICIT_H + impl_h); } catch (Element::Error) { } } if (!mol.isRSite(idx) && !mol.isPseudoAtom(idx)) { if (mol.getAtomAromaticity(idx) == ATOM_AROMATIC && (charge != 0 || (number != ELEM_C && number != ELEM_O))) { try { int valence = mol.getAtomValence(idx); if (valence < 0 || valence > CMF_MAX_VALENCE) { _encode(CMF_VALENCE_EXT); _output->writePackedUInt(valence); } else _encode(CMF_VALENCE + valence); } catch (Element::Error) { } } } int i; for (i = 1; i <= mol.attachmentPointCount(); i++) { int j, aidx; for (j = 0; (aidx = mol.getAttachmentPoint(i, j)) != -1; j++) if (aidx == idx) { _encode(CMF_ATTACHPT); _encode(i); } } if (atom_flags != 0) { int i, flags = atom_flags[idx]; for (i = 0; i < CMF_NUM_OF_ATOM_FLAGS; i++) if (flags & (1 << i)) _encode(CMF_ATOM_FLAGS + i); } if (save_highlighting) if (mol.isAtomHighlighted(idx)) _encode(CMF_HIGHLIGHTED); }
bool AbbreviationExpander::tryExpandToken (TokenChain &tokens, size_t &offset, Molecule &m, AttPoint &attach_to) { Token &cur = tokens[offset]; if (cur.multiplier != 1) return false; Array<int> connection_points; if (cur.type == Token::Element) { if (cur.index == ELEM_H) { offset++; attach_to = AttPoint(-1, 0); return true; } int added = m.addAtom(cur.index); // Get the number of bonds to connect int valence, hyd; int conn = attach_to.order; if (offset + 1 < tokens.size()) { Token &next = tokens[offset + 1]; conn += next.multiplier; } if (!Element::calcValence(cur.index, 0, 0, conn, valence, hyd, false)) { // Ignore next atom // Appear in the OH3C case when H3 is belong to C conn = attach_to.order; if (!Element::calcValence(cur.index, 0, 0, conn, valence, hyd, false)) return false; } for (int i = 0; i < hyd + conn; i++) connection_points.push(added); } else if (cur.type == Token::Pattern) { // Add pattern BufferScanner scanner(abbreviations[cur.index]->expansion.c_str()); SmilesLoader loader(scanner); Molecule abbr; loader.loadMolecule(abbr); Array<int> mapping; Array<int> rsites; m.mergeWithMolecule(abbr, &mapping); for (int v = abbr.vertexBegin(); v != abbr.vertexEnd(); v = abbr.vertexNext(v)) { int mapped = mapping[v]; if (m.isRSite(mapped)) { dword bits = m.getRSiteBits(mapped); int id1 = bitGetOneHOIndex(bits); int id2 = bitGetOneHOIndex(bits); if (id1 != id2) throw Exception("Invalid abbreviations specification: %s", abbreviations[cur.index]->expansion.c_str()); if (id1 != 0) id1--; // R == R1 const Vertex &vertex = m.getVertex(mapped); int nei = vertex.neiBegin(); connection_points.expandFill(id1 + 1, -1); connection_points[id1] = vertex.neiVertex(nei); // Point connected to the RSite rsites.push(mapped); } } m.removeAtoms(rsites); } else return false; bool rollback = false; int atom_bound = m.vertexCount(); size_t offset2 = offset + 1; attachBond(m, attach_to, connection_points[0]); int i = attach_to.order; while (i < connection_points.size() - 1 && !rollback) { if (offset2 >= tokens.size()) { // If we are at the end then there can be an implicit double bond // Example: -CH2CH= // When we read C H there are no more tokens break; } Token &next = tokens[offset2]; for (int j = 0; j < next.multiplier; j++) { if (i >= connection_points.size()) { rollback = true; break; } if (next.type == Token::Branch) { AttPoint point(connection_points[i], 1); if (!expandParsedTokensWithRev(next.branch, m, point) || point.index != -1) { rollback = true; break; } } else { TokenChain chain; chain.push_back(next); chain[0].multiplier = 1; size_t local_offset = 0; AttPoint point(connection_points[i], 1); if (!tryExpandToken(chain, local_offset, m, point) || point.index != -1) { rollback = true; break; } } i++; } offset2++; } if (i > connection_points.size()) rollback = true; if (!rollback) { if (i == connection_points.size()) { // This is terminal attach_to = AttPoint(-1, 0); } else if (i == connection_points.size() - 1) attach_to = AttPoint(connection_points[i], 1); // Last attachment point else { // Number of tokens are incomlete means that there are double bonds after attach_to = AttPoint(connection_points[i], connection_points.size() - i); } } if (rollback) { // Rollback Array<int> new_atoms; for (int v = m.vertexBegin(); v != m.vertexEnd(); v = m.vertexNext(v)) if (v >= atom_bound) new_atoms.push(v); m.removeAtoms(new_atoms); return false; } offset = offset2; return true; }