// Builds the built-in BRICS atom-type definition table and hands it to the
// string overload of constructFragmenterAtomTypes().
//
// defs     - forwarded unchanged to constructFragmenterAtomTypes()
// environs - forwarded unchanged to constructFragmenterAtomTypes()
//
// The table is passed with "//" as the `comment` argument and `true` as the
// `validate` argument.
void constructBRICSAtomTypes(std::map<unsigned int, std::string> &defs,
                             std::map<unsigned int, ROMOL_SPTR> *environs) {
  /*
     After some discussion, the L2 definitions ("N.pl3" in the original
     paper) have been removed and incorporated into a (almost) general
     purpose amine definition in L5 ("N.sp3" in the paper).

     The problem is one of consistency.
       Based on the original definitions you should get the following
       fragmentations:
         C1CCCCC1NC(=O)C -> C1CCCCC1N[2*].[1*]C(=O)C
         c1ccccc1NC(=O)C -> c1ccccc1[16*].[2*]N[2*].[1*]C(=O)C
       This difference just didn't make sense to us. By switching to
       the unified definition we end up with:
         C1CCCCC1NC(=O)C -> C1CCCCC1[15*].[5*]N[5*].[1*]C(=O)C
         c1ccccc1NC(=O)C -> c1ccccc1[16*].[5*]N[5*].[1*]C(=O)C
  */

  // One definition per line, written as "<numeric label> <pattern>".
  // The entries are assembled from adjacent string literals rather than a
  // single literal continued with line splices, so stray whitespace or
  // re-indentation of this file cannot leak into the definitions.
  // The entry order is deliberately left exactly as it was (it is not
  // sorted by label); NOTE(review): whether the order matters depends on
  // constructFragmenterAtomTypes() - confirm before reordering.
  const std::string BRICSdefs =
      "1 [C;D3]([#0,#6,#7,#8])(=O)\n"
      "3 [O;D2]-;!@[#0,#6,#1]\n"
      "5 [N;!D1;!$(N=*);!$(N-[!#6;!#16;!#0;!#1]);!$([N;R]@[C;R]=O)]\n"
      "9 [n;+0;$(n(:[c,n,o,s]):[c,n,o,s])]\n"
      "10 [N;R;$(N(@C(=O))@[C,N,O,S])]\n"
      "11 [S;D2](-;!@[#0,#6])\n"
      "12 [S;D4]([#6,#0])(=O)(=O)\n"
      "6 [C;D3;!R](=O)-;!@[#0,#6,#7,#8]\n"
      "13 [C;$(C(-;@[C,N,O,S])-;@[N,O,S])]\n"
      "14 [c;$(c(:[c,n,o,s]):[n,o,s])]\n"
      "15 [C;$(C(-;@C)-;@C)]\n"
      "4 [C;!D1;!$(C=*)]-;!@[#6]\n"
      "7 [C;D2,D3]-[#6]\n"
      "8 [C;!R;!D1;!$(C!-*)]\n"
      "16 [c;$(c(:c):c)]";

  constructFragmenterAtomTypes(BRICSdefs, defs, "//", true, environs);
}
// Convenience overload for callers that hold the definitions as text.
//
// Copies `str` into an in-memory stream and delegates to the overload of
// constructFragmenterAtomTypes() that takes a pointer to a stream as its
// first argument. `defs`, `comment`, `validate` and `environs` are forwarded
// untouched, so their meaning is whatever that overload defines.
void constructFragmenterAtomTypes(
    const std::string &str,
    std::map<unsigned int, std::string> &defs,
    const std::string &comment,
    bool validate,
    std::map<unsigned int, ROMOL_SPTR> *environs) {
  // The stream lives only for the duration of the delegated call.
  std::stringstream definitionText(str);
  constructFragmenterAtomTypes(&definitionText, defs, comment, validate,
                               environs);
}