/* * Document-method: on_parse_complete= * * call-seq: on_parse_complete = Proc.new { |obj| ... } * * This callback setter allows you to pass a Proc/lambda or any other object that responds to #call. * * It will pass a single parameter, the ruby object built from the last parsed JSON object */ static VALUE rb_yajl_parser_set_complete_cb(VALUE self, VALUE callback) { yajl_parser_wrapper * wrapper; GetParser(self, wrapper); wrapper->parse_complete_callback = callback; return Qnil; }
ret_ CXMLLoaderNetwork::Load(XercesDOMParser *pParser, const ch_1 *pszEnvironmentPath) { _START(LOAD); #ifdef _DEBUG_ if (!pParser) _RET(PARAMETER_NULL | PARAMETER_1); if (!pszEnvironmentPath) _RET(PARAMETER_NULL | PARAMETER_2); if (null_v == pszEnvironmentPath[0]) _RET(PARAMETER_EMPTY | PARAMETER_2); #endif SetParser(pParser); ch_1 sNetwork[ENVIRONMENT_PATH_LENGTH]; memset(sNetwork, 0, ENVIRONMENT_PATH_LENGTH); strncpy(sNetwork, pszEnvironmentPath, ENVIRONMENT_PATH_LENGTH); strncat(sNetwork, NETWORK_XML_FILE, ENVIRONMENT_PATH_LENGTH); DOMDocument *pNetworkDoc = null_v; try { GetParser()->parse(sNetwork); pNetworkDoc = GetParser()->getDocument(); } catch (const OutOfMemoryException &err) { auto_xerces_str sErr(err.getMessage()); printf("%s\n", (const ch_1 *)sErr); _RET(XML_LOADER_ERROR); } catch (const XMLException &err) { auto_xerces_str sErr(err.getMessage()); printf("%s\n", (const ch_1 *)sErr); _RET(XML_LOADER_ERROR); } catch (const DOMException &err) { auto_xerces_str sErr(err.getMessage()); printf("%s\n", (const ch_1 *)sErr); _RET(XML_LOADER_ERROR); } catch (...) { printf("Unexpected error during parsing.\n"); _RET(XML_LOADER_ERROR); } DOMElement *pRoot = pNetworkDoc->getDocumentElement(); if (!pRoot) _RET(XML_LOADER_ERROR); DOMElement *pChild = (DOMElement *)pRoot->getFirstChild(); if (!pChild) _RET(XML_LOADER_ERROR); auto_xerces_str wsIdentity ("identity"); auto_xerces_str wsPDU ("pdu"); auto_xerces_str wsDirection ("direction"); auto_xerces_str wsName ("name"); auto_xerces_str wsProtocolName ("protocol"); auto_xerces_str wsCommandID ("command_id"); auto_xerces_str wsSizeID ("size_id"); auto_xerces_str wsLocalPort ("local_port"); auto_xerces_str wsAuto ("auto"); auto_xerces_str wsFilter ("filter"); auto_xerces_str wsMaxConnections("max_connections"); auto_xerces_str wsRemoteIP ("remote_ip"); auto_xerces_str wsRemotePort ("remote_port"); auto_xerces_str wsReconnect ("reconnect"); auto_xerces_str wsAcceptorName ("acceptor"); auto_xerces_str wsConnectorName ("connector"); auto_xerces_str wsReceiverName ("receiver"); auto_xerces_str wsSenderName ("sender"); auto_xerces_str wsType ("type"); while (pChild) { ENetworkType NetworkType = NETWORK_NONE; CProtocolInfo *pProtocol = null_v; CField *pCommandIDField = null_v; CField *pSizeIDField = null_v; bool_ bIsAutoStart = true_v; if (0 == XMLString::compareString(pChild->getNodeName(), wsAcceptorName)) { NetworkType = NETWORK_ACCEPTOR; } else if (0 == XMLString::compareString(pChild->getNodeName(), wsConnectorName)) { NetworkType = NETWORK_CONNECTOR; } else if (0 == XMLString::compareString(pChild->getNodeName(), wsReceiverName)) { NetworkType = NETWORK_RECEIVER; } else if (0 == XMLString::compareString(pChild->getNodeName(), wsSenderName)) { NetworkType = NETWORK_SENDER; } else { pChild = (DOMElement *)pChild->getNextSibling(); continue; } auto_xerces_str sProtocolName(pChild->getAttribute(wsProtocolName)); if (SUCCESS != _ERR( CXMLLoaderProtocol::Instance()->Load(pParser, pszEnvironmentPath, sProtocolName))) { _RET(XML_LOADER_ERROR); } // if (SUCCESS != _ERR( CProtocolManager::instance()->getProtocol(sProtocolName, pProtocol))) { _RET(XML_LOADER_ERROR); } // auto_xerces_str sCommandID(pChild->getAttribute(wsCommandID)); if (SUCCESS != _ERR(pProtocol->getHeadField(sCommandID, pCommandIDField)) || FIELD_NORMAL_STYLE != (pCommandIDField->type() & FIELD_NORMAL_STYLE) || 4 < _LEN(pCommandIDField->type())) { _RET(XML_LOADER_ERROR); } // auto_xerces_str sSizeID(pChild->getAttribute(wsSizeID)); if (SUCCESS != _ERR(pProtocol->getHeadField(sSizeID, pSizeIDField)) || FIELD_NORMAL_STYLE != (pSizeIDField->type() & FIELD_NORMAL_STYLE) || 4 < _LEN(pSizeIDField->type())) { _RET(XML_LOADER_ERROR); } // auto_xerces_str wsAutoFalse("false"); if (0 == XMLString::compareString(wsAutoFalse, pChild->getAttribute(wsAuto))) { bIsAutoStart = false_v; } CNode *pNetwork = null_v; auto_xerces_str sName(pChild->getAttribute(wsName)); switch (NetworkType) { case NETWORK_NONE: _RET(XML_LOADER_ERROR); case NETWORK_ACCEPTOR: { auto_xerces_str sLocalPort(pChild->getAttribute(wsLocalPort)); auto_xerces_str sMaxConnections(pChild->getAttribute(wsMaxConnections)); pNetwork = new CAcceptor(pProtocol, pCommandIDField, pSizeIDField, (ub_2)atoi(sLocalPort), (size_)atoi(sMaxConnections), bIsAutoStart); } break; case NETWORK_CONNECTOR: { // auto_xerces_str nLocalPort (pChild->getAttribute(wsLocalPort)); auto_xerces_str sRemoteIP (pChild->getAttribute(wsRemoteIP)); auto_xerces_str nRemotePort (pChild->getAttribute(wsRemotePort)); auto_xerces_str sReconnect (pChild->getAttribute(wsReconnect)); pNetwork = new CConnector(pProtocol, pCommandIDField, pSizeIDField, (ub_2)atoi(nLocalPort), (const ch_1 *)sRemoteIP, (ub_2)atoi(nRemotePort), (b_4)atoi(sReconnect), bIsAutoStart); } break; case NETWORK_RECEIVER: { // auto_xerces_str sLocalPort(pChild->getAttribute(wsLocalPort)); pNetwork = new CReceiver(pProtocol, pCommandIDField, pSizeIDField, (ub_2)atoi(sLocalPort), bIsAutoStart); } break; case NETWORK_SENDER: { // auto_xerces_str sLocalPort(pChild->getAttribute(wsLocalPort)); pNetwork = new CSender(pProtocol, pCommandIDField, pSizeIDField, (ub_2)atoi(sLocalPort), bIsAutoStart); } } CNodeConf *pNetworkConf = (CNodeConf *) pNetwork->getConf(); // DOMElement *pSub = (DOMElement *)pChild->getFirstChild(); if (!pSub) _RET(XML_LOADER_ERROR); while (pSub) { if (0 == XMLString::compareString(pSub->getNodeName(), wsIdentity)) { // auto_xerces_str sIdentity(pSub->getAttribute(wsIdentity)); ch_1 *sIdentityName = null_v; if (SUCCESS != _ERR(GetLastName(sIdentity, sIdentityName))) _RET(XML_LOADER_ERROR); v_ *pV = pProtocol->data().value(sIdentityName); if (!pV) _RET(XML_LOADER_ERROR); // auto_xerces_str sPDU(pSub->getAttribute(wsPDU)); CPduInfo *pPDU = null_v; if (SUCCESS != _ERR(pProtocol->getPdu(sPDU, pPDU))) _RET(XML_LOADER_ERROR); // auto_xerces_str sDirection(pSub->getAttribute(wsDirection)); EDirection Direction; if (SUCCESS != _ERR(GetDirection(sDirection, Direction))) _RET(XML_LOADER_ERROR); // if (SUCCESS != _ERR(pNetworkConf->ConfigPDU(*pV, pPDU, Direction))) { _RET(XML_LOADER_ERROR); } } else if (0 == XMLString::compareString(pSub->getNodeName(), wsFilter)) { CIPFilter *pIPFilter = null_v; if (NETWORK_ACCEPTOR == NetworkType) { pIPFilter = &((CAcceptorConf *)pNetworkConf)->IPFilter(); } else if (NETWORK_RECEIVER == NetworkType) { pIPFilter = &((CReceiverConf *)pNetworkConf)->IPFilter(); } else { _RET(XML_LOADER_ERROR); } auto_xerces_str sType(pSub->getAttribute(wsType)); if (0 == strcmp(sType, "forbid")) { pIPFilter->setForbid(true_v); } else if (0 == strcmp(sType, "permit")) { pIPFilter->setForbid(false_v); } auto_xerces_str sIPGroup(pSub->getTextContent()); if (false_v == pIPFilter->addIpGroup((const ch_1 *) sIPGroup)) _RET(XML_LOADER_ERROR); } pSub = (DOMElement *)pSub->getNextSibling(); } if (SUCCESS != _ERR(CNetworkManager::instance()->AddNetwork( (const char *)sName, NetworkType, pNetwork))) { _RET(XML_LOADER_ERROR); } pChild = (DOMElement *)pChild->getNextSibling(); } _RET(SUCCESS); }
void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( const InputPath &inputPath, size_t lastPos, ChartParserCallback &outColl) { const StaticData &staticData = StaticData::Instance(); const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal(); const WordsRange &range = inputPath.GetWordsRange(); size_t relEndPos = range.GetEndPos() - range.GetStartPos(); size_t absEndPos = range.GetEndPos(); // MAIN LOOP. create list of nodes of target phrases DottedRuleStackOnDisk &expandableDottedRuleList = *m_expandableDottedRuleListVec[range.GetStartPos()]; // sort save nodes so only do nodes with most counts expandableDottedRuleList.SortSavedNodes(); const DottedRuleStackOnDisk::SavedNodeColl &savedNodeColl = expandableDottedRuleList.GetSavedNodeColl(); //cerr << "savedNodeColl=" << savedNodeColl.size() << " "; const ChartCellLabel &sourceWordLabel = GetSourceAt(absEndPos); for (size_t ind = 0; ind < (savedNodeColl.size()) ; ++ind) { const SavedNodeOnDisk &savedNode = *savedNodeColl[ind]; const DottedRuleOnDisk &prevDottedRule = savedNode.GetDottedRule(); const OnDiskPt::PhraseNode &prevNode = prevDottedRule.GetLastNode(); size_t startPos = prevDottedRule.IsRoot() ? range.GetStartPos() : prevDottedRule.GetWordsRange().GetEndPos() + 1; // search for terminal symbol if (startPos == absEndPos) { OnDiskPt::Word *sourceWordBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceWordLabel.GetLabel()); if (sourceWordBerkeleyDb != NULL) { const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceWordBerkeleyDb, m_dbWrapper); if (node != NULL) { // TODO figure out why source word is needed from node, not from sentence // prob to do with factors or non-term //const Word &sourceWord = node->GetSourceWord(); DottedRuleOnDisk *dottedRule = new DottedRuleOnDisk(*node, sourceWordLabel, prevDottedRule); expandableDottedRuleList.Add(relEndPos+1, dottedRule); // cache for cleanup m_sourcePhraseNode.push_back(node); } delete sourceWordBerkeleyDb; } } // search for non-terminals size_t endPos, stackInd; if (startPos > absEndPos) continue; else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) { // start. endPos = absEndPos - 1; stackInd = relEndPos; } else { endPos = absEndPos; stackInd = relEndPos + 1; } // get target nonterminals in this span from chart const ChartCellLabelSet &chartNonTermSet = GetTargetLabelSet(startPos, endPos); //const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal() // ,&defaultTargetNonTerm = staticData.GetOutputDefaultNonTerminal(); // go through each SOURCE lhs const NonTerminalSet &sourceLHSSet = GetParser().GetInputPath(startPos, endPos).GetNonTerminalSet(); NonTerminalSet::const_iterator iterSourceLHS; for (iterSourceLHS = sourceLHSSet.begin(); iterSourceLHS != sourceLHSSet.end(); ++iterSourceLHS) { const Word &sourceLHS = *iterSourceLHS; OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceLHS); if (sourceLHSBerkeleyDb == NULL) { delete sourceLHSBerkeleyDb; continue; // vocab not in pt. node definately won't be in there } const OnDiskPt::PhraseNode *sourceNode = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper); delete sourceLHSBerkeleyDb; if (sourceNode == NULL) continue; // didn't find source node // go through each TARGET lhs ChartCellLabelSet::const_iterator iterChartNonTerm; for (iterChartNonTerm = chartNonTermSet.begin(); iterChartNonTerm != chartNonTermSet.end(); ++iterChartNonTerm) { if (*iterChartNonTerm == NULL) { continue; } const ChartCellLabel &cellLabel = **iterChartNonTerm; bool doSearch = true; if (m_dictionary.m_maxSpanDefault != NOT_FOUND) { // for Hieu's source syntax bool isSourceSyntaxNonTerm = sourceLHS != defaultSourceNonTerm; size_t nonTermNumWordsCovered = endPos - startPos + 1; doSearch = isSourceSyntaxNonTerm ? nonTermNumWordsCovered <= m_dictionary.m_maxSpanLabelled : nonTermNumWordsCovered <= m_dictionary.m_maxSpanDefault; } if (doSearch) { OnDiskPt::Word *chartNonTermBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_outputFactorsVec, cellLabel.GetLabel()); if (chartNonTermBerkeleyDb == NULL) continue; const OnDiskPt::PhraseNode *node = sourceNode->GetChild(*chartNonTermBerkeleyDb, m_dbWrapper); delete chartNonTermBerkeleyDb; if (node == NULL) continue; // found matching entry //const Word &sourceWord = node->GetSourceWord(); DottedRuleOnDisk *dottedRule = new DottedRuleOnDisk(*node, cellLabel, prevDottedRule); expandableDottedRuleList.Add(stackInd, dottedRule); m_sourcePhraseNode.push_back(node); } } // for (iterChartNonTerm delete sourceNode; } // for (iterLabelListf // return list of target phrases DottedRuleCollOnDisk &nodes = expandableDottedRuleList.Get(relEndPos + 1); // source LHS DottedRuleCollOnDisk::const_iterator iterDottedRuleColl; for (iterDottedRuleColl = nodes.begin(); iterDottedRuleColl != nodes.end(); ++iterDottedRuleColl) { // node of last source word const DottedRuleOnDisk &prevDottedRule = **iterDottedRuleColl; if (prevDottedRule.Done()) continue; prevDottedRule.Done(true); const OnDiskPt::PhraseNode &prevNode = prevDottedRule.GetLastNode(); //get node for each source LHS const NonTerminalSet &lhsSet = GetParser().GetInputPath(range.GetStartPos(), range.GetEndPos()).GetNonTerminalSet(); NonTerminalSet::const_iterator iterLabelSet; for (iterLabelSet = lhsSet.begin(); iterLabelSet != lhsSet.end(); ++iterLabelSet) { const Word &sourceLHS = *iterLabelSet; OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceLHS); if (sourceLHSBerkeleyDb == NULL) continue; const TargetPhraseCollection *targetPhraseCollection = NULL; const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper); if (node) { uint64_t tpCollFilePos = node->GetValue(); std::map<uint64_t, const TargetPhraseCollection*>::const_iterator iterCache = m_cache.find(tpCollFilePos); if (iterCache == m_cache.end()) { const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper); std::vector<float> weightT = staticData.GetWeights(&m_dictionary); targetPhraseCollection = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec ,m_outputFactorsVec ,m_dictionary ,weightT ,m_dbWrapper.GetVocab() ,true); delete tpcollBerkeleyDb; m_cache[tpCollFilePos] = targetPhraseCollection; } else { // just get out of cache targetPhraseCollection = iterCache->second; } UTIL_THROW_IF2(targetPhraseCollection == NULL, "Error"); if (!targetPhraseCollection->IsEmpty()) { AddCompletedRule(prevDottedRule, *targetPhraseCollection, range, outColl); } } // if (node) delete node; delete sourceLHSBerkeleyDb; } } } // for (size_t ind = 0; ind < savedNodeColl.size(); ++ind) //cerr << numDerivations << " "; }
// Create/update compressed matrix that stores all valid ChartCellLabels for a given start position and label. void ChartRuleLookupManagerMemoryPerSentence::UpdateCompressedMatrix(size_t startPos, size_t origEndPos, size_t lastPos) { std::vector<size_t> endPosVec; size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals(); m_compressedMatrixVec.resize(lastPos+1); // we only need to update cell at [startPos, origEndPos-1] for initial lookup if (startPos < origEndPos) { endPosVec.push_back(origEndPos-1); } // update all cells starting from startPos+1 for lookup of rule extensions else if (startPos == origEndPos) { startPos++; for (size_t endPos = startPos; endPos <= lastPos; endPos++) { endPosVec.push_back(endPos); } //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span for (size_t pos = startPos+1; pos <= lastPos; pos++) { CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos]; cellMatrix.resize(numNonTerms); for (size_t i = 0; i < numNonTerms; i++) { if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) { cellMatrix[i].pop_back(); } } } } if (startPos > lastPos) { return; } // populate compressed matrix with all chart cells that start at current start position CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos]; cellMatrix.clear(); cellMatrix.resize(numNonTerms); for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) { size_t endPos = *p; // target non-terminal labels for the span const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos); if (targetNonTerms.GetSize() == 0) { continue; } #if !defined(UNLABELLED_SOURCE) // source non-terminal labels for the span const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos); // can this ever be true? Moses seems to pad the non-terminal set of the input with [X] if (inputPath.GetNonTerminalSet().size() == 0) { continue; } #endif for (size_t i = 0; i < numNonTerms; i++) { const ChartCellLabel *cellLabel = targetNonTerms.Find(i); if (cellLabel != NULL) { float score = cellLabel->GetBestScore(m_outColl); cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score)); } } } }
const uni_char *EmailNotification_WBXML_ContentHandler::OpaqueAttrHandlerL(char **buf) { return DecodeDateTime(GetParser(), buf); }
// Given a partial rule application ending at startPos-1 and given the sets of // source and target non-terminals covering the span [startPos, endPos], // determines the full or partial rule applications that can be produced through // extending the current rule application by a single non-terminal. void ChartRuleLookupManagerMemory::ExtendPartialRuleApplication( const DottedRuleInMemory &prevDottedRule, size_t startPos, size_t endPos, size_t stackInd, DottedRuleColl & dottedRuleColl) { // source non-terminal labels for the remainder const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos); const NonTerminalSet &sourceNonTerms = inputPath.GetNonTerminalSet(); // target non-terminal labels for the remainder const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos); // note where it was found in the prefix tree of the rule dictionary const PhraseDictionaryNodeMemory &node = prevDottedRule.GetLastNode(); const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node.GetNonTerminalMap(); const size_t numChildren = nonTermMap.size(); if (numChildren == 0) { return; } const size_t numSourceNonTerms = sourceNonTerms.size(); const size_t numTargetNonTerms = targetNonTerms.GetSize(); const size_t numCombinations = numSourceNonTerms * numTargetNonTerms; // We can search by either: // 1. Enumerating all possible source-target NT pairs that are valid for // the span and then searching for matching children in the node, // or // 2. Iterating over all the NT children in the node, searching // for each source and target NT in the span's sets. // We'll do whichever minimises the number of lookups: if (numCombinations <= numChildren*2) { // loop over possible source non-terminal labels (as found in input tree) NonTerminalSet::const_iterator p = sourceNonTerms.begin(); NonTerminalSet::const_iterator sEnd = sourceNonTerms.end(); for (; p != sEnd; ++p) { const Word & sourceNonTerm = *p; // loop over possible target non-terminal labels (as found in chart) ChartCellLabelSet::const_iterator q = targetNonTerms.begin(); ChartCellLabelSet::const_iterator tEnd = targetNonTerms.end(); for (; q != tEnd; ++q) { const ChartCellLabel &cellLabel = q->second; // try to match both source and target non-terminal const PhraseDictionaryNodeMemory * child = node.GetChild(sourceNonTerm, cellLabel.GetLabel()); // nothing found? then we are done if (child == NULL) { continue; } // create new rule #ifdef USE_BOOST_POOL DottedRuleInMemory *rule = m_dottedRulePool.malloc(); new (rule) DottedRuleInMemory(*child, cellLabel, prevDottedRule); #else DottedRuleInMemory *rule = new DottedRuleInMemory(*child, cellLabel, prevDottedRule); #endif dottedRuleColl.Add(stackInd, rule); } } } else { // loop over possible expansions of the rule PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p; PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end(); for (p = nonTermMap.begin(); p != end; ++p) { // does it match possible source and target non-terminals? const PhraseDictionaryNodeMemory::NonTerminalMapKey &key = p->first; const Word &sourceNonTerm = key.first; if (sourceNonTerms.find(sourceNonTerm) == sourceNonTerms.end()) { continue; } const Word &targetNonTerm = key.second; const ChartCellLabel *cellLabel = targetNonTerms.Find(targetNonTerm); if (!cellLabel) { continue; } // create new rule const PhraseDictionaryNodeMemory &child = p->second; #ifdef USE_BOOST_POOL DottedRuleInMemory *rule = m_dottedRulePool.malloc(); new (rule) DottedRuleInMemory(child, *cellLabel, prevDottedRule); #else DottedRuleInMemory *rule = new DottedRuleInMemory(child, *cellLabel, prevDottedRule); #endif dottedRuleColl.Add(stackInd, rule); } } }