// 2006/11/07 void handleDocument(DOMDocument* document, char* outputfile) { if (document == NULL) { cerr << _PREFIX_ << "WARNING null XML Document\n"; return; } DOMElement* docElement = NULL; docElement = document->getDocumentElement(); if (docElement == NULL) { cerr << _PREFIX_ << "WARNING null XML Document Element\n"; return; } DOMNodeList* sections = docElement->getElementsByTagName(XMLString::transcode("section")); int sCount = 0; int paragraphID = 1; for (unsigned int i = 0; i < sections->getLength(); i++) { int sectID = i+1; if (verbose) cerr << _PREFIX_ << "Section " << i; DOMElement* aSect = NULL; aSect = (DOMElement*)sections->item(i); string sectName = XMLString::transcode(((DOMElement*)aSect)->getAttribute(XMLString::transcode("name"))); string analyze = XMLString::transcode(((DOMElement*)aSect)->getAttribute(XMLString::transcode("analyze"))); if (verbose) cerr << " ['" << sectName << "'"; // Ignore non affected sections if ( ( analyze == "yes" ) || ( sectName.length() == 0 ) || ( affectedSections.find(sectName + "|") != string::npos )) { if (verbose) cerr << " analyze=yes "; // Deal with unnamed and affected sections // create a stream aSect->normalize(); stringstream text(XMLString::transcode(aSect->getTextContent())); if (verbose) { string temp(XMLString::transcode(aSect->getTextContent())); cerr << temp.length() << " charcters]\n" << _PREFIX_ << " "; } aSect->removeChild(aSect->getFirstChild()); vector<vector<string> > result; result = seg->segmentInVectors(text); for(vector<vector<string> >::iterator itp = result.begin(); itp != result.end(); itp++) { stringstream parID; parID << _ID_PREFIX_SECTION << sectID << _ID_PREFIX_PARAGRAPH << paragraphID++; sCount += addSegmentedParagraphToSection(*itp,aSect,parID.str().c_str()); } if (verbose) cerr << endl; } else { if (verbose) cerr << " analyze=no]" << endl; } } if (stamp) stampDocument(docElement); /* SERIALIZE XML DOCUMENT */ if (outputfile == NULL) xmlInterface->serializeTo(document); else xmlInterface->serializeTo(document,outputfile); if (verbose) cerr << _PREFIX_ << "Segmented in " << sCount << " sentences." << endl; }