Ejemplo n.º 1
0
// 2006/11/07
void handleDocument(DOMDocument* document, char* outputfile)
{
  if (document == NULL) {
    cerr << _PREFIX_ << "WARNING null XML Document\n";
    return;
  }
  DOMElement* docElement = NULL;
  docElement = document->getDocumentElement();

  if (docElement == NULL) {
    cerr << _PREFIX_ << "WARNING null XML Document Element\n";
    return;
  }


  DOMNodeList* sections = docElement->getElementsByTagName(XMLString::transcode("section"));
  int sCount = 0;
  int paragraphID = 1;

  for (unsigned int i = 0; i < sections->getLength(); i++) {
    int sectID = i+1;
    if (verbose) cerr << _PREFIX_ << "Section " << i;
    DOMElement* aSect = NULL;
    aSect = (DOMElement*)sections->item(i);
    string sectName = XMLString::transcode(((DOMElement*)aSect)->getAttribute(XMLString::transcode("name")));
    string analyze =
      XMLString::transcode(((DOMElement*)aSect)->getAttribute(XMLString::transcode("analyze")));
    if (verbose) cerr << " ['" << sectName << "'";
    // Ignore non affected sections 
    if ( ( analyze == "yes" ) ||
         ( sectName.length() == 0 ) ||
         ( affectedSections.find(sectName + "|") != string::npos )) {      
      
      if (verbose) cerr << " analyze=yes ";
      // Deal with unnamed and affected sections

      // create a stream
      aSect->normalize();
      stringstream text(XMLString::transcode(aSect->getTextContent()));

      if (verbose) {
        string temp(XMLString::transcode(aSect->getTextContent()));
        cerr << temp.length() << " charcters]\n" << _PREFIX_ << "           ";
      }

      aSect->removeChild(aSect->getFirstChild());

      vector<vector<string> > result;
      result = seg->segmentInVectors(text);

      for(vector<vector<string> >::iterator itp = result.begin(); 
          itp != result.end(); 
          itp++) {
        stringstream parID;
        parID << _ID_PREFIX_SECTION << sectID << _ID_PREFIX_PARAGRAPH << paragraphID++;
        sCount += addSegmentedParagraphToSection(*itp,aSect,parID.str().c_str());
      }
   
      if (verbose) cerr << endl;
    } else {
      if (verbose) cerr << " analyze=no]" << endl;
    }
  }

  if (stamp) stampDocument(docElement);

  /* SERIALIZE XML DOCUMENT */
  if (outputfile == NULL) xmlInterface->serializeTo(document);
  else xmlInterface->serializeTo(document,outputfile);

  if (verbose) cerr << _PREFIX_ << "Segmented in " << sCount << " sentences." << endl;
}