void TrecDocumentProcessorTestCase::testProcessGZipFile() { TrecDocumentProcessor processor; processor.init(m_pDocSchema.get(), m_pDocTemp.get()); DocumentSource docSource(m_pDocSchema.get()); RawDocumentPtr pRawDoc = new RawDocument(); pRawDoc->setPath(getTestPath() + "/1.gz"); docSource.setRawDocument(pRawDoc); Answer ans; makeAnswer(TEST_FILE2, ans); size_t i = 0; do { processor.process(docSource); DocumentPtr pDoc = docSource.stealLastDocument(); CPPUNIT_ASSERT(pDoc); Document::Iterator it = pDoc->iterator(); while (it.hasNext()) { const Field* pField = it.next(); CPPUNIT_ASSERT_EQUAL(ans[i].first, pField->getFieldSchema()->getName()); CPPUNIT_ASSERT_EQUAL(ans[i].second, std::string(pField->getValue().c_str())); ++i; } } while(docSource.toBeContinued()); CPPUNIT_ASSERT_EQUAL(ans.size(), i); }
void Document::HandleJsonReferences(void) { Document::Iterator i = this->Begin(); while (i != this->End()) { // Looking for an object with a string member named // $ref. This special combination signifies a JSON // reference. If this is found, we create a JSON reference // element to replace the object. Element& element = i.GetElement(); if ( (element.GetType() == ELEMENT_TYPE_OBJECT) && (element.HasElement("$ref")) && (element.GetElement("$ref").GetType() == ELEMENT_TYPE_STRING) ) { ElementReference* referenceElement = new ElementReference( element.GetElement("$ref").GetValueAsString(), this); if (element.parentElement != nullptr) { element.parentElement->ReplaceElement(element, *referenceElement); } } i++; } }
void TrecDocumentProcessorTestCase::testProcessFileMisField() { string sPath = writeTestFile("trec_file3.txt", TEST_FILE_MISS_FIELD); TrecDocumentProcessor processor; processor.init(m_pDocSchema.get(), m_pDocTemp.get()); DocumentSource docSource(m_pDocSchema.get()); RawDocumentPtr pRawDoc = new RawDocument(); pRawDoc->setPath(sPath); docSource.setRawDocument(pRawDoc); processor.process(docSource); DocumentPtr pDoc = docSource.stealLastDocument(); CPPUNIT_ASSERT(pDoc); Answer ans; makeAnswer(TEST_FILE_MISS_FIELD, ans); Document::Iterator it = pDoc->iterator(); CPPUNIT_ASSERT_EQUAL(ans.size(), it.size()); size_t i = 0; while (it.hasNext()) { const Field* pField = it.next(); // cout << ans[i].first << " : " << ans[i].second << endl; CPPUNIT_ASSERT_EQUAL(ans[i].first, pField->getFieldSchema()->getName()); CPPUNIT_ASSERT_EQUAL(ans[i].second, std::string(pField->getValue().c_str())); ++i; } }
void StandardDocumentProcessorTestCase::testProcessWithEmptyField() { String sPath = writeTestFile("file_with_empty_field.txt", TEST_FILE_WITH_EMPTY_FIELD); StandardDocumentProcessor processor; processor.init(m_pDocSchema.get()); DocumentSource docSource(m_pDocSchema.get()); RawDocumentPtr pRawDoc = new RawDocument(); pRawDoc->setPath(sPath); docSource.setRawDocument(pRawDoc); processor.process(docSource); DocumentPtr pDoc = docSource.stealLastDocument(); CPPUNIT_ASSERT(pDoc.isNotNull()); Answer ans; makeAnswer(TEST_FILE_WITH_EMPTY_FIELD, ans); Document::Iterator it = pDoc->iterator(); CPPUNIT_ASSERT_EQUAL(ans.size(), it.size()); size_t i = 0; while (it.hasNext()) { const Field* pField = it.next(); // cout << ans[i].first << " : " << ans[i].second << endl; CPPUNIT_ASSERT_EQUAL(ans[i].first, pField->getFieldSchema()->getName()); CPPUNIT_ASSERT_EQUAL(ans[i].second, std::string(pField->getValue().c_str())); ++i; } }
void StandardDocumentProcessorTestCase::testProcessMultiFile() { string sPath = writeTestFile("file2.txt", TEST_FILE2); StandardDocumentProcessor processor; processor.init(m_pDocSchema.get()); DocumentSource docSource(m_pDocSchema.get()); Answer ans; makeAnswer(TEST_FILE2, ans); RawDocumentPtr pRawDoc = new RawDocument(); pRawDoc->setPath(sPath); docSource.setRawDocument(pRawDoc); size_t i = 0; do { processor.process(docSource); DocumentPtr pDoc = docSource.stealLastDocument(); CPPUNIT_ASSERT(pDoc.isNotNull()); Document::Iterator it = pDoc->iterator(); while (it.hasNext()) { const Field* pField = it.next(); CPPUNIT_ASSERT_EQUAL(ans[i].first, pField->getFieldSchema()->getName()); CPPUNIT_ASSERT_EQUAL(ans[i].second, std::string(pField->getValue().c_str())); ++i; } } while(docSource.toBeContinued()); CPPUNIT_ASSERT_EQUAL(ans.size(), i); }
// Validates the elements of this document against the schema, one by one,
// from Begin() to End().
//
// raiseException is forwarded unchanged to each element's own
// ValidateAgainstSchema call.
//
// Iteration stops after the first element whose validation result is false.
// Returns the result of the last validation performed, or true when the
// document has no elements to visit.
bool_t Document::ValidateAgainstSchema(bool_t raiseException)
{
    bool_t isValid = true;

    for (Document::Iterator cursor = this->Begin();
         (cursor != this->End()) && (isValid);
         cursor++)
    {
        isValid = cursor.GetElement().ValidateAgainstSchema(raiseException);
    }

    return isValid;
}
void TrecDocumentProcessorTestCase::testProcessTrecFileWithMeta() { m_pDocSchema = new DocumentSchema(); m_pDocSchema->addTextField("field1"); m_pDocSchema->addTextField("field2"); m_pDocSchema->addTextField("url"); m_pDocSchema->addTextField("date"); m_pDocSchema->addTextField("content_type"); m_pDocSchema->addTextField("content_length"); m_pDocSchema->addTextField("last_modified"); m_pDocSchema->addTextField("title"); m_pDocSchema->addTextField("body"); m_pDocTemp = new DocumentTemplate(); m_pDocTemp->setDocTag("DOC"); m_pDocTemp->setEmbeddedField("body"); m_pDocTemp->addMeta("url", "url"); m_pDocTemp->addMeta("Date", "date"); m_pDocTemp->addMeta("Content-Type", "content_type"); m_pDocTemp->addMeta("Content-Length", "content_length"); m_pDocTemp->addMeta("Last-Modified", "last_modified"); m_pDocTemp->addMeta("title", "title"); m_pDocTemp->addTag("field1", "field1"); m_pDocTemp->addTag("field2", "field2"); m_pDocTemp->addTag("DOCHDR"); m_pDocTemp->makeSure(m_pDocSchema.get()); string sPath = writeTestFile("trec_file3.txt", TEST_FILE3); TrecDocumentProcessor processor; processor.setContentType("html"); processor.init(m_pDocSchema.get(), m_pDocTemp.get()); DocumentSource docSource(m_pDocSchema.get()); RawDocumentPtr pRawDoc = new RawDocument(); pRawDoc->setPath(sPath); docSource.setRawDocument(pRawDoc); processor.process(docSource); DocumentPtr pDoc = docSource.stealLastDocument(); CPPUNIT_ASSERT(pDoc); Document::Iterator it = pDoc->iterator(); CPPUNIT_ASSERT(it.hasNext()); const Field* pField = it.next(); CPPUNIT_ASSERT_EQUAL(string("value1"), string(pField->getValue().c_str())); CPPUNIT_ASSERT(it.hasNext()); pField = it.next(); CPPUNIT_ASSERT_EQUAL(string("value2"), string(pField->getValue().c_str())); CPPUNIT_ASSERT(it.hasNext()); pField = it.next(); CPPUNIT_ASSERT_EQUAL(string("http://www.ram.org:80/ramblings/movies/jimmy_hollywood.html"), string(pField->getValue().c_str())); CPPUNIT_ASSERT(it.hasNext()); pField = it.next(); 
CPPUNIT_ASSERT_EQUAL(string("Wednesday, 01-Jan-97 15:20:23 GMT"), string(pField->getValue().c_str())); CPPUNIT_ASSERT(it.hasNext()); pField = it.next(); CPPUNIT_ASSERT_EQUAL(string("text/html"), string(pField->getValue().c_str())); CPPUNIT_ASSERT(it.hasNext()); pField = it.next(); CPPUNIT_ASSERT_EQUAL(string("1873"), string(pField->getValue().c_str())); CPPUNIT_ASSERT(it.hasNext()); pField = it.next(); CPPUNIT_ASSERT_EQUAL(string("Thursday, 23-Nov-95 03:11:57 GMT"), string(pField->getValue().c_str())); CPPUNIT_ASSERT(it.hasNext()); pField = it.next(); CPPUNIT_ASSERT_EQUAL(string(" Jimmy Hollywood movie review "), string(pField->getValue().c_str())); CPPUNIT_ASSERT(it.hasNext()); pField = it.next(); CPPUNIT_ASSERT_EQUAL(string(" Jimmy Hollywood "), string(pField->getValue().c_str())); }