static unsigned int processDocuments( const strus::PatternMatcherInstanceInterface* ptinst, const KeyTokenMap& keytokenmap, const std::vector<TreeNode*> treear, const std::vector<strus::utils::Document>& docs, std::map<std::string,double>& stats, const char* outputpath) { unsigned int totalNofmatches = 0; std::vector<strus::utils::Document>::const_iterator di = docs.begin(), de = docs.end(); std::size_t didx = 0; for (; di != de; ++di,++didx) { #ifdef STRUS_LOWLEVEL_DEBUG std::cout << "document " << di->tostring() << std::endl; #endif std::vector<strus::analyzer::PatternMatcherResult> results = eliminateDuplicates( sortResults( processDocument( ptinst, *di, stats))); if (outputpath) { std::ostringstream out; out << "number of matches " << results.size() << std::endl; strus::utils::printResults( out, std::vector<strus::SegmenterPosition>(), results); std::string outputfile( outputpath); outputfile.push_back( strus::dirSeparator()); outputfile.append( "res.txt"); strus::writeFile( outputfile, out.str()); } std::vector<strus::analyzer::PatternMatcherResult> expectedResults = eliminateDuplicates( sortResults( processDocumentAlt( keytokenmap, treear, *di))); if (outputpath) { std::ostringstream out; out << "number of matches " << expectedResults.size() << std::endl; strus::utils::printResults( out, std::vector<strus::SegmenterPosition>(), expectedResults); std::string outputfile( outputpath); outputfile.push_back( strus::dirSeparator()); outputfile.append( "exp.txt"); strus::writeFile( outputfile, out.str()); } if (!compareResults( results, expectedResults)) { throw std::runtime_error(std::string( "results differ to expected for document ") + di->id); } totalNofmatches += results.size(); if (g_errorBuffer->hasError()) { throw std::runtime_error("error matching rule"); } } return totalNofmatches; }
vector<Result*> QueryProcessor::searchIndex(string search_string, IndexHandler*& ih) { parseQuery(search_string); //check different types of arguments vector<Page*>results; if (currentQ->getandArgs().size() > 0) { for (auto e: currentQ->getandArgs()) { if (results.size() > 0) { set<Page*>test(results.begin(), results.end()); results.clear(); set<Page*> andargs = ih->searchIndex(e); set_intersection(test.begin(), test.end(), andargs.begin(), andargs.end(), back_inserter(results)); } else { set<Page*> andargs = ih->searchIndex(e); copy(andargs.begin(), andargs.end(), back_inserter(results)); } } } else if (currentQ->getorArgs().size() > 0) { set<Page*>orResultSet; for (auto e: currentQ->getorArgs()) { set<Page*> a = ih->searchIndex(e); orResultSet.insert(a.begin(), a.end()); } copy(orResultSet.begin(), orResultSet.end(), back_inserter(results)); } else if (currentQ->getnormArgs().size() > 0) { set<Page*> a = ih->searchIndex(currentQ->getnormArgs()[0]); copy(a.begin(), a.end(), back_inserter(results)); } if (currentQ->getnotArgs().size() > 0) { for (auto e: currentQ->getnotArgs()) { set<Page*>test(results.begin(), results.end()); results.clear(); set<Page*> notargs = ih->searchIndex(e); set_difference(test.begin(), test.end(), notargs.begin(), notargs.end(), back_inserter(results)); } } for (auto e: results) cout << e->getTitle() << endl; vector<Result*> resultsvector = sortResults(results); return resultsvector; }
/*
 * Sorts list[start..end] (both bounds inclusive) in ascending order of
 * swResultGetScore() using an in-place quicksort with the middle element as
 * pivot. A range with start >= end is left untouched.
 *
 * Only the smaller partition is handled recursively; the larger one is
 * processed by the enclosing loop. This bounds the recursion depth to
 * O(log n) even for inputs that partition badly (e.g. many equal scores,
 * which all end up on the left side). The resulting order is the same as
 * with recursion on both sides, because the two partitions are independent.
 */
static void sortResults(SWResult* list[], int start, int end) {

    MatcherScore key;
    int frontIdx;
    int backIdx;
    int pivot;

    while (start < end) {

        /* overflow-safe midpoint; the pivot is parked at list[start] */
        pivot = start + (end - start) / 2;
        swapResults(&list[start], &list[pivot]);
        key = swResultGetScore(list[start]);

        frontIdx = start + 1;
        backIdx = end;

        while (frontIdx <= backIdx) {

            /* skip elements that already belong to the left side (<= key) */
            while ((frontIdx <= end) &&
                   (swResultGetScore(list[frontIdx]) <= key)) {
                frontIdx++;
            }

            /* skip elements that already belong to the right side (> key);
               list[start] holds the key itself, so this stops at start */
            while ((backIdx >= start) &&
                   (swResultGetScore(list[backIdx]) > key)) {
                backIdx--;
            }

            if (frontIdx < backIdx) {
                swapResults(&list[frontIdx], &list[backIdx]);
            }
        }

        /* move the pivot to its final position */
        swapResults(&list[start], &list[backIdx]);

        if (backIdx - start < end - backIdx) {
            sortResults(list, start, backIdx - 1);
            start = backIdx + 1;
        } else {
            sortResults(list, backIdx + 1, end);
            end = backIdx - 1;
        }
    }
}
/*
 * Prints a one-line summary (size, time, GCuPS) for swData followed by each
 * of its results. Does nothing when TEST_MODE is set.
 *
 * Unless swData->shotgun is set, the results are sorted with sortResults()
 * first. Results are always printed from the last array slot to the first.
 */
extern void swDataPrint(SWData* swData) {

    double cellCount;
    double gcups;
    int idx;

    if (TEST_MODE) {
        return;
    }

    /* cells computed per second, scaled by 1e-9 */
    cellCount = (double) swData->columns * (double) swData->rows;
    gcups = cellCount / swData->time * 1e-9;

    printf(
        "Size: %d * %d | Time: %lf | GCuPS: %lf\n",
        swData->rows, swData->columns, swData->time, gcups
    );

    if (!swData->shotgun) {
        sortResults(swData->results, 0, swData->resultNmr - 1);
    }

    /* walk the array backwards */
    idx = swData->resultNmr;
    while (idx-- > 0) {
        printf("\n");
        swResultPrint(swData->results[idx]);
    }
}
int main(int argc, char * argv[]) { copyArgV = argv; int i = 0; int c = 0; int fd = 0; int smallCount = 100000; int shortFile = 1; if(argc < 2) { fprintf(stderr, "Invlaid number of Arguments. Exiting Program"); return 1; } if (pthread_mutex_init(&mu, NULL) != 0) { fprintf(stderr, "can't init mutex"); } if (pthread_cond_init(&cv, NULL) != 0) { fprintf(stderr, "can't init condition variable"); } //finds the smallest file, saves it's index for(c = 1; c < argc; c++) { fd = open(argv[c], O_RDONLY); if(fd < 0) { fprintf(stderr, "Could not open file! Invalid file name. Exiting program\n"); exit(-1); } else { int bufferSize = (int)lseek(fd, 0, SEEK_END); if(bufferSize == -1) { fprintf(stderr, "lseek failed. exiting program\n"); exit(-1); } fileSize[c] = bufferSize; if(bufferSize < smallCount && bufferSize != 0) { shortFile = c; smallCount = bufferSize; } } } //switch shortest file with first file in argument array char * temp = argv[shortFile]; argv[shortFile] = argv[1]; argv[1] = temp; int numFile = 1; for(numFile = 1; numFile < argc; numFile++) { int fd = open(argv[numFile], O_RDONLY); //check if file could be opened if(fd < 0) { fprintf(stderr, "Could not open file. Invalid File name. Exiting program\n"); exit(-1); } else { int bufferSize = (int)lseek((int)fd, 0, SEEK_END); if(bufferSize == -1) { fprintf(stderr, "lseek failed. exiting program\n"); exit(-1); } if(lseek(fd, 0 , SEEK_SET) == -1) { fprintf(stderr, "lseek failed. exiting program\n"); exit(-1); } if(bufferSize == 0) { fprintf(stderr, "Empty file, skipping file. The rest of the files will still be processed.\n"); } else { //FOR LOAD IMBALANCE. 
Process shorter files serially, larger threads with multiple threads per file if(bufferSize >= 500000) { //multi threaded threadCount = 0; pthread_t pt[MAX_THREADS]; for(i = 0; i < MAX_THREADS; i++) { threadInfo * myThread = malloc(sizeof(threadInfo)); if(myThread == NULL) { fprintf(stderr, "malloc failed, exiting program\n"); exit(-1); } myThread -> threadID = i; myThread -> filePosition = numFile; pthread_create(&pt[i], NULL, work, (void*)myThread); } int w = 0; for(w = 0; w < MAX_THREADS; w++) { if(pthread_join(pt[w], NULL) != 0) { fprintf(stderr, "Could not join threads!\n"); } } checkThreadRef(numFile); } else { //serial char buffer[bufferSize]; read(fd, buffer, bufferSize); createHash(0, bufferSize, buffer, numFile); } } } } checkRef(argc); createResults(); int numResults = 0; i = 0; while(results[i].count != 0) { i++; } numResults = i; sortResults(numResults); printResults(); return 0; }