static unsigned int processDocuments( const strus::PatternMatcherInstanceInterface* ptinst, const KeyTokenMap& keytokenmap, const std::vector<TreeNode*> treear, const std::vector<strus::utils::Document>& docs, std::map<std::string,double>& stats, const char* outputpath)
{
	unsigned int totalNofmatches = 0;
	std::vector<strus::utils::Document>::const_iterator di = docs.begin(), de = docs.end();
	std::size_t didx = 0;
	for (; di != de; ++di,++didx)
	{
#ifdef STRUS_LOWLEVEL_DEBUG
		std::cout << "document " << di->tostring() << std::endl;
#endif
		std::vector<strus::analyzer::PatternMatcherResult>
			results = eliminateDuplicates( sortResults( processDocument( ptinst, *di, stats)));

		if (outputpath)
		{
			std::ostringstream out;
			out << "number of matches " << results.size() << std::endl;
			strus::utils::printResults( out, std::vector<strus::SegmenterPosition>(), results);

			std::string outputfile( outputpath);
			outputfile.push_back( strus::dirSeparator());
			outputfile.append( "res.txt");

			strus::writeFile( outputfile, out.str());
		}
		std::vector<strus::analyzer::PatternMatcherResult>
			expectedResults = eliminateDuplicates( sortResults( processDocumentAlt( keytokenmap, treear, *di)));

		if (outputpath)
		{
			std::ostringstream out;
			out << "number of matches " << expectedResults.size() << std::endl;
			strus::utils::printResults( out, std::vector<strus::SegmenterPosition>(), expectedResults);

			std::string outputfile( outputpath);
			outputfile.push_back( strus::dirSeparator());
			outputfile.append( "exp.txt");

			strus::writeFile( outputfile, out.str());
		}

		if (!compareResults( results, expectedResults))
		{
			throw std::runtime_error(std::string( "results differ to expected for document ") + di->id);
		}
		totalNofmatches += results.size();
		if (g_errorBuffer->hasError())
		{
			throw std::runtime_error("error matching rule");
		}
	}
	return totalNofmatches;
}
// Parses search_string with parseQuery() and evaluates the resulting query
// (currentQ) against the index handler.
//
// Exactly one of the following argument lists is used to build the base result
// set, checked in this order:
//   1. AND arguments  - intersection of the hits of every term
//   2. OR arguments   - union of the hits of every term
//   3. plain argument - hits of the first term only
// Afterwards the hits of every NOT argument are removed from that set. The titles
// of the remaining pages are printed to stdout and the pages are handed to
// sortResults(), whose return value is returned to the caller.
vector<Result*> QueryProcessor::searchIndex(string search_string, IndexHandler*& ih)
{
  parseQuery(search_string);

  vector<Page*> results;
  if (currentQ->getandArgs().size() > 0)
  {
    // An explicit flag marks the first term. Using "results is empty" for that
    // purpose is wrong: once an intersection becomes empty, the next term's hits
    // would be copied in wholesale instead of the result staying empty.
    bool firstTerm = true;
    for (auto term: currentQ->getandArgs())
    {
      set<Page*> hits = ih->searchIndex(term);
      if (firstTerm)
      {
        results.assign(hits.begin(), hits.end());
        firstTerm = false;
      }
      else
      {
        // set_intersection needs sorted ranges, so the running result is moved
        // into a set before it is intersected with the new hits.
        set<Page*> previous(results.begin(), results.end());
        results.clear();
        set_intersection(previous.begin(), previous.end(), hits.begin(), hits.end(), back_inserter(results));
      }
    }
  }
  else if (currentQ->getorArgs().size() > 0)
  {
    set<Page*> unionOfHits;
    for (auto term: currentQ->getorArgs())
    {
      set<Page*> hits = ih->searchIndex(term);
      unionOfHits.insert(hits.begin(), hits.end());
    }
    copy(unionOfHits.begin(), unionOfHits.end(), back_inserter(results));
  }
  else if (currentQ->getnormArgs().size() > 0)
  {
    // Only the first plain argument is looked up.
    set<Page*> hits = ih->searchIndex(currentQ->getnormArgs()[0]);
    copy(hits.begin(), hits.end(), back_inserter(results));
  }

  if (currentQ->getnotArgs().size() > 0)
  {
    for (auto term: currentQ->getnotArgs())
    {
      // set_difference needs sorted ranges as well.
      set<Page*> previous(results.begin(), results.end());
      results.clear();
      set<Page*> excluded = ih->searchIndex(term);
      set_difference(previous.begin(), previous.end(), excluded.begin(), excluded.end(), back_inserter(results));
    }
  }

  for (auto page: results)
    cout << page->getTitle() << endl;

  vector<Result*> resultsvector = sortResults(results);
  return resultsvector;
}
Example #3
0
/*
 * Sorts list[start..end] (both bounds inclusive) in place, in ascending order of
 * swResultGetScore(), using quicksort with the middle element as pivot.
 *
 * Only the smaller partition is sorted recursively; the larger one is handled by
 * the enclosing loop. This keeps the recursion depth logarithmic in the range
 * length even when the partitions are badly unbalanced (e.g. all scores equal).
 * The sort is not stable. Calling it with start >= end is a no-op.
 */
static void sortResults(SWResult* list[], int start, int end) {

    while (start < end) {

        MatcherScore key;
        int frontIdx;
        int backIdx;
        /* start + (end - start) / 2 cannot overflow, unlike (start + end) / 2 */
        int pivot = start + (end - start) / 2;

        /* park the pivot at the front of the range */
        swapResults(&list[start], &list[pivot]);
        key = swResultGetScore(list[start]);

        frontIdx = start + 1;
        backIdx = end;

        while (frontIdx <= backIdx) {

            /* skip entries that already belong to the left (<= key) side */
            while ((frontIdx <= end) && (swResultGetScore(list[frontIdx]) <= key)) {
                frontIdx++;
            }

            /* skip entries that already belong to the right (> key) side;
               list[start] holds the key itself, so this stops at start at the latest */
            while ((backIdx >= start) && (swResultGetScore(list[backIdx]) > key)) {
                backIdx--;
            }

            if (frontIdx < backIdx) {
                swapResults(&list[frontIdx], &list[backIdx]);
            }
        }

        /* move the pivot to its final position */
        swapResults(&list[start], &list[backIdx]);

        if (backIdx - start < end - backIdx) {
            sortResults(list, start, backIdx - 1);
            start = backIdx + 1;
        } else {
            sortResults(list, backIdx + 1, end);
            end = backIdx - 1;
        }
    }
}
Example #4
0
/*
 * Prints a summary line (matrix size, time, GCUPS) for swData followed by every
 * result, from the last array entry down to the first. Unless swData->shotgun is
 * set, the results array is sorted in place with sortResults() before printing.
 * Does nothing when TEST_MODE is set.
 */
extern void swDataPrint(SWData* swData) {

    double rowCount;
    double columnCount;
    double gcups;
    int idx;

    if (TEST_MODE) {
        return;
    }

    rowCount = swData->rows;
    columnCount = swData->columns;

    /* cells per time unit scaled by 10e-10 (= 1e-9) */
    gcups = columnCount * rowCount / swData->time * 10e-10;

    printf(
        "Size: %d * %d | Time: %lf | GCuPS: %lf\n", 
        swData->rows, swData->columns, swData->time, gcups
    );

    if (!swData->shotgun) {
        sortResults(swData->results, 0, swData->resultNmr - 1);
    }

    /* walk the results array backwards */
    idx = swData->resultNmr;
    while (idx-- > 0) {
        printf("\n");
        swResultPrint(swData->results[idx]);
    }
}
// Program entry point. Every command line argument is the name of an input file.
//
// 1. Each file is opened once to record its size in fileSize[] and to find the
//    smallest non-empty file below the initial smallCount limit; that file is
//    swapped into argv[1].
// 2. Each file is then processed in argv order: empty files are skipped, files of
//    at least 500000 bytes are handled by MAX_THREADS threads running work(),
//    smaller files are read into a buffer and passed to createHash().
// 3. checkRef(), createResults(), sortResults() and printResults() produce the output.
//
// Returns 0 on success, 1 if no file was given or the synchronisation primitives
// could not be initialised; calls exit(-1) on I/O, allocation or thread errors.
int main(int argc, char * argv[])
{
  copyArgV = argv;
  int i = 0;
  int c = 0;
  int fd = 0;
  int smallCount = 100000;
  int shortFile = 1;

  if(argc < 2)
  {
    fprintf(stderr, "Invlaid number of Arguments. Exiting Program");
    return 1;
  }
  // Working with an uninitialised mutex or condition variable is undefined,
  // so give up instead of only reporting the failure.
  if (pthread_mutex_init(&mu, NULL) != 0)
  {
    fprintf(stderr, "can't init mutex");
    return 1;
  }

  if (pthread_cond_init(&cv, NULL) != 0)
  {
    fprintf(stderr, "can't init condition variable");
    return 1;
  }
  //finds the smallest file, saves its index
  for(c = 1; c < argc; c++)
  {
    fd = open(argv[c], O_RDONLY);
    if(fd < 0)
    {
      fprintf(stderr, "Could not open file! Invalid file name. Exiting program\n");
      exit(-1);
    }
    else
    {
      int bufferSize = (int)lseek(fd, 0, SEEK_END);
      if(bufferSize == -1)
      {
        fprintf(stderr, "lseek failed. exiting program\n");
        exit(-1);
      }
      //the descriptor was only needed to measure the file
      close(fd);
      fileSize[c] = bufferSize;
      if(bufferSize < smallCount && bufferSize != 0)
      {
        shortFile = c;
        smallCount = bufferSize;
      }
     } 
  }
  //switch shortest file with first file in argument array
  char * temp = argv[shortFile];
  argv[shortFile] = argv[1];
  argv[1] = temp;

  int numFile = 1;
  for(numFile = 1; numFile < argc; numFile++)
  {
    int inFd = open(argv[numFile], O_RDONLY); //check if file could be opened
    if(inFd < 0)
    {
      fprintf(stderr, "Could not open file. Invalid File name. Exiting program\n");
      exit(-1);
    }
    else
    {
      int bufferSize = (int)lseek(inFd, 0, SEEK_END);
      if(bufferSize == -1)
      {
        fprintf(stderr, "lseek failed. exiting program\n");
        exit(-1);
      }
      if(lseek(inFd, 0 , SEEK_SET) == -1)
      {
        fprintf(stderr, "lseek failed. exiting program\n");
        exit(-1);
      }
      //fileSize[] was filled before argv was reordered; refresh the entry so it
      //matches the file now at this position.
      //NOTE(review): assumes fileSize[] is indexed like argv by its readers - confirm
      fileSize[numFile] = bufferSize;
      if(bufferSize == 0)
      {
        fprintf(stderr, "Empty file, skipping file. The rest of the files will still be processed.\n");
      }
      else
      {
        //FOR LOAD IMBALANCE. Process shorter files serially, larger files with multiple threads per file
        if(bufferSize >= 500000)
        {
          //multi threaded
          threadCount = 0; 
          pthread_t pt[MAX_THREADS];
          for(i = 0; i < MAX_THREADS; i++)
          {
            threadInfo  * myThread = malloc(sizeof(threadInfo));
            if(myThread == NULL)
            { 
              fprintf(stderr, "malloc failed, exiting program\n");
              exit(-1);
            }
            myThread -> threadID = i;
            myThread -> filePosition = numFile;

            //a failed create would leave pt[i] uninitialised for the join below
            if(pthread_create(&pt[i], NULL, work, (void*)myThread) != 0)
            {
              fprintf(stderr, "Could not create thread, exiting program\n");
              exit(-1);
            }
          }
          int w = 0;
          for(w = 0; w < MAX_THREADS; w++)
          {
            if(pthread_join(pt[w], NULL) != 0)
            {
              fprintf(stderr, "Could not join threads!\n");
            }
          }
         checkThreadRef(numFile);
        }
        else
        {
          //serial
          char buffer[bufferSize];
          int totalRead = 0;
          //read() may return fewer bytes than requested, so loop until the
          //whole file is in the buffer or end of file is reached
          while(totalRead < bufferSize)
          {
            ssize_t got = read(inFd, buffer + totalRead, (size_t)(bufferSize - totalRead));
            if(got < 0)
            {
              fprintf(stderr, "read failed. exiting program\n");
              exit(-1);
            }
            if(got == 0)
            {
              break;
            }
            totalRead += (int)got;
          }
          //only pass on the bytes that were actually read
          createHash(0, totalRead, buffer, numFile);
        }
      }
      close(inFd);
    }
  }
  checkRef(argc);
  createResults();
  //count the entries of results[] up to the first one with count == 0
  int numResults = 0;
  i = 0;
  while(results[i].count != 0)
  {
    i++;
  }
  numResults = i;
  sortResults(numResults);
  printResults();
  return 0;
}