C++ (Cpp) cleanIndex示例

示例#1

0

显示文件

文件： makeIndex.c 项目： aedalton16/tinysearchengine

/*
 * Entry point: builds an index from the directory named by argv[1] and
 * saves it to the file named by argv[2].  When exactly five arguments are
 * present, an index is additionally rebuilt from argv[3] and saved to
 * argv[4].
 *
 * Returns 0 once the index (or indexes) have been written, and 1 when
 * indexerValidateArgs() yields a non-zero result.
 */
int main(int argc, char** argv)
{
  DICTIONARY* built;
  DICTIONARY* reloaded;

  printf("hi");

  /* Anything other than 0 from the argument check ends the run here. */
  if (indexerValidateArgs(argc, argv))
    return 1;

  /* First pass: directory -> index -> file. */
  built = buildIndexFromDirectory(argv[1]);
  printf("directory: %s \n", argv[1]);
  printf("filename: %s \n", argv[2]);
  saveIndexToFile(built, argv[2]);
  cleanIndex(built);

  /* Without the two extra arguments there is nothing left to do. */
  if (argc != 5)
    return 0;

  /* Second pass: rebuild from argv[3] and write the result to argv[4]. */
  reloaded = rebuild(argv[3]);
  saveIndexToFile(reloaded, argv[4]);
  cleanIndex(reloaded);

  return 0;
}

示例#2

0

显示文件

文件： PoldiResidualCorrelationCore.cpp 项目： DanNixon/mantid

/// Distributes correlation counts over all points that are possible for a given
/// d-value.
///
/// For every entry of m_indices, every d-value and every chopper slit, a
/// CountLocator is obtained and a (negative) share of the corrected
/// correlated intensity is added to the count data of the bins the locator
/// covers. The share per slit is -intensity / weight / number of slits.
///
/// @param correctedCorrelatedIntensities Intensities, read at index i.
/// @param dValues d-values, read in reverse order (index size - i - 1) so that
///        dValues[size - i - 1] is paired with intensity i.
///
/// NOTE(review): correctedCorrelatedIntensities and m_weightsForD are indexed
/// up to dValues.size() without a size check - presumably the caller
/// guarantees matching lengths; confirm.
void PoldiResidualCorrelationCore::distributeCorrelationCounts(
    const std::vector<double> &correctedCorrelatedIntensities,
    const std::vector<double> &dValues) const {
  const std::vector<double> chopperSlits(m_chopper->slitTimes());

  // Loop invariants, computed once instead of once per (k, i) iteration.
  const size_t dCount = dValues.size();
  const double slitCount = static_cast<double>(chopperSlits.size());

  // The macro must stay directly in front of the loop it applies to.
  PARALLEL_FOR_NO_WSP_CHECK()
  for (int k = 0; k < static_cast<int>(m_indices.size()); ++k) {
    for (size_t i = 0; i < dCount; ++i) {
      double d = dValues[dCount - i - 1];
      double dInt = correctedCorrelatedIntensities[i];
      double deltaForD = -dInt / m_weightsForD[i] / slitCount;

      for (double chopperSlit : chopperSlits) {
        CountLocator locator = getCountLocator(d, chopperSlit, m_indices[k]);

        // Number of bin boundaries crossed by the arrival window.
        int indexDifference = locator.icmax - locator.icmin;

        switch (indexDifference) {
        case 0:
          // Window lies within a single bin: weight by the window width.
          addToCountData(locator.detectorElement, locator.iicmin,
                         deltaForD * locator.arrivalWindowWidth);
          break;
        case 2: {
          // Window covers three bins: the middle one is covered completely
          // and receives the full delta.
          int middleIndex = cleanIndex((locator.icmin + 1), m_timeBinCount);

          if (middleIndex < 0) {
            // NOTE(review): k indexes m_indices, not dValues, although the
            // message talks about a d-value index - confirm the wording.
            m_logger.warning()
                << "Inconsistency found while calculating distribute "
                   "correlation counts for d-value with index "
                << std::to_string(k)
                << ", got middle index: " << std::to_string(middleIndex)
                << ", ignoring it.\n";
            break;
          }
          addToCountData(locator.detectorElement, middleIndex, deltaForD);
        }
        // fall through: the first and last bin of the window still need their
        // partial contributions, which is exactly what case 1 adds.
        case 1: {
          // First bin: fraction of the bin that lies above cmin.
          addToCountData(locator.detectorElement, locator.iicmin,
                         deltaForD * (static_cast<double>(locator.icmin) -
                                      locator.cmin + 1.0));
          // Last bin: fraction of the bin that lies below cmax.
          addToCountData(
              locator.detectorElement, locator.iicmax,
              deltaForD * (locator.cmax - static_cast<double>(locator.icmax)));
          break;
        }
        default:
          // Any other index difference is ignored.
          break;
        }
      }
    }
  }
}

示例#3

0

显示文件

文件： indexer.c 项目： somebodyschelsea/search-engine

/*
 * Indexer entry point.
 *
 * Usage: <program> TARGET_DIR OUTPUT_FILE [INPUT_FILE REWRITTEN_FILE]
 *
 * Every regular file in TARGET_DIR is read, its words are extracted with
 * parseHTML() and recorded in an inverted index via updateIndex(); the index
 * is then written to OUTPUT_FILE.  With the two optional arguments (testing
 * mode) the index stored in INPUT_FILE is read back with readIndex() and
 * written again to REWRITTEN_FILE.
 *
 * Returns 0 on success and 1 on a wrong argument count, an invalid or
 * unusable target directory, or an empty file list.
 */
int main(int argc, char *argv[])
{
// size in bytes of the scratch buffer that receives each parsed word
	const size_t word_buffer_size = 500*sizeof(char);

// INPUT VARIABLES
	char* program = argv[0];
	char* target_dir;
	char* output_file_name;
	char* input_file_name = NULL;		// only set in testing mode
	char* rewritten_file_name = NULL;	// only set in testing mode

// determines which mode the program is running in (default: basic functionality)
	int indexer_test_flag = 0;

// overall data structure
	INVERTED_INDEX* index;

// these variables handle the file list and pulling information from files
	int numfiles = 0;
	struct dirent **files;
	char* file_name;
	char* file_contents;

// this variable is used for parsing the HTML
	int file_pos;

// variables used for WordNode (word == key) and DocumentNode (doc_id)
	char* word;
	int doc_id;

// if incorrect number of arguments
	if(argc != 3 && argc != 5)
	{
		fprintf(stderr, "%s: The indexer requires either 2 (a target directory and output file name) or 4 (a target directory, output file name, input file name, and a rewritten file name\n", program);

		return 1;
	}

	target_dir = argv[1];
	output_file_name = argv[2];

// if 5 arguments --> TESTING MODE
	if(argc == 5)
	{
		indexer_test_flag = 1;
		input_file_name = argv[3];
		rewritten_file_name = argv[4];
	}

// if the target directory doesn't exist
	if(!directoryExists(target_dir))
	{
		fprintf(stderr, "%s: Invalid target directory %s\n", program, target_dir);
		return 1;
	}

	numfiles = getFileList(target_dir, &files);

// if there are no files in the target directory
	if(numfiles <= 0)
	{
		fprintf(stderr, "%s: Error with target directory %s\n", program, target_dir);
		return 1;
	}

// the files are opened by their bare names below, so the working directory
// has to be the target directory; stop if it cannot be entered
	if(chdir(target_dir) != 0)
	{
		fprintf(stderr, "%s: Unable to change into target directory %s\n", program, target_dir);

		for(int i=0; i < numfiles; i++)
		{
			free(files[i]);
		}
		free(files);

		return 1;
	}

	index = initializeDict();

// this for loop goes through each file in "files", pulls each word out of the HTML, and updates the index data structure
	for(int i=0; i < numfiles; i++)
	{
		file_name = files[i]->d_name;

// if it's a regular file (to avoid . and .. files)
		if(regularFile(file_name))
		{
			file_contents = readFile(file_name);
			file_pos = 0;
			doc_id = atoi(file_name);	// the file name doubles as the document id

// just in case a 404 wasn't caught by the crawler
			if(file_contents != NULL)
			{
// one zeroed scratch buffer per file, cleared again after every word
				word = malloc(word_buffer_size);
				MALLOC_CHECK(word);
				BZERO(word, word_buffer_size);

// parseHTML returns the position in file_contents to continue from (-1 ends the loop), while filling "word"
				while((file_pos = parseHTML(file_contents, word, file_pos)) != -1)
				{
					updateIndex(word, doc_id, index);
					BZERO(word, word_buffer_size);
				}

				free(word);
			}

			free(file_contents);
		}

		free(files[i]);
	}

	free(files);

// outputs to a file
	saveFile(index, output_file_name);
	cleanIndex(index);

// if it's in testing mode: read the stored index back and write it out again
	if(indexer_test_flag)
	{
		INVERTED_INDEX* newindex;
		newindex = readIndex(input_file_name);
		saveFile(newindex, rewritten_file_name);
		cleanIndex(newindex);
	}

	return 0;
}