/*
 * Indexer driver.
 *
 * argv[1] is handed to buildIndexFromDirectory() and argv[2] to
 * saveIndexToFile().  When exactly five arguments are present, an index is
 * additionally rebuilt from argv[3] and written to argv[4].
 *
 * Returns 0 when the indexing path ran, 1 when indexerValidateArgs() reported
 * a non-zero result.
 */
int main(int argc, char** argv) {
    printf("hi");

    /* A non-zero result from the validator means "do not run".
     * NOTE(review): presumably 0 signals valid arguments -- confirm against
     * the definition of indexerValidateArgs(). */
    if (indexerValidateArgs(argc, argv)) {
        return 1;
    }

    /* Build the index from the directory and persist it. */
    DICTIONARY* built = buildIndexFromDirectory(argv[1]);
    printf("directory: %s \n", argv[1]);
    printf("filename: %s \n", argv[2]);
    saveIndexToFile(built, argv[2]);
    cleanIndex(built);

    /* Five-argument form: reload an index from argv[3] and write it back out
     * to argv[4]. */
    if (argc == 5) {
        DICTIONARY* rebuilt = rebuild(argv[3]);
        saveIndexToFile(rebuilt, argv[4]);
        cleanIndex(rebuilt);
    }

    return 0;
}
/// Distributes correlation counts over all points that are possible for a given /// d-value. void PoldiResidualCorrelationCore::distributeCorrelationCounts( const std::vector<double> &correctedCorrelatedIntensities, const std::vector<double> &dValues) const { const std::vector<double> chopperSlits(m_chopper->slitTimes()); PARALLEL_FOR_NO_WSP_CHECK() for (int k = 0; k < static_cast<int>(m_indices.size()); ++k) { for (size_t i = 0; i < dValues.size(); ++i) { double d = dValues[dValues.size() - i - 1]; double dInt = correctedCorrelatedIntensities[i]; double deltaForD = -dInt / m_weightsForD[i] / static_cast<double>(chopperSlits.size()); for (double chopperSlit : chopperSlits) { CountLocator locator = getCountLocator(d, chopperSlit, m_indices[k]); int indexDifference = locator.icmax - locator.icmin; switch (indexDifference) { case 0: addToCountData(locator.detectorElement, locator.iicmin, deltaForD * locator.arrivalWindowWidth); break; case 2: { int middleIndex = cleanIndex((locator.icmin + 1), m_timeBinCount); if (middleIndex < 0) { m_logger.warning() << "Inconsistency foun while calculating distribute " "correlation counts for d-value with index " << std::to_string(k) << ", got middle index: " << std::to_string(middleIndex) << ", ignoring it.\n"; break; } addToCountData(locator.detectorElement, middleIndex, deltaForD); } case 1: { addToCountData(locator.detectorElement, locator.iicmin, deltaForD * (static_cast<double>(locator.icmin) - locator.cmin + 1.0)); addToCountData( locator.detectorElement, locator.iicmax, deltaForD * (locator.cmax - static_cast<double>(locator.icmax))); break; } default: break; } } } } }
int main(int argc, char *argv[]) { // INPUT VARIABLES char* program; char* target_dir; char* output_file_name; char* input_file_name; char* rewritten_file_name; // determines which mode the program is running in int indexer_test_flag; // overall data structure INVERTED_INDEX* index; // these variables handle the scandir results and pulling information from files int numfiles = 0; struct dirent **files; char* file_name; char* file_contents; // this variable is used for parsing the HTML int file_pos; // variables used for WordNode (word == key) and DocumentNode (doc_id) char* word; int doc_id; indexer_test_flag = 0; // default is basic funcitonality program = argv[0]; // if incorrect number of arguments if(argc != 3 && argc != 5) { fprintf(stderr, "%s: The indexer requires either 2 (a target directory and output file name) or 4 (a target directory, output file name, input file name, and a rewritten file name\n", program); return 1; } target_dir = argv[1]; output_file_name = argv[2]; // if 5 arguments --> TESTING MODE if(argc == 5) { indexer_test_flag = 1; input_file_name = argv[3]; rewritten_file_name = argv[4]; } // if the target directory doesn't exist if(!directoryExists(target_dir)) { fprintf(stderr, "%s: Invalid target directory %s\n", program, target_dir); return 1; } numfiles = getFileList(target_dir, &files); chdir(target_dir); // if there are no files in the target directory if(numfiles <= 0) { fprintf(stderr, "%s: Error with target directory %s'n", program, target_dir); return 1; } index = initializeDict(); // this for loop goes through each file in "files", pulls each word out of the HTML, and updates the index data structure for(int i=0; i < numfiles; i++) { file_name = files[i]->d_name; // if it's a regular file (to avoid . and .. 
files) if(regularFile(file_name)) { file_contents = NULL; file_contents = readFile(file_name); file_pos = 0; doc_id = atoi(file_name); // just in case a 404 wasn't caught by the crawler if(file_contents != NULL) { word = NULL; word = malloc(500*sizeof(char)); MALLOC_CHECK(word); BZERO(word, 500*sizeof(char)); // GetNextWord returns the index in file_contents where it stopped parsing, while assigning a new word to the "word" while((file_pos = parseHTML(file_contents, word, file_pos)) != -1) { updateIndex(word, doc_id, index); free(word); //word = NULL; word = malloc(500*sizeof(char)); MALLOC_CHECK(word); BZERO(word, 500*sizeof(char)); } free(word); } free(file_contents); } free(files[i]); } free(files); // outputs to a file saveFile(index, output_file_name); cleanIndex(index); // if it's in testing mode if(indexer_test_flag) { INVERTED_INDEX* newindex; newindex = readIndex(input_file_name); saveFile(newindex, rewritten_file_name); cleanIndex(newindex); } }