int do_import(std::string file, int sorted, uint_t limit, int &rnadded, int &rnlines) { #if defined USE_CXX_IO std::ifstream fin(file.c_str()); #else FILE *fin = fopen(file.c_str(), "r"); #endif int fd = open(file.c_str(), O_RDONLY); // Potential race condition + not checking for return value if_length = file_size(file.c_str()); DCERR("handle_import::file:"<<file<<endl); if (!fin || !fd) { return -IMPORT_FILE_NOT_FOUND; } else { building = true; int nlines = 0; int foffset = 0; if (if_mmap_addr) { munmap(if_mmap_addr, if_length); } // mmap() the input file in if_mmap_addr = (char*)mmap(NULL, if_length, PROT_READ, MAP_SHARED, fd, 0); if (!if_mmap_addr) { fclose(fin); close(fd); return -IMPORT_FILE_NOT_FOUND; } pm.repr.clear(); char buff[INPUT_LINE_SIZE]; while ( #if defined USE_CXX_IO fin #else !feof(fin) #endif && limit--) { buff[0] = '\0'; #if defined USE_CXX_IO fin.getline(buff, INPUT_LINE_SIZE); const int llen = fin.gcount(); buff[INPUT_LINE_SIZE - 1] = '\0'; #else char *got = fgets(buff, INPUT_LINE_SIZE, fin); if (!got) { break; } const int llen = strlen(buff); if (llen && buff[llen-1] == '\n') { buff[llen-1] = '\0'; } #endif ++nlines; int weight = 0; std::string phrase; StringProxy snippet; InputLineParser(if_mmap_addr, foffset, buff, &weight, &phrase, &snippet).start_parsing(); foffset += llen; if (!phrase.empty()) { str_lowercase(phrase); DCERR("Adding: "<<weight<<", "<<phrase<<", "<<std::string(snippet)<<endl); pm.insert(weight, phrase, snippet); } } fclose(fin); pm.finalize(sorted); vui_t weights; for (size_t i = 0; i < pm.repr.size(); ++i) { weights.push_back(pm.repr[i].weight); } st.initialize(weights); rnadded = weights.size(); rnlines = nlines; building = false; } return 0; }