// FileWorker::loadPacket() reads packet format definitions from an XML file
// with pugixml. Token, Tokens, setGlobalError() and DbgMsg() are project-level
// types and helpers declared elsewhere in the tree.
#include <pugixml.hpp>
#include <utility>
#include <vector>

using namespace pugi;

bool FileWorker::loadPacket(std::vector<std::pair<Token, Tokens>>& formats, const Token& fileName) {
    xml_document doc;
    xml_parse_result result = doc.load_file(fileName.c_str());
    if (result) {
        Tokens packet;
        for (xml_node pkt = doc.child("packet"); pkt; pkt = pkt.next_sibling("packet")) {
            // Skip malformed packets: incrementing attributes_begin() on an
            // attribute-less node would walk past the end.
            if (pkt.attributes_begin() == pkt.attributes_end())
                continue;
            // The first attribute names the format; the remaining attributes
            // are the format's fields.
            for (xml_attribute_iterator attribute = ++pkt.attributes_begin();
                 attribute != pkt.attributes_end(); ++attribute)
                packet.push_back(attribute->value());
            formats.push_back(std::make_pair(pkt.attributes_begin()->value(), packet));
            packet.clear();
        }
        return true;
    } else if (result.status == pugi::status_file_not_found) {
        setGlobalError("Packets file: file not found");
        DbgMsg(__FILE__, __LINE__, "FileWorker::loadPacket() load_file() ERROR: file not found\n");
        return false;
    } else {
        setGlobalError("Packets file: XML parsed with errors");
        DbgMsg(__FILE__, __LINE__, "FileWorker::loadPacket() load_file() ERROR: file parsed with errors:\n");
        DbgMsg(__FILE__, __LINE__, "Description: %s\n", result.description());
        // result.offset is a ptrdiff_t, so it must not be printed with %s.
        DbgMsg(__FILE__, __LINE__, "Error offset: %td\n", result.offset);
        return false;
    }
}
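// A packets file this parser would accept might look like the hypothetical
// sketch below (layout inferred from the loop above: one <packet> element per
// format, whose first attribute supplies the format's name and whose remaining
// attributes supply its fields; all attribute names here are made up):
//
//   <packet name="heartbeat" field0="id" field1="timestamp"/>
//   <packet name="telemetry" field0="id" field1="voltage" field2="current"/>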
// Spelling-correction driver: for each sentence, collect the dictionary words
// within Levenshtein distance 1 of every word, build candidate sentences by
// substituting one candidate at a time, and print the candidate the n-gram
// language model scores highest. read_tokens(), getCorpusList(), getProb(),
// uiLevenshteinDistance(), EOS, Tokens and Corpus come from the project's
// headers.
#include <cfloat>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

using namespace std;

int main(int argc, char const *argv[]) {
    // All seven parameters are required; bail out otherwise.
    if (argc < 8) {
        std::cerr << "Usage: P7 <corpus> <sentence> <dictionary> <n> <threshold> <delta> <model>" << '\n';
        return -1;
    }

    // Collect the program parameters.
    string corpusFileName = argv[1];
    string sentenceFileName = argv[2];
    string dictionaryFileName = argv[3];
    unsigned int n = stoi(argv[4]);
    unsigned int threshold = stoi(argv[5]);
    double delta = stod(argv[6]);
    bool model = stoi(argv[7]); // selects which of the two model variants getProb() uses

    // Capture all tokens.
    Tokens corpusTokens;
    Tokens sentenceTokens;
    Tokens dictionaryTokens;
    read_tokens(corpusFileName, corpusTokens, false);
    read_tokens(sentenceFileName, sentenceTokens, true);
    read_tokens(dictionaryFileName, dictionaryTokens, false);

    if (corpusTokens.size() < n) {
        std::cerr << "\nInput file '" << corpusFileName
                  << "' is too small to create any nGrams of size " << n;
        return -1;
    }
    if (sentenceTokens.size() < n) {
        std::cerr << "\nInput file '" << sentenceFileName
                  << "' is too small to create any nGrams of size " << n;
        return -1;
    }

    // Build the vocabulary and dictionary sets; only word presence matters here.
    unordered_map<string, int> vocabulary;
    unordered_map<string, int> dictionary;
    vector<Corpus> corpus = getCorpusList(corpusTokens, n);
    for (auto &word : corpusTokens)
        if (vocabulary.count(word) == 0) vocabulary[word] = 1;
    for (auto &word : dictionaryTokens)
        if (dictionary.count(word) == 0) dictionary[word] = 1;

    int V = vocabulary.size() + 1; // +1 reserves a type for unseen words
    double N = corpusTokens.size();

    // Split the input stream into sentences at each end-of-sentence marker.
    vector<Tokens> sentences;
    Tokens sentence;
    for (auto &word : sentenceTokens) {
        if (word == EOS) {
            sentences.push_back(sentence);
            sentence.clear();
        } else {
            sentence.push_back(word);
        }
    }

    // Proofread each sentence.
    for (auto &sentence : sentences) {
        std::cout << "Sentence:\t";
        for (auto &word : sentence) std::cout << word << ' ';
        std::cout << '\n';

        // For every word, gather the dictionary entries within edit distance 1
        // (a word that is itself in the dictionary is its own candidate).
        vector<Tokens> candidateWords;
        for (auto &word : sentence) {
            Tokens candidates;
            for (auto &candidate : dictionary)
                if (uiLevenshteinDistance(word, candidate.first) <= 1)
                    candidates.push_back(candidate.first);
            candidateWords.push_back(candidates);
        }

        // Build every candidate sentence by substituting one candidate word at
        // a time; keep the original sentence as a fallback so the output is
        // never empty.
        vector<Tokens> candidateSentences;
        candidateSentences.push_back(sentence);
        for (size_t i = 0; i < candidateWords.size(); i++) {
            for (auto &word : candidateWords[i]) {
                Tokens temp = sentence;
                temp[i] = word;
                candidateSentences.push_back(temp);
            }
        }

        // Score each candidate with the language model and keep the best.
        double bestProb = -DBL_MAX;
        Tokens bestSentence;
        for (auto &candidate : candidateSentences) {
            double prob = getProb(corpus, candidate, n, delta, N, V, threshold, model);
            if (prob > bestProb) {
                bestProb = prob;
                bestSentence = candidate;
            }
        }

        std::cout << "Suggestion:\t";
        for (auto &word : bestSentence) std::cout << word << " ";
        std::cout << "\n";
    }

    return 0;
}
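// ---------------------------------------------------------------------------
// The two helpers below are NOT the project's implementations: they are
// minimal self-contained sketches of the techniques main() leans on, included
// only to make the control flow above concrete. uiLevenshteinDistance_sketch
// and addDeltaProb_sketch are made-up names; the assumed behavior of
// uiLevenshteinDistance() and getProb() is inferred from how they are called.

#include <algorithm>
#include <string>
#include <vector>

// Classic dynamic-programming edit distance, rolled into a single row so
// memory stays at O(|b|). dp[j] holds the distance between the first i
// characters of a and the first j characters of b.
unsigned int uiLevenshteinDistance_sketch(const std::string& a, const std::string& b) {
    std::vector<unsigned int> dp(b.size() + 1);
    for (unsigned int j = 0; j <= b.size(); ++j) dp[j] = j;
    for (unsigned int i = 1; i <= a.size(); ++i) {
        unsigned int diag = dp[0]; // dp[i-1][j-1]
        dp[0] = i;
        for (unsigned int j = 1; j <= b.size(); ++j) {
            unsigned int up = dp[j]; // dp[i-1][j]
            unsigned int subst = diag + (a[i - 1] == b[j - 1] ? 0 : 1);
            dp[j] = std::min({ up + 1,         // delete a[i-1]
                               dp[j - 1] + 1,  // insert b[j-1]
                               subst });       // substitute
            diag = up;
        }
    }
    return dp[b.size()];
}

// Add-delta smoothing, which the delta and V parameters of getProb() suggest:
// every n-gram count is padded by delta, and the history count by delta * V,
// so unseen n-grams still receive a small nonzero probability.
//
//   P(w_n | w_1..w_{n-1}) = (count(w_1..w_n) + delta) / (count(w_1..w_{n-1}) + delta * V)
double addDeltaProb_sketch(double ngramCount, double historyCount, double delta, double V) {
    return (ngramCount + delta) / (historyCount + delta * V);
}

// Example invocation with hypothetical file names:
//   ./P7 corpus.txt sentences.txt dictionary.txt 2 1 0.5 1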