// Parse a text data set where every line has the form
//     <label> <field>:<index>:<value> <field>:<index>:<value> ...
// with tokens separated by spaces or tabs.
//
// path: file to read. An empty path yields a default-constructed Problem.
//
// Returns a Problem in which
//   - Y holds one label per line: +1 if the integer label is > 0, else -1;
//   - JFV holds every parsed feature, in file order, with field and index
//     shifted down by one;
//   - P holds offsets into JFV: P[k] is the number of features stored before
//     line k, and one extra trailing entry holds the total;
//   - nr_instance counts the lines read, nr_field / nr_feature track the
//     largest field / index value seen in the file.
//
// NOTE(review): open_c_file is assumed to never return a null stream (e.g. it
// reports the failure itself) -- confirm against its definition.
// NOTE(review): fgets splits lines longer than kMaxLineSize - 1 characters,
// so the tail of such a line is parsed as if it were a separate line.
Problem read_problem(std::string const path)
{
    if(path.empty())
        return Problem();

    Problem prob;

    FILE *f = open_c_file(path.c_str(), "r");
    char line[kMaxLineSize];

    // Running count of features pushed into JFV so far.
    uint64_t p = 0;
    prob.P.push_back(0);

    while(fgets(line, kMaxLineSize, f) != nullptr)
    {
        // strtok returns nullptr when the line holds nothing but blanks
        // (possible on a final line without a newline); treat that like a
        // blank line instead of handing a null pointer to atoi.
        char *y_char = strtok(line, " \t");
        float const y = (y_char != nullptr && atoi(y_char) > 0) ? 1.0f : -1.0f;
        prob.Y.push_back(y);

        for(; ; ++p)
        {
            char *field_char = strtok(nullptr, ":");
            char *idx_char = strtok(nullptr, ":");
            char *value_char = strtok(nullptr, " \t");

            // End of line: either no token is left, or only the trailing
            // newline remains (line ended with a separator).
            if(field_char == nullptr || *field_char == '\n')
                break;

            // Incomplete "field:index:value" token (truncated line, stray
            // '\r', ...): stop parsing this line rather than passing a null
            // pointer to atoi/atof.
            if(idx_char == nullptr || value_char == nullptr)
                break;

            uint32_t const field = static_cast<uint32_t>(atoi(field_char));
            uint32_t const idx = static_cast<uint32_t>(atoi(idx_char));
            float const value = static_cast<float>(atof(value_char));

            prob.nr_field = std::max(prob.nr_field, field);
            prob.nr_feature = std::max(prob.nr_feature, idx);

            // The file is 1-based; nodes are stored 0-based.
            // NOTE(review): a field or index of 0 wraps around in unsigned
            // arithmetic here -- confirm the input never contains 0.
            prob.JFV.push_back(DNode(field-1, idx-1, value));
        }

        prob.P.push_back(p);
        ++prob.nr_instance;
    }

    fclose(f);

    return prob;
}
/* ==========================================================================
 * Take a word, a document ID with its frequency, and a hashtable, and put
 * or find that information in the index, updating the index appropriately.
 *
 * The word is hashed with JenkinsHash(word, MAX_HASH_SLOT) to select a
 * bucket. A bucket is a linked list of GenHashTableNodes; each node's
 * hashKey is a WordNode, and each WordNode's page is a linked list of
 * DocumentNodes.
 *
 * *** Content ***
 * Case 1: The node being examined has no word (empty hashKey). Insert a new
 *   word node with a new document node there.
 * Case 2: The node holds the parameter-passed word. Loop over its document
 *   nodes: if one has the parameter-passed document ID, increment its freq;
 *   otherwise make a new document node and add it to the end of the list.
 * Case 3: The node holds a different word (a collision of hash number) and
 *   is the last in the bucket. Allocate a new bucket node holding a new
 *   word node and document node, and link it to the end of the bucket.
 *
 * *** Return ***
 * 1 when the index was updated, 0 when the bucket node for case 3 could not
 * be allocated (the index is left unchanged in that case).
 *
 * NOTE(review): DNode and WNode results are used unchecked; confirm whether
 * they can return NULL.
 * ========================================================================== */
int reloadIndexHash(char *word, int docID, int freq, HashTable *index)
{
    unsigned long hashNumber = JenkinsHash(word, MAX_HASH_SLOT);
    GenHashTableNode *bucket = index->table[hashNumber];

    // walk the bucket; every iteration either returns or advances
    for (;;) {
        WordNode *wordNode = bucket->hashKey;

        // Case 1: nothing stored in this node, slot in new wordnode
        if (wordNode == NULL) {
            DocumentNode *docNode = DNode(docID, freq);
            bucket->hashKey = WNode(docNode, word);
            return 1;
        }

        // Case 2: wordnode for this word found in linked list
        if (strcmp(wordNode->word, word) == 0) {
            DocumentNode *docNode = wordNode->page;

            // word node without any document nodes yet: start its list
            if (docNode == NULL) {
                wordNode->page = DNode(docID, freq);
                return 1;
            }

            // loop through that WordNode's list of document nodes
            for (;;) {
                if (docNode->docID == docID) {
                    // found a document node, increment freq.
                    // NOTE(review): the freq argument is not used on this
                    // path; confirm that a plain increment is intended.
                    docNode->freq++;
                    return 1;
                }
                if (docNode->next == NULL) {
                    // didn't find. make new and add to end of list.
                    docNode->next = DNode(docID, freq);
                    return 1;
                }
                docNode = docNode->next;
            }
        }

        // Case 3: reached end of bucket without finding the word
        if (bucket->next == NULL) {
            // calloc zeroes the node, so its next pointer starts out NULL
            GenHashTableNode *nextNode = (GenHashTableNode*)calloc(1, sizeof(GenHashTableNode));
            if (nextNode == NULL) {
                // out of memory: report failure instead of dereferencing NULL
                return 0;
            }
            DocumentNode *docNode = DNode(docID, freq);
            nextNode->hashKey = WNode(docNode, word);
            bucket->next = nextNode;
            return 1;
        }

        bucket = bucket->next;
    }
}