Beispiel #1
0
/**
 * Parse a text data set into a Problem.
 *
 * Each input line is read as
 *     <label> <field>:<index>:<value> <field>:<index>:<value> ...
 * with the label separated from the features by spaces or tabs.
 *
 * For every line:
 *   - the label is stored in prob.Y as +1.0f when its integer value is
 *     positive and -1.0f otherwise (a blank line therefore yields -1.0f),
 *   - every well-formed feature is appended to prob.JFV as
 *     DNode(field-1, index-1, value), i.e. the file's numbers are shifted
 *     down by one,
 *   - prob.P receives the running feature count, so the features of
 *     instance i occupy JFV[P[i]] .. JFV[P[i+1]-1].
 * prob.nr_field / prob.nr_feature track the largest field / index seen
 * (as written in the file, before the -1 shift) and prob.nr_instance is
 * incremented once per line.
 *
 * A feature token that is missing its index or value ends parsing of that
 * line; features already read from the line are kept.
 *
 * @param path  File to read. An empty path yields a default-constructed
 *              Problem without touching the file system.
 * @return      The populated Problem.
 *
 * NOTE(review): how open_c_file reports a file that cannot be opened is not
 * visible here; this function assumes it never returns a null FILE*.
 * NOTE(review): lines longer than kMaxLineSize-1 characters are split by
 * fgets and would be parsed as several instances.
 */
Problem read_problem(std::string const path)
{
    if(path.empty())
        return Problem();
    Problem prob;

    FILE *f = open_c_file(path.c_str(), "r");
    char line[kMaxLineSize];

    uint64_t p = 0;
    prob.P.push_back(0);
    for(; fgets(line, kMaxLineSize, f) != nullptr; ++prob.nr_instance)
    {
        // strtok yields no token at all when the line holds only spaces/tabs
        // (possible for a final line without '\n'); treat that like a blank
        // line instead of handing a null pointer to atoi.
        char *y_char = strtok(line, " \t");
        float const y = (y_char != nullptr && atoi(y_char) > 0)? 1.0f : -1.0f;
        prob.Y.push_back(y);

        // Only look for features when the label token existed.
        for(; y_char != nullptr; ++p)
        {
            char *field_char = strtok(nullptr,":");
            char *idx_char = strtok(nullptr,":");
            char *value_char = strtok(nullptr," \t");

            // End of line: nothing left, or only the line terminator
            // ("\n", or "\r\n" for files with CRLF endings).
            if(field_char == nullptr || *field_char == '\n' || *field_char == '\r')
                break;
            // Truncated token such as "3" or "3:7": stop here rather than
            // passing a null pointer to atoi/atof.
            if(idx_char == nullptr || value_char == nullptr)
                break;

            uint32_t const field = static_cast<uint32_t>(atoi(field_char));
            uint32_t const idx = static_cast<uint32_t>(atoi(idx_char));
            float const value = static_cast<float>(atof(value_char));

            prob.nr_field = std::max(prob.nr_field, field);
            prob.nr_feature = std::max(prob.nr_feature, idx);

            prob.JFV.push_back(DNode(field-1, idx-1, value));
        }
        prob.P.push_back(p);
    }

    fclose(f);

    return prob;
}
Beispiel #2
0
/* ==========================================================================
 * Record in the index that `word` occurs in document `docID`.
 *
 * The word is hashed with JenkinsHash to pick a bucket; the bucket is a
 * linked list of GenHashTableNodes whose hashKey points at a WordNode, and
 * each WordNode carries a linked list of DocumentNodes (its `page`).
 *
 * *** Content ***
 * Case 1: A chain node with no word node is reached (normally the bucket
 * head of an unused slot). A new doc node and word node are created and
 * stored in that chain node.
 * Case 2: A word node for `word` is found in the chain. Its doc nodes are
 * searched for `docID`:
 *   - found: that doc node's freq is incremented by one (the `freq`
 *     argument is not used in this case);
 *   - not found: a new doc node (docID, freq) is appended to the list, or
 *     becomes the head of the list when the word node had no doc nodes.
 * Case 3: The end of the chain is reached without a match (hash collision
 * with other words). A new chain node holding a new word node and doc node
 * is appended to the chain.
 *
 * Returns 1 when the index was updated, 0 when the chain node needed for
 * Case 3 could not be allocated (the index is left unchanged).
 *
 * NOTE(review): assumes index->table[hashNumber] is always a valid node
 * pointer, as the original code did; the results of DNode/WNode are used
 * as returned, their failure behaviour is not visible here.
 * ========================================================================== */
int reloadIndexHash(char *word, int docID, int freq, HashTable *index) {
    unsigned long hashNumber = JenkinsHash(word, MAX_HASH_SLOT);
    GenHashTableNode *loopNode = index->table[hashNumber];

    // Walk the bucket's chain until the word, an empty node, or the tail is found.
    for (;;) {
        WordNode *wordNode = loopNode->hashKey;

        // Case 1: empty chain node, slot the new word node in here.
        if (wordNode == NULL) {
            DocumentNode *docNode = DNode(docID, freq);
            loopNode->hashKey = WNode(docNode, word);
            return 1;
        }

        // Case 2: this chain node already holds the word.
        if (!strcmp(wordNode->word, word)) {
            DocumentNode *docNode = wordNode->page;

            // Word node without any documents yet: start its list.
            if (docNode == NULL) {
                wordNode->page = DNode(docID, freq);
                return 1;
            }

            for (;;) {
                if (docNode->docID == docID) {      // document already listed
                    docNode->freq++;
                    return 1;
                }
                if (!docNode->next) {               // end of list: append new doc node
                    docNode->next = DNode(docID, freq);
                    return 1;
                }
                docNode = docNode->next;
            }
        }

        if (!loopNode->next) {
            break;                                  // tail reached, word not present
        }
        loopNode = loopNode->next;
    }

    // Case 3: append a new chain node for the word after the current tail.
    GenHashTableNode *nextNode = (GenHashTableNode*)calloc(1, sizeof(GenHashTableNode));
    if (nextNode == NULL) {
        return 0;                                   // out of memory; nothing was modified
    }

    DocumentNode *newDocNode = DNode(docID, freq);
    nextNode->hashKey = WNode(newDocNode, word);
    nextNode->next = NULL;
    loopNode->next = nextNode;                      // link only once fully initialised
    return 1;
}