Beispiel #1
0
/*
 * Entry point: builds a vocabulary tree from the descriptors stored in a
 * list of key files and writes the tree to disk.
 *
 * Command line:
 *   <list.in>           text file with one key-file path per line
 *   <depth>             tree depth passed to VocabTree::Build
 *   <branching_factor>  branching factor passed to VocabTree::Build
 *   <restarts>          restart count passed to VocabTree::Build
 *   <tree.out>          path the tree is written to
 *
 * Progress is reported both on stdout and in "debug_VocabLearn.txt".
 * Returns 0 on success and 1 on a usage or input error.
 */
int main(int argc, char **argv)
{
    if (argc != 6) {
        printf("Usage: %s <list.in> <depth> <branching_factor> "
               "<restarts> <tree.out>\n", argv[0]);
        return 1;
    }

    const char *list_in = argv[1];
    int depth = atoi(argv[2]);
    int bf = atoi(argv[3]);
    int restarts = atoi(argv[4]);
    const char *tree_out = argv[5];
    std::ofstream log("debug_VocabLearn.txt");
    log << "Building tree with depth: " << depth << ", branching factor: " << bf << ", and restarts: " << restarts << std::endl;
    printf("Building tree with depth: %d, branching factor: %d, "
           "and restarts: %d\n", depth, bf, restarts);

    FILE *f = fopen(list_in, "r");
    if (f == NULL) {
        /* Without this check fgets/fclose would be handed a null stream. */
        log << "Could not open file: " << list_in << std::endl;
        printf("Could not open file: %s\n", list_in);
        return 1;
    }

    std::vector<std::string> key_files;
    char buf[256];
    while (fgets(buf, 256, f)) {
        /* Remove trailing newline; guard the index so a zero-length
         * read never touches buf[-1]. */
        size_t n = strlen(buf);
        if (n > 0 && buf[n - 1] == '\n')
            buf[--n] = 0;

        /* A blank line cannot name a key file. */
        if (n == 0)
            continue;

        key_files.push_back(std::string(buf));
    }

    fclose(f);

    int num_files = (int) key_files.size();
    unsigned long total_keys = 0;
    for (int i = 0; i < num_files; i++) {
        int num_keys = GetNumberOfKeys(key_files[i].c_str());
        /* Only positive counts are accumulated so that a non-positive
         * return value cannot wrap the unsigned total. */
        if (num_keys > 0)
            total_keys += num_keys;
    }

    log << "Total number of keys: " << total_keys << std::endl;
    printf("Total number of keys: %lu\n", total_keys);
    fflush(stdout);

    int dim = 128;
    unsigned long long len = (unsigned long long) total_keys * dim;

    /* Descriptor storage is split into chunks of at most MAX_ARRAY_SIZE
     * bytes; num_arrays is len / MAX_ARRAY_SIZE rounded up. */
    int num_arrays =
        len / MAX_ARRAY_SIZE + ((len % MAX_ARRAY_SIZE) == 0 ? 0 : 1);

    unsigned char **vs = new unsigned char *[num_arrays];

    log << "Allocating " << len << " bytes in total, in " << num_arrays << " arrays" << std::endl;
    printf("Allocating %llu bytes in total, in %d arrays\n", len, num_arrays);

    unsigned long long total = 0;
    for (int i = 0; i < num_arrays; i++) {
        unsigned long long remainder = len - total;
        unsigned long len_curr = MIN(remainder, MAX_ARRAY_SIZE);

        log << "Allocating array of size " << len_curr << std::endl;
        printf("Allocating array of size %lu\n", len_curr);
        fflush(stdout);
        vs[i] = new unsigned char[len_curr];

        /* Track what has been handed out so the final chunk is sized to
         * the bytes that are actually left. */
        total += len_curr;
    }

    /* Create the array of pointers (one pointer per key) */
    unsigned long vp_bytes =
        (unsigned long) (sizeof(unsigned char *) * total_keys);
    log << "Allocating pointer array of size " << vp_bytes << std::endl;
    printf("Allocating pointer array of size %lu\n", vp_bytes);
    fflush(stdout);

    unsigned char **vp = new unsigned char *[total_keys];

    unsigned long off = 0;        /* byte offset inside vs[curr_array] */
    unsigned long curr_key = 0;   /* next free slot in vp */
    int curr_array = 0;           /* chunk currently being filled */
    for (int i = 0; i < num_files; i++) {
        log << "Reading key file " << key_files[i] << std::endl;

        unsigned char *keys;
        int num_keys = ReadKeyFileCXX(key_files[i].c_str(), &keys);
        log << "There are " << num_keys << " keys in file " << key_files[i] << std::endl;

#ifdef DEBUG
        /* Dump the descriptors that were just read; the file is rewritten
         * for every key file. */
        std::ofstream debug_log("debug_log_2.txt");
        debug_log << num_keys << " " << dim << std::endl;
        for (int r = 0; r < num_keys; r++) {
            for (int k = 0; k < dim; k++) {
                debug_log << (int) keys[dim * r + k] << " ";
            }
            debug_log << std::endl;
        }
        debug_log.close();
#endif

        if (num_keys > 0) {
            /* The buffers were sized from the counting pass above; refuse
             * to write past them if this read yields more keys. */
            if ((unsigned long) num_keys > total_keys - curr_key) {
                log << "Key file " << key_files[i] << " has more keys than were counted" << std::endl;
                printf("Key file %s has more keys than were counted\n",
                       key_files[i].c_str());
                delete [] keys;
                return 1;
            }

            for (int j = 0; j < num_keys; j++) {
#ifdef DEBUG
                log << "Copying key " << j << std::endl;
#endif
                for (int k = 0; k < dim; k++) {
#ifdef DEBUG
                    log << "\tReading ";
#endif
                    unsigned char val = keys[j * dim + k];
#ifdef DEBUG
                    log << (int) val << ". Writing ";
#endif
                    vs[curr_array][off + k] = val;
#ifdef DEBUG
                    log << (int) vs[curr_array][off + k] << std::endl;
#endif
                }
#ifdef DEBUG
                log << "vp[curr_key] = vs[curr_array] + off;" << std::endl;
#endif
                vp[curr_key] = vs[curr_array] + off;
                curr_key++;
                off += dim;
                /* Move on to the next chunk once this one is full.
                 * NOTE(review): relies on MAX_ARRAY_SIZE being a multiple
                 * of dim -- confirm against its definition. */
                if (off == MAX_ARRAY_SIZE) {
                    off = 0;
                    curr_array++;
                }
            }

            delete [] keys;
        }
    }

    log << "Building tree..." << std::endl;
    VocabTree tree;
    tree.Build(total_keys, dim, depth, bf, restarts, vp);
    tree.Write(tree_out);

    log.close();

    return 0;
}
Beispiel #2
0
/*
 * Entry point: builds a vocabulary tree from the descriptors stored in a
 * list of key files and writes the tree to disk.
 *
 * Command line:
 *   <list.in>           text file with one key-file path per line
 *   <depth>             tree depth passed to VocabTree::Build
 *   <branching_factor>  branching factor passed to VocabTree::Build
 *                       (reduced when there are not enough keys)
 *   <restarts>          restart count passed to VocabTree::Build
 *   <tree.out>          path the tree is written to
 *
 * Returns 0 on success and 1 on a usage or input error.
 */
int main(int argc, char **argv)
{
    if (argc != 6) {
        printf("Usage: %s <list.in> <depth> <branching_factor> "
               "<restarts> <tree.out>\n", argv[0]);
        return 1;
    }

    const char *list_in = argv[1];
    int depth = atoi(argv[2]);
    int bf = atoi(argv[3]);
    int restarts = atoi(argv[4]);
    const char *tree_out = argv[5];

    printf("Building tree with depth: %d, branching factor: %d, "
           "and restarts: %d\n", depth, bf, restarts);

    FILE *f = fopen(list_in, "r");
    if (f == NULL) {
      printf("Could not open file: %s\n", list_in);
      return 1;
    }

    std::vector<std::string> key_files;
    char buf[256];
    while (fgets(buf, 256, f)) {
        /* Remove trailing newline; guard the index so a zero-length
         * read never touches buf[-1]. */
        size_t n = strlen(buf);
        if (n > 0 && buf[n - 1] == '\n')
            buf[--n] = 0;

        /* A blank line cannot name a key file. */
        if (n == 0)
            continue;

        key_files.push_back(std::string(buf));
    }

    fclose(f);

    int num_files = (int) key_files.size();
    unsigned long total_keys = 0;
    for (int i = 0; i < num_files; i++) {
        int num_keys = GetNumberOfKeys(key_files[i].c_str());
        /* Only positive counts are accumulated so that a non-positive
         * return value cannot wrap the unsigned total. */
        if (num_keys > 0)
            total_keys += num_keys;
    }

    printf("Total number of keys: %lu\n", total_keys);

    // Reduce the branching factor if need be if there are not
    // enough keys, to avoid problems later.  The comparison is done in
    // unsigned long so that a key count above INT_MAX is not truncated
    // before being compared.
    if (bf >= 0 && (unsigned long) bf >= total_keys) {
      bf = (int) (total_keys - 1);
      printf("Reducing the branching factor to: %d\n", bf);
    }

    fflush(stdout);

    int dim = 128;
    unsigned long long len = (unsigned long long) total_keys * dim;

    /* Descriptor storage is split into chunks of at most MAX_ARRAY_SIZE
     * bytes; num_arrays is len / MAX_ARRAY_SIZE rounded up. */
    int num_arrays =
        len / MAX_ARRAY_SIZE + ((len % MAX_ARRAY_SIZE) == 0 ? 0 : 1);

    unsigned char **vs = new unsigned char *[num_arrays];

    printf("Allocating %llu bytes in total, in %d arrays\n", len, num_arrays);

    unsigned long long total = 0;
    for (int i = 0; i < num_arrays; i++) {
        unsigned long long remainder = len - total;
        unsigned long len_curr = MIN(remainder, MAX_ARRAY_SIZE);

        printf("Allocating array of size %lu\n", len_curr);
        fflush(stdout);
        vs[i] = new unsigned char[len_curr];

        /* Track what has been handed out so the final chunk is sized to
         * the bytes that are actually left. */
        total += len_curr;
    }

    /* Create the array of pointers (one pointer per key) */
    printf("Allocating pointer array of size %lu\n",
           (unsigned long) (sizeof(unsigned char *) * total_keys));
    fflush(stdout);

    unsigned char **vp = new unsigned char *[total_keys];

    unsigned long off = 0;        /* byte offset inside vs[curr_array] */
    unsigned long curr_key = 0;   /* next free slot in vp */
    int curr_array = 0;           /* chunk currently being filled */
    for (int i = 0; i < num_files; i++) {
        printf("  Reading keyfile %s\n", key_files[i].c_str());
        fflush(stdout);

        short int *keys;
        int num_keys = ReadKeyFile(key_files[i].c_str(), &keys);

        if (num_keys > 0) {
            /* The buffers were sized from the counting pass above; refuse
             * to write past them if this read yields more keys. */
            if ((unsigned long) num_keys > total_keys - curr_key) {
                printf("Key file %s has more keys than were counted\n",
                       key_files[i].c_str());
                delete [] keys;
                return 1;
            }

            for (int j = 0; j < num_keys; j++) {
                /* Each short descriptor component is stored as a byte. */
                for (int k = 0; k < dim; k++) {
                    vs[curr_array][off + k] = keys[j * dim + k];
                }

                vp[curr_key] = vs[curr_array] + off;
                curr_key++;
                off += dim;
                /* Move on to the next chunk once this one is full.
                 * NOTE(review): relies on MAX_ARRAY_SIZE being a multiple
                 * of dim -- confirm against its definition. */
                if (off == MAX_ARRAY_SIZE) {
                    off = 0;
                    curr_array++;
                }
            }

            delete [] keys;
        }
    }

    VocabTree tree;
    tree.Build(total_keys, dim, depth, bf, restarts, vp);
    tree.Write(tree_out);

    return 0;
}