/**
 * libretrodb_create_index:
 * @db         : database handle whose items are indexed
 * @name       : index name stored in the index header (truncated to 49 chars)
 * @field_name : key of the binary map field used as the index key
 *
 * Reads every item of @db through a cursor, extracts the binary field
 * @field_name from each one and inserts a record made of
 * [key bytes][uint64_t item offset] into a binary tree. All keys must be
 * non-empty, of identical size, and unique. Once every item has been read,
 * an index header is written at the end of the database file and the tree is
 * iterated with node_iter (presumably emitting the records in order -
 * node_iter is defined outside this block).
 *
 * Returns: 0 on success, -1 if the cursor cannot be opened, -EINVAL if an
 * item or field is unsuitable (not a map, field missing, not binary, empty,
 * too large, inconsistent size, duplicate key), -ENOMEM on allocation
 * failure, or -errno if seeking to the end of the file fails.
 */
int libretrodb_create_index(libretrodb_t *db, const char *name, const char *field_name)
{
   int rv = 0;
   int have_item = 0; /* non-zero while `item` holds a value not yet freed */
   struct node_iter_ctx nictx;
   struct rmsgpack_dom_value key;
   libretrodb_index_t idx;
   struct rmsgpack_dom_value item;
   struct rmsgpack_dom_value *field;
   struct bintree tree;
   libretrodb_cursor_t cur;
   void *buff = NULL;
   /* Stays uint8_t: its address is handed to the tree as comparison context. */
   uint8_t field_size = 0;
   uint64_t item_loc = libretrodb_tell(db);

   /* Make the cleanup test below well-defined even if the open fails
    * before the cursor structure is filled in. */
   cur.is_valid = 0;

   bintree_new(&tree, node_compare, &field_size);

   if (libretrodb_cursor_open(db, &cur, NULL) != 0)
   {
      rv = -1;
      goto clean;
   }

   key.type = RDT_STRING;
   key.string.len = strlen(field_name);
   /* We know we aren't going to change it */
   key.string.buff = (char *) field_name;

   while (libretrodb_cursor_read_item(&cur, &item) == 0)
   {
      have_item = 1;

      if (item.type != RDT_MAP)
      {
         rv = -EINVAL;
         printf("Only map keys are supported\n");
         goto clean;
      }

      field = rmsgpack_dom_value_map_value(&item, &key);

      if (!field)
      {
         rv = -EINVAL;
         printf("field not found in item\n");
         goto clean;
      }

      if (field->type != RDT_BINARY)
      {
         rv = -EINVAL;
         printf("field is not binary\n");
         goto clean;
      }

      if (field->binary.len == 0)
      {
         rv = -EINVAL;
         printf("field is empty\n");
         goto clean;
      }

      /* The key size is stored in a uint8_t; reject anything that would be
       * silently truncated. */
      if (field->binary.len > UINT8_MAX)
      {
         rv = -EINVAL;
         printf("field is too large\n");
         goto clean;
      }

      /* The first item fixes the key size; all later ones must match it. */
      if (field_size == 0)
         field_size = field->binary.len;
      else if (field->binary.len != field_size)
      {
         rv = -EINVAL;
         printf("field is not of correct size\n");
         goto clean;
      }

      buff = malloc(field_size + sizeof(uint64_t));
      if (!buff)
      {
         rv = -ENOMEM;
         goto clean;
      }

      memcpy(buff, field->binary.buff, field_size);
      /* The item offset lives directly after the key bytes. Advance by
       * bytes (uint8_t *), not by uint64_t elements, to stay inside the
       * field_size + sizeof(uint64_t) allocation. */
      memcpy((uint8_t *)buff + field_size, &item_loc, sizeof(item_loc));

      if (bintree_insert(&tree, buff) != 0)
      {
         printf("Value is not unique: ");
         rmsgpack_dom_value_print(field);
         printf("\n");
         rv = -EINVAL;
         goto clean;
      }

      /* Inserted successfully: the record must no longer be freed here.
       * NOTE(review): presumably the tree owns it from now on - confirm
       * against bintree_free. */
      buff = NULL;

      rmsgpack_dom_value_free(&item);
      have_item = 0;
      item_loc = libretrodb_tell(db);
   }

   /* Position the file at its end so the index is appended there. */
   if (lseek(db->fd, 0, SEEK_END) < 0)
   {
      rv = -errno;
      goto clean;
   }

   /* strncpy zero-pads the remainder of the name field; the explicit
    * terminator covers names of 50 characters or more. */
   strncpy(idx.name, name, 50);
   idx.name[49] = '\0';

   idx.key_size = field_size;
   idx.next = db->count * (field_size + sizeof(uint64_t));
   libretrodb_write_index_header(db->fd, &idx);

   nictx.db = db;
   nictx.idx = &idx;
   bintree_iterate(&tree, node_iter, &nictx);

clean:
   /* The tree is created unconditionally above, so release it on every
    * path, not only on success. */
   bintree_free(&tree);

   /* Only free an item that was read successfully and not freed yet.
    * NOTE(review): assumes a failed libretrodb_cursor_read_item leaves
    * nothing for the caller to release - confirm. */
   if (have_item)
      rmsgpack_dom_value_free(&item);

   free(buff);

   if (cur.is_valid)
      libretrodb_cursor_close(&cur);

   return rv;
}
/*
 * crawl - run one crawl session configured by `arg`.
 *
 * Installs `finish` as the SIGINT handler, opens arg->out_file for output,
 * sets up the libev default loop and a curl multi handle wired to the
 * timer/socket callbacks, derives the search name from arg->url, queues the
 * first request via add_first_call() and then runs the event loop until it
 * returns. Afterwards statistics are printed and the resources acquired here
 * are released.
 *
 * The process exits with EXIT_FAILURE if the output file cannot be opened or
 * if find_search_name() rejects arg->url.
 */
void crawl(arguments *arg)
{
    global_info global;
    char search_name[SEARCH_NAME_LEN];
    struct timeval t_begin;
    struct timeval t_end;
    char *leftover = 0;
    int freed = 0;

    /* Add Ctrl+c handling */
    done = 0;
    signal(SIGINT, finish);

    memset(&global, 0, sizeof global);
    memset(search_name, '\0', sizeof search_name);

    /* Init before looping starts */
    global.out_name = arg->out_file;
    global.out = fopen(global.out_name, "w+");
    if (!global.out) {
        orcerror("%s (%d) %s\n", strerror(errno), errno, global.out_name);
        exit(EXIT_FAILURE);
    }

    global.job_max = arg->max_events;
    global.loop = ev_default_loop(0);
    global.multi = curl_multi_init();

    /* The timer event carries a back-pointer to the shared state. */
    ev_timer_init(&(global.timer_event), socket_action_timer_cb, 0., 0.);
    global.timer_event.data = &global;

    /* Route curl's timer and socket notifications to our callbacks. */
    curl_multi_setopt(global.multi, CURLMOPT_TIMERFUNCTION, multi_timer_cb);
    curl_multi_setopt(global.multi, CURLMOPT_TIMERDATA, &global);
    curl_multi_setopt(global.multi, CURLMOPT_SOCKETFUNCTION, sock_cb);
    curl_multi_setopt(global.multi, CURLMOPT_SOCKETDATA, &global);

    bintree_init(&(global.url_tree), bintree_streq, free_tree);

    global.input.search_name = search_name;
    global.input.search_name_len = SEARCH_NAME_LEN;
    global.input.excludes = arg->excludes;
    global.input.excludes_len = arg->excludes_len;

    if (!find_search_name(arg->url, search_name, SEARCH_NAME_LEN)) {
        orcerror("not a valid domain name or ip in: %s\n", arg->url);
        exit(EXIT_FAILURE);
    }

    orcoutc(orc_reset, orc_blue, "Target %s\n", global.input.search_name);

    add_first_call(arg, &global);

    /* Lets find some urls */
    gettimeofday(&t_begin, 0);
    ev_loop(global.loop, 0);
    gettimeofday(&t_end, 0);

    print_stats(&global, &t_begin, &t_end);

    /* Cleanups after looping */
    fclose(global.out);
    free_array_of_charptr_incl(&(global.input.ret), global.input.ret_len);
    bintree_free(&(global.url_tree));
    curl_multi_cleanup(global.multi);

    /* Drain whatever url_get() still hands back and release each entry. */
    for (leftover = url_get(&global); 0 != leftover; leftover = url_get(&global)) {
        free(leftover);
        freed++;
    }

    orcout(orcm_debug, "Freed %d url items.\n", freed);
}