Exemplo n.º 1
0
/**
 * Build an index over one binary field of every record in the database and
 * append it to the end of the database file.
 *
 * Every record is read through a cursor. Each record must be a map that
 * contains `field_name` as a non-empty binary value, and all of those values
 * must have the same length (fixed by the first record). For each record a
 * buffer holding the key bytes immediately followed by the 64-bit position
 * that libretrodb_tell() reported before the record was read is inserted
 * into a binary tree; a failed insert is reported as a duplicate key.
 * After the scan the file position is moved to the end of the file, an index
 * header is written and the tree is iterated with node_iter (defined
 * elsewhere).
 *
 * @param db          Database handle; db->fd and db->count are used.
 * @param name        Index name; at most 49 characters of it are stored.
 * @param field_name  Map key whose binary value becomes the index key.
 * @return 0 on success; -1 if the cursor cannot be opened or the seek to the
 *         end of the file fails; -EINVAL if a record is not a map, lacks the
 *         field, has a non-binary, empty, over-long or differently sized
 *         value, or the value is a duplicate; -ENOMEM if an entry buffer
 *         cannot be allocated.
 */
int libretrodb_create_index(libretrodb_t *db,
      const char *name, const char *field_name)
{
	struct node_iter_ctx nictx;
	struct rmsgpack_dom_value key;
	libretrodb_index_t idx;
	struct rmsgpack_dom_value item;
	struct rmsgpack_dom_value * field;
	struct bintree tree;
	libretrodb_cursor_t cur;
	void * buff = NULL;
	uint8_t field_size = 0;
	int item_read = 0;
	int rv = 0;
	uint64_t item_loc = libretrodb_tell(db);

	/* Give cur.is_valid a defined value for the cleanup path even if the
	 * open below fails before writing to it. */
	memset(&cur, 0, sizeof(cur));

	/* node_compare receives a pointer to field_size, so it sees the key
	 * length once the first record has fixed it. */
	bintree_new(&tree, node_compare, &field_size);

	if (libretrodb_cursor_open(db, &cur, NULL) != 0)
	{
		rv = -1;
		goto clean;
	}

	key.type = RDT_STRING;
	key.string.len = strlen(field_name);

	/* We know we aren't going to change it */
	key.string.buff = (char *) field_name;

	/* From here on `item` has been handed to the reader at least once, so
	 * the cleanup path may pass it to rmsgpack_dom_value_free(). */
	item_read = 1;

	while (libretrodb_cursor_read_item(&cur, &item) == 0)
	{
		if (item.type != RDT_MAP)
		{
			rv = -EINVAL;
			printf("Only map keys are supported\n");
			goto clean;
		}

		field = rmsgpack_dom_value_map_value(&item, &key);

		if (!field)
		{
			rv = -EINVAL;
			printf("field not found in item\n");
			goto clean;
		}

		if (field->type != RDT_BINARY)
		{
			rv = -EINVAL;
			printf("field is not binary\n");
			goto clean;
		}

		if (field->binary.len == 0)
		{
			rv = -EINVAL;
			printf("field is empty\n");
			goto clean;
		}

		/* The key length is kept in a uint8_t; reject anything that would
		 * be silently truncated when stored there. */
		if (field->binary.len > UINT8_MAX)
		{
			rv = -EINVAL;
			printf("field is too large\n");
			goto clean;
		}

		if (field_size == 0)
			field_size = field->binary.len;
		else if (field->binary.len != field_size)
		{
			rv = -EINVAL;
			printf("field is not of correct size\n");
			goto clean;
		}

		buff = malloc(field_size + sizeof(uint64_t));
		if (!buff)
		{
			rv = -ENOMEM;
			goto clean;
		}

		/* Entry layout: field_size key bytes, then the 8-byte position.
		 * The offset must be computed in bytes; doing the arithmetic on a
		 * uint64_t pointer would scale it by 8 and write past the buffer. */
		memcpy(buff, field->binary.buff, field_size);
		memcpy((uint8_t *)buff + field_size, &item_loc, sizeof(uint64_t));

		if (bintree_insert(&tree, buff) != 0)
		{
			printf("Value is not unique: ");
			rmsgpack_dom_value_print(field);
			printf("\n");
			rv = -EINVAL;
			goto clean;
		}

		/* The tree now holds the pointer; do not free it in cleanup. */
		buff = NULL;
		rmsgpack_dom_value_free(&item);
		item_loc = libretrodb_tell(db);
	}

	/* The index is written at the end of the file. */
	if (lseek(db->fd, 0, SEEK_END) < 0)
	{
		rv = -1;
		goto clean;
	}

	strncpy(idx.name, name, 50);
	idx.name[49] = '\0';
	idx.key_size = field_size;
	idx.next = db->count * (field_size + sizeof(uint64_t));
	libretrodb_write_index_header(db->fd, &idx);

	nictx.db = db;
	nictx.idx = &idx;
	bintree_iterate(&tree, node_iter, &nictx);

clean:
	if (item_read)
		rmsgpack_dom_value_free(&item);
	free(buff);
	if (cur.is_valid)
		libretrodb_cursor_close(&cur);
	/* Released on every path, including errors.
	 * NOTE(review): the entry buffers handed to the tree are not freed
	 * here; confirm whether bintree_free() releases them. */
	bintree_free(&tree);
	return rv;
}
Exemplo n.º 2
0
/**
 * Run one crawl described by `arg` and release its resources afterwards.
 *
 * Installs `finish` as the SIGINT handler, opens arg->out_file for writing,
 * sets up the libev default loop and a curl multi handle whose timer and
 * socket callbacks receive the local `global` state, derives the search name
 * from arg->url, queues the first request via add_first_call() and runs the
 * event loop. When the loop returns, statistics are printed and the output
 * file, result array, URL tree, multi handle and any URL items still
 * returned by url_get() are released.
 *
 * The process exits with EXIT_FAILURE if the output file cannot be opened,
 * the event loop or the curl multi handle cannot be created, or no search
 * name can be extracted from arg->url.
 *
 * @param arg  Crawl settings; out_file, max_events, excludes, excludes_len
 *             and url are read here.
 */
void crawl(arguments *arg) {
    // Add Ctrl+c handling
    done = 0;
    signal(SIGINT, finish);

    global_info global;
    memset(&global, 0, sizeof(global_info));
    char search_name[SEARCH_NAME_LEN];
    memset(search_name, '\0', SEARCH_NAME_LEN * sizeof(char));

    /* Init before looping starts */
    global.out_name = arg->out_file;
    if (0 == (global.out = fopen(global.out_name, "w+"))) {
        orcerror("%s (%d) %s\n", strerror(errno), errno, global.out_name);
        exit(EXIT_FAILURE);
    }

    global.job_max = arg->max_events;

    /* ev_default_loop() yields 0 when libev cannot initialise; running a
     * null loop would crash, so stop here instead. */
    global.loop = ev_default_loop(0);
    if (0 == global.loop) {
        orcerror("%s\n", "could not initialise the libev default loop");
        exit(EXIT_FAILURE);
    }

    /* curl_multi_init() yields NULL on failure; nothing can be crawled
     * without the multi handle. */
    global.multi = curl_multi_init();
    if (0 == global.multi) {
        orcerror("%s\n", "could not create the curl multi handle");
        exit(EXIT_FAILURE);
    }

    ev_timer_init(&(global.timer_event), socket_action_timer_cb, 0., 0.);
    global.timer_event.data = &global;
    curl_multi_setopt(global.multi, CURLMOPT_TIMERFUNCTION, multi_timer_cb);
    curl_multi_setopt(global.multi, CURLMOPT_TIMERDATA, &global);
    curl_multi_setopt(global.multi, CURLMOPT_SOCKETFUNCTION, sock_cb);
    curl_multi_setopt(global.multi, CURLMOPT_SOCKETDATA, &global);
    bintree_init(&(global.url_tree), bintree_streq, free_tree);

    global.input.search_name = search_name;
    global.input.search_name_len = SEARCH_NAME_LEN;
    global.input.excludes = arg->excludes;
    global.input.excludes_len = arg->excludes_len;

    if (!find_search_name(arg->url, search_name , SEARCH_NAME_LEN))
    {
        orcerror("not a valid domain name or ip in: %s\n", arg->url);
        exit(EXIT_FAILURE);
    }
    orcoutc(orc_reset, orc_blue, "Target %s\n", global.input.search_name);

    add_first_call(arg, &global);

    struct timeval start;
    struct timeval stop;

    /* Lets find some urls */
    gettimeofday(&start, 0);
    ev_loop(global.loop, 0);
    gettimeofday(&stop, 0);

    print_stats(&global, &start, &stop);

    /* Cleanups after looping */
    /* fclose() performs the final flush; report a failure rather than
     * losing output silently, then carry on with the remaining cleanup. */
    if (0 != fclose(global.out)) {
        orcerror("%s (%d) %s\n", strerror(errno), errno, global.out_name);
    }
    free_array_of_charptr_incl(&(global.input.ret), global.input.ret_len);
    bintree_free(&(global.url_tree));
    curl_multi_cleanup(global.multi);

    /* Drain whatever url_get() still hands out.
     * NOTE(review): this runs after the URL tree and the multi handle have
     * been released; confirm url_get() touches neither. */
    char *url_item = 0;
    int i = 0;
    while (0 != (url_item = url_get(&global))) {
        free(url_item);
        i++;
    }
    orcout(orcm_debug, "Freed %d url items.\n", i);
}