コード例 #1
0
void next_url_from_engine(std::string src_page)
{
	std::string next_url = get_next_url(src_page);
	if (next_url.empty())
	{
		return;
	}
	else
	{
		DEBUG << "Next_url: " << next_url;
	}

	parser_parse(next_url,[=](std::vector<std::string> urls)
	{
		engine_parsed_from_next_url(next_url,urls);
	});
}
コード例 #2
0
ファイル: stat.c プロジェクト: bojieli/mirrors-log
/*
 * Entry point of the stat tool.
 *
 * Usage: ./stat outputfile
 *
 * Steps, as visible in this function:
 *   1. Open (creating if necessary) the output file named by argv[1].
 *   2. Connect to the database and read the row count of table `files`.
 *   3. Create a hash list with three slots per row and feed it every URL
 *      produced by get_next_url() for "files" (add_str).
 *   4. Feed every URL produced for "log" to add_counter().
 *   5. Walk "SELECT * FROM files" and, for each row, write the line built by
 *      array_implode() from the row's fields and (counter - 1) to the output.
 *
 * Returns 0 on success; every fatal error prints a message to stderr and
 * terminates the process with exit status 1.
 */
int main(int argc, char** argv) {
	if (argc < 2) {
		fprintf(stderr, "Usage: ./stat outputfile\n");
		exit(1);
	}
	// open() reports failure with -1 (0 is a valid descriptor), so the
	// error test must be "negative", not "zero".
	// NOTE(review): the file is opened without O_TRUNC, so a longer
	// pre-existing file keeps its old tail -- confirm this is intended.
	int fd_out = open(argv[1], O_WRONLY | O_CREAT, 0644);
	if (fd_out < 0) {
		fprintf(stderr, "Cannot open output file!\n");
		exit(1);
	}

	MYSQL mysql;
	MYSQL *sock = mysql_connect_db(&mysql, DBHOST, DBUSER, DBPASS, DBNAME, DBSOCK);
	if (sock == NULL)
		exit(1);

	int rows_num = mysql_result_int(sock, "SELECT COUNT(*) FROM files");
	if (rows_num <= 0) {
		fprintf(stderr, "Abnormal row num of files: %d\n", rows_num);
		exit(1);
	}
#ifdef DEBUG
	fprintf(stderr, "\nTable file1 Rows: %d\n", rows_num);
#endif
#ifdef DEBUGTIME
	// Kept as int so that it matches the %d conversions used for timing output.
	int start_time = (int)time(NULL);
	fprintf(stderr, "Start loading files: epoch %d\n", start_time);
#endif
	// we hope the hashlist will have not too much collision
	int slot_num = rows_num * 3;
	list l = init_list(slot_num);
	if (l == NULL) {
		fprintf(stderr, "Error creating hashlist with %d slots\n", slot_num);
		exit(1);
	}

	// First pass: register every URL coming from "files".
	// A negative length ends the iteration; a zero length is skipped.
	init_get_next_url(sock, "files");
	char url[512];
	int length;
	while (0 <= (length = get_next_url(url))) {
		if (length > 0)
			add_str(l, slot_num, url, length);
	}

#ifdef DEBUG
	fprintf(stderr, "\n%d slots occupied after loading files\n", occupied_slots_num(l, slot_num));
	fprintf(stderr, "Hash collision %d times\n", hash_collision_count());
	fprintf(stderr, "Repeat string count: %d\n", repeat_str_count());
#endif
#ifdef DEBUGTIME
	// time_t may be wider than int; cast the elapsed seconds to match %d.
	fprintf(stderr, "Start loading log @%ds\n", (int)(time(NULL) - start_time));
#endif

	// Second pass: bump the counter for every URL coming from "log".
	init_get_next_url(sock, "log");
	while (0 <= (length = get_next_url(url))) {
		if (length > 0)
			add_counter(l, slot_num, url, length);
	}

#ifdef DEBUG
	fprintf(stderr, "\nNot found strings count: %d\n", notfound_str_count());
	fprintf(stderr, "\nFound strings count: %d\n", found_str_count());
#endif
#ifdef DEBUGTIME
	fprintf(stderr, "Start loading files @%ds\n", (int)(time(NULL) - start_time));
#endif

	// Third pass: emit one output line per row of `files`.
	init_buffered_get_next_row(sock, "SELECT * FROM files");
	MYSQL_ROW file_fields;
	// Do not free() me! It contains many rows and is managed by buffer.
	MYSQL_RES* res;
	while ((res = buffered_get_next_row(&file_fields)) != NULL) {
		// the first field is id, second is url
		unsigned long *lengths = mysql_fetch_lengths(res);
		if (lengths == NULL) {
			// mysql_fetch_lengths() returns NULL on error; skip the row
			// instead of dereferencing a null pointer.
			fprintf(stderr, "\nError: cannot get field lengths\n");
			continue;
		}
		unsigned long count = get_counter(l, slot_num, file_fields[1], lengths[1]);
		if (unlikely(count == 0)) {
			fprintf(stderr, "\nError: file hash not found: %s\n", file_fields[1]);
			continue;
		}
		char* line;
		// note that the counter includes one in table `files'
		unsigned len = array_implode(&line, mysql_num_fields(res), file_fields, lengths, count-1);
		// write() may fail or write fewer bytes than requested; loop until
		// the whole line is out and abort on error rather than silently
		// producing a truncated output file.
		unsigned written = 0;
		while (written < len) {
			ssize_t n = write(fd_out, line + written, len - written);
			if (n <= 0) {
				fprintf(stderr, "Error writing output file!\n");
				free(line);
				exit(1);
			}
			written += (unsigned)n;
		}
		free(line);
	}

#ifdef DEBUGTIME
	fprintf(stderr, "\nEND @%ds\n", (int)(time(NULL) - start_time));
#endif
	mysql_close(&mysql);
	close(fd_out);
	return 0;
}