/*
 * Program entry point.
 *
 * Consumes two positional arguments (schema_file, mbdump_dir), copies them
 * into the file-level buffers of the same names, and passes whatever is left
 * on the command line to execute(). The value execute() returns is reported
 * on stderr together with the elapsed wall time and a docs/sec rate.
 *
 * Returns 0 on completion. A command line with fewer than two arguments
 * prints a usage line and invokes DIE.
 */
int
main (int argc, char *argv[])
{
   double start_time, end_time, delta_time;
   int64_t count;

   if (argc < 3) {
      /* Terminate the usage line so any output that follows starts on a fresh line. */
      fprintf (stderr, "usage: %s [options] schema_file mbdump_dir table_names\n", argv[0]);
      DIE;
   }

   /* Skip the program name. */
   argc--, argv++;

   /*
    * NOTE(review): both copies are unbounded; the capacity of schema_file and
    * mbdump_dir is declared outside this block -- confirm they can hold an
    * arbitrary command-line argument, or switch to a length-checked copy.
    */
   strcpy (schema_file, argv[0]);
   argc--, argv++;
   strcpy (mbdump_dir, argv[0]);
   argc--, argv++;

   test_suite ();

   mongoc_init ();
   mongoc_log_set_handler (log_local_handler, NULL);

   start_time = dtimeofday ();
   count = execute (argc, &argv[0]);
   end_time = dtimeofday ();

   /* The small constant keeps the divisor below from being exactly zero. */
   delta_time = end_time - start_time + 0.0000001;

   fprintf (stderr, "total:\n");
   fprintf (stderr, "info: real: %.2f, count: %"PRId64", %"PRId64" docs/sec\n",
            delta_time, count, (int64_t)round (count/delta_time));

   mongoc_cleanup ();
   return 0;
}
/*
 * Ad-hoc exercise of a collection.
 *
 * Prints the collection's document count (empty query), then runs an
 * aggregation whose pipeline is an empty $match followed by a $project of
 * the "text" field, hands the resulting cursor to mongoc_cursor_dump(), and
 * prints the elapsed seconds, the count it returned, and a docs/sec rate.
 *
 * db:         unused by this function.
 * collection: collection to count and aggregate over.
 *
 * Output goes to stdout; a failed count is additionally reported on stderr.
 */
void
test_suite (mongoc_database_t *db, mongoc_collection_t *collection)
{
   bson_error_t error;
   bson_t query = BSON_INITIALIZER;
   bson_t *options, *pipeline;
   mongoc_cursor_t *cursor;
   double start_time, end_time, delta_time;
   int64_t count;

   (void) db;

   count = mongoc_collection_count (collection, MONGOC_QUERY_NONE, &query, 0, 0, NULL, &error);
   if (count < 0) {
      /* A negative count signals failure; surface the driver's message. */
      fprintf (stderr, "mongoc_collection_count failed: %s\n", error.message);
   }
   printf ("mongoc_collection_count count: %"PRId64"\n", count);

   options = BCON_NEW ("cursor", "{", "}", "allowDiskUse", BCON_BOOL (1));
   pipeline = BCON_NEW (
      "pipeline", "[",
         "{", "$match", "{", "}", "}",
         "{", "$project", "{", "text", BCON_INT32 (1), "}", "}",
      "]"
   );

   start_time = dtimeofday ();
   cursor = mongoc_collection_aggregate (collection, MONGOC_QUERY_NONE, pipeline, options, NULL);
   /*
    * NOTE(review): the cursor is not destroyed here; whether
    * mongoc_cursor_dump() releases it is not visible from this block -- confirm.
    */
   count = mongoc_cursor_dump (cursor);
   end_time = dtimeofday ();

   /* The small constant keeps the divisor below from being exactly zero. */
   delta_time = end_time - start_time + 0.0000001;
   printf ("mongoc_cursor_dump: secs: %.2f, count: %"PRId64", %.2f docs/sec\n",
           delta_time, count, count/delta_time);

   /* Release the documents built above; they were previously leaked. */
   bson_destroy (pipeline);
   bson_destroy (options);
   bson_destroy (&query);
}
int main (int argc, char *argv[]) { bson_reader_t *reader; const bson_t *b; bson_error_t error; const char *filename; int i, j; double dtime_before, dtime_after, dtime_delta; uint64_t aggregate_count; off_t mark; /* * Print program usage if no arguments are provided. */ if (argc == 1) { fprintf(stderr, "usage: %s FILE...\n", argv[0]); return 1; } /* * Process command line arguments expecting each to be a filename. */ printf("["); for (i = 1; i < argc; i++) { if (i > 1) printf(","); filename = argv[i]; /* * Initialize a new reader for this file descriptor. */ if (!(reader = bson_reader_new_from_file (filename, &error))) { fprintf (stderr, "Failed to open \"%s\": %s\n", filename, error.message); continue; } state = initial_state; dtime_before = dtimeofday(); mark = 0; while ((b = bson_reader_read (reader, NULL))) { off_t pos = bson_reader_tell(reader); state.doc_size_max = MAX(pos - mark, state.doc_size_max); mark = pos; bson_metrics(b, NULL, &state); } dtime_after = dtimeofday(); dtime_delta = MAX(dtime_after - dtime_before, 0.000001); state.bson_type_metrics[BSON_TYPE_MAXKEY].description = "Max key"; state.bson_type_metrics[BSON_TYPE_MINKEY].description = "Min key"; aggregate_count = state.bson_type_metrics[BSON_TYPE_DOCUMENT].count + state.bson_type_metrics[BSON_TYPE_ARRAY].count; qsort(state.bson_type_metrics, 256, sizeof(bson_type_metrics_t), compar_bson_type_metrics); printf("\n {\n"); printf(" \"file\": \"%s\",\n", filename); printf(" \"secs\": %.2f,\n", dtime_delta); printf(" \"docs_per_sec\": %"PRIu64",\n", (uint64_t)round(state.doc_count/dtime_delta)); printf(" \"docs\": %"PRIu64",\n", state.doc_count); printf(" \"elements\": %"PRIu64",\n", state.element_count); printf(" \"elements_per_doc\": %"PRIu64",\n", (uint64_t)round((double)state.element_count/(double)MAX(state.doc_count, 1))); printf(" \"aggregates\": %"PRIu64",\n", aggregate_count); printf(" \"aggregates_per_doc\": %"PRIu64",\n", (uint64_t)round((double)aggregate_count/(double)MAX(state.doc_count, 
1))); printf(" \"degree\": %"PRIu64",\n", (uint64_t)round((double)state.element_count/((double)MAX(state.doc_count + aggregate_count, 1)))); printf(" \"doc_size_max\": %"PRIu64",\n", state.doc_size_max); printf(" \"doc_size_average\": %"PRIu64",\n", (uint64_t)round((double)bson_reader_tell(reader)/(double)MAX(state.doc_count, 1))); printf(" \"key_size_average\": %"PRIu64",\n", (uint64_t)round((double)state.key_size_tally/(double)MAX(state.element_count, 1))); printf(" \"string_size_average\": %"PRIu64",\n", (uint64_t)round((double)state.utf8_size_tally/(double)MAX(state.bson_type_metrics[BSON_TYPE_UTF8].count, 1))); printf(" \"percent_by_type\": {\n"); for (j = 0; state.bson_type_metrics[j].count > 0; j++) { bson_type_metrics_t bson_type_metrics = state.bson_type_metrics[j]; printf(" \"%s\": %"PRIu64",\n", bson_type_metrics.description, (uint64_t)round((double)bson_type_metrics.count*100.0/(double)MAX(state.element_count, 1))); } printf(" }\n"); printf(" }"); /* * Cleanup after our reader, which closes the file descriptor. */ bson_reader_destroy (reader); } printf("\n]\n"); return 0; }
/*
 * Load one tab-separated dump file into the collection of the same name.
 *
 * The file "<mbdump_dir>/<table_name>" is read line by line into the
 * file-level `buf`. Each line is split on tabs and every field is appended
 * to a BSON document through the per-column bson_append_from_s callback
 * taken from the column map for table_name. Documents are queued on a bulk
 * operation and executed every BULK_OPS_SIZE documents, with a final
 * execute for any remainder.
 *
 * db:          database that owns the target collection.
 * table_name:  collection name and dump file name.
 * bson_schema: schema passed to get_column_map().
 *
 * Progress dots go to stdout; timing and warnings go to stderr.
 *
 * Returns the number of documents counted from successful bulk executes, or
 * -1 if a bulk execute failed. Invokes DIE if the column map cannot be
 * obtained or the dump file cannot be opened.
 */
int64_t
load_table (mongoc_database_t *db, const char *table_name, bson_t *bson_schema)
{
   int64_t ret = true;
   column_map_t *column_map, *column_map_p;
   int column_map_size, i;
   double start_time, end_time, delta_time;
   FILE *fp;
   mongoc_collection_t *collection;
   mongoc_bulk_operation_t *bulk;
   size_t n_docs = 0;
   char *token;
   bson_t bson, reply;
   int64_t count = 0;
   bson_error_t error;

   fprintf (stderr, "load_table table_name: \"%s\"\n", table_name);
   get_column_map (bson_schema, table_name, &column_map, &column_map_size) || DIE;
   snprintf (mbdump_file, MAXPATHLEN, "%s/%s", mbdump_dir, table_name);

   start_time = dtimeofday ();
   fp = fopen (mbdump_file, "r");
   if (!fp) {
      DIE;
   }

   collection = mongoc_database_get_collection (db, table_name);
   bulk = mongoc_collection_create_bulk_operation (collection, true, NULL);
   bson_init (&bson);

   /* Stop reading as soon as a bulk execute has failed. */
   while (ret && fgets (buf, BUFSIZ, fp)) {
      chomp (buf);

      /* Walk the column map and the tab-separated fields in lockstep. */
      for (i = 0, column_map_p = column_map, token = strtok_single (buf, "\t");
           i < column_map_size;
           i++, column_map_p++, token = strtok_single (NULL, "\t")) {
         /* Named distinctly so it no longer shadows the loop-controlling `ret`. */
         bool appended = (*column_map_p->bson_append_from_s) (&bson, column_map_p->column_name, token);

         if (!appended) {
            /*
             * Guard the %s argument: token may be NULL if strtok_single()
             * runs out of fields (its behavior is not visible here).
             */
            fprintf (stderr, "WARNING: column_map_p->bson_append_from_s failed column %s: \"%s\" [%d/%d](%s)\n",
                     column_map_p->column_name, token ? token : "(null)",
                     i, column_map_size, column_map_p->data_type);
         }
      }

      mongoc_bulk_operation_insert (bulk, &bson);
      bson_reinit (&bson);

      if (++n_docs == BULK_OPS_SIZE) {
         ret = mongoc_bulk_operation_execute (bulk, &reply, &error);
         /* The reply document is filled in by execute and must be released. */
         bson_destroy (&reply);
         if (ret) {
            count += n_docs;
            if (count % PROGRESS_SIZE == 0) {
               fputc('.', stdout);
               fflush(stdout);
            }
         } else {
            fprintf (stderr, "mongoc_cursor_bulk_insert execute failure: %s\n", error.message);
         }
         n_docs = 0;
         /* Start a fresh bulk operation for the next batch. */
         mongoc_bulk_operation_destroy (bulk);
         bulk = mongoc_collection_create_bulk_operation (collection, true, NULL);
      }
   }

   /* Flush the final partial batch, if any, unless an earlier batch failed. */
   if (ret && n_docs > 0) {
      ret = mongoc_bulk_operation_execute (bulk, &reply, &error);
      bson_destroy (&reply);
      if (ret) {
         count += n_docs;
      } else {
         fprintf (stderr, "mongoc_cursor_bulk_insert execute failure: %s\n", error.message);
      }
   }

   fputc('.', stdout);
   fputc('\n', stdout);
   fflush(stdout);

   bson_destroy (&bson);
   mongoc_bulk_operation_destroy (bulk);
   mongoc_collection_destroy (collection);
   fclose (fp);

   end_time = dtimeofday ();
   /* The small constant keeps the divisor below from being exactly zero. */
   delta_time = end_time - start_time + 0.0000001;
   fprintf (stderr, "info: real: %.2f, count: %"PRId64", %"PRId64" docs/sec\n",
            delta_time, count, (int64_t)round (count/delta_time));
   fflush (stderr);

   free (column_map);
   return ret ? count : -1;
}