int main(int argc,char *argv[]) { int error=0, subkeys; uint keylen, keylen2=0, inx, doc_cnt=0; float weight= 1.0; double gws, min_gws=0, avg_gws=0; MI_INFO *info; char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN]; ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0; struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */ MY_INIT(argv[0]); memset(&main_thread_keycache_var, 0, sizeof(st_keycache_thread_var)); mysql_cond_init(PSI_NOT_INSTRUMENTED, &main_thread_keycache_var.suspend); if ((error= handle_options(&argc, &argv, my_long_options, get_one_option))) exit(error); if (count || dump) verbose=0; if (!count && !dump && !lstats && !query) stats=1; if (verbose) setbuf(stdout,NULL); if (argc < 2) usage(); { char *end; inx= (uint) my_strtoll(argv[1], &end, 10); if (*end) usage(); } init_key_cache(dflt_key_cache,MI_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0); if (!(info=mi_open(argv[0], O_RDONLY, HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER))) { error=my_errno(); goto err; } *buf2=0; aio->info=info; if ((inx >= info->s->base.keys) || !(info->s->keyinfo[inx].flag & HA_FULLTEXT)) { printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename); goto err; } mi_lock_database(info, F_EXTRA_LCK); info->lastpos= HA_OFFSET_ERROR; info->update|= HA_STATE_PREV_FOUND; while (!(error=mi_rnext(info,NULL,inx))) { keylen=*(info->lastkey); subkeys=ft_sintXkorr(info->lastkey+keylen+1); if (subkeys >= 0) ft_floatXget(weight, info->lastkey+keylen+1); my_snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1); my_casedn_str(default_charset_info,buf); total++; lengths[keylen]++; if (count || stats) { if (strcmp(buf, buf2)) { if (*buf2) { uniq++; avg_gws+=gws=GWS_IN_USE; if (count) printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); if (maxlen<keylen2) { maxlen=keylen2; my_stpcpy(buf_maxlen, buf2); } if (max_doc_cnt < doc_cnt) { max_doc_cnt=doc_cnt; my_stpcpy(buf_min_gws, buf2); min_gws=gws; } } my_stpcpy(buf2, buf); keylen2=keylen; doc_cnt=0; } doc_cnt+= (subkeys >= 0 ? 1 : -subkeys); } if (dump) { if (subkeys>=0) printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf); else printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys,buf); } if (verbose && (total%HOW_OFTEN_TO_WRITE)==0) printf("%10ld\r",total); } mi_lock_database(info, F_UNLCK); if (count || stats) { if (*buf2) { uniq++; avg_gws+=gws=GWS_IN_USE; if (count) printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); if (maxlen<keylen2) { maxlen=keylen2; my_stpcpy(buf_maxlen, buf2); } if (max_doc_cnt < doc_cnt) { max_doc_cnt=doc_cnt; my_stpcpy(buf_min_gws, buf2); min_gws=gws; } } } if (stats) { count=0; for (inx=0;inx<256;inx++) { count+=lengths[inx]; if ((ulong) count >= total/2) break; } printf("Total rows: %lu\nTotal words: %lu\n" "Unique words: %lu\nLongest word: %lu chars (%s)\n" "Median length: %u\n" "Average global weight: %f\n" "Most common word: %lu times, weight: %f (%s)\n", (long) info->state->records, total, uniq, maxlen, buf_maxlen, inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws); } if (lstats) { count=0; for (inx=0; inx<256; inx++) { count+=lengths[inx]; if (count && lengths[inx]) printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx, (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count, 100.0*count/total); } } err: if (error && error != HA_ERR_END_OF_FILE) printf("got error %d\n",my_errno()); if (info) mi_close(info); mysql_cond_destroy(&main_thread_keycache_var.suspend); return 0; }
int main(int argc,char *argv[]) { int error=0; uint keylen, keylen2=0, inx, doc_cnt=0; float weight= 1.0; double gws, min_gws=0, avg_gws=0; MARIA_HA *info; char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN]; ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0; struct { MARIA_HA *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */ MY_INIT(argv[0]); if ((error= handle_options(&argc, &argv, my_long_options, get_one_option))) exit(error); maria_init(); if (count || dump) verbose=0; if (!count && !dump && !lstats && !query) stats=1; if (verbose) setbuf(stdout,NULL); if (argc < 2) usage(); { char *end; inx= (uint) strtoll(argv[1], &end, 10); if (*end) usage(); } init_pagecache(maria_pagecache, PAGE_BUFFER_INIT, 0, 0, MARIA_KEY_BLOCK_LENGTH, 0, MY_WME); if (!(info=maria_open(argv[0], O_RDONLY, HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER))) { error=my_errno; goto err; } *buf2=0; aio->info=info; if ((inx >= info->s->base.keys) || !(info->s->keyinfo[inx].flag & HA_FULLTEXT)) { printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->s->open_file_name.str); goto err; } maria_lock_database(info, F_EXTRA_LCK); info->cur_row.lastpos= HA_OFFSET_ERROR; info->update|= HA_STATE_PREV_FOUND; while (!(error=maria_rnext(info,NULL,inx))) { FT_WEIGTH subkeys; keylen=*(info->lastkey_buff); subkeys.i= ft_sintXkorr(info->lastkey_buff + keylen + 1); if (subkeys.i >= 0) weight= subkeys.f; #ifdef HAVE_SNPRINTF snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey_buff+1); #else sprintf(buf,"%.*s",(int) keylen,info->lastkey_buff+1); #endif my_casedn_str(default_charset_info,buf); total++; lengths[keylen]++; if (count || stats) { if (strcmp(buf, buf2)) { if (*buf2) { uniq++; avg_gws+=gws=GWS_IN_USE; if (count) printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); if (maxlen<keylen2) { maxlen=keylen2; strmov(buf_maxlen, buf2); } if (max_doc_cnt < doc_cnt) { max_doc_cnt=doc_cnt; strmov(buf_min_gws, buf2); min_gws=gws; } } strmov(buf2, buf); keylen2=keylen; doc_cnt=0; } doc_cnt+= (subkeys.i >= 0 ? 1 : -subkeys.i); } if (dump) { if (subkeys.i >= 0) printf("%9lx %20.7f %s\n", (long) info->cur_row.lastpos,weight,buf); else printf("%9lx => %17d %s\n",(long) info->cur_row.lastpos,-subkeys.i, buf); } if (verbose && (total%HOW_OFTEN_TO_WRITE)==0) printf("%10ld\r",total); } maria_lock_database(info, F_UNLCK); if (count || stats) { if (*buf2) { uniq++; avg_gws+=gws=GWS_IN_USE; if (count) printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); if (maxlen<keylen2) { maxlen=keylen2; strmov(buf_maxlen, buf2); } if (max_doc_cnt < doc_cnt) { max_doc_cnt=doc_cnt; strmov(buf_min_gws, buf2); min_gws=gws; } } } if (stats) { count=0; for (inx=0;inx<256;inx++) { count+=lengths[inx]; if ((ulong) count >= total/2) break; } printf("Total rows: %lu\nTotal words: %lu\n" "Unique words: %lu\nLongest word: %lu chars (%s)\n" "Median length: %u\n" "Average global weight: %f\n" "Most common word: %lu times, weight: %f (%s)\n", (long) info->state->records, total, uniq, maxlen, buf_maxlen, inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws); } if (lstats) { count=0; for (inx=0; inx<256; inx++) { count+=lengths[inx]; if (count && lengths[inx]) printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx, (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count, 100.0*count/total); } } err: if (error && error != HA_ERR_END_OF_FILE) printf("got error %d\n",my_errno); if (info) maria_close(info); maria_end(); return 0; }