forked from gigablast/open-source-search-engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
superMergeTest.cpp
152 lines (146 loc) · 4.12 KB
/
superMergeTest.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#include "gb-include.h"
#include <sys/time.h> // gettimeofday()
#include "RdbList.h"
#include "sort.h"
static int cmp (const void *p1, const void *p2) ;
// Fill keys[0..nk) with pseudo-random data from rand(), one unsigned int
// sized word at a time.
// NOTE(review): the original wrote three "unsigned long" words per key, which
// presumably assumed a 4-byte long and a 12-byte key_t -- confirm. Where long
// is 8 bytes that writes past the end of the malloc'd buffer, so the word
// count is derived from sizeof(key_t) here instead. Any trailing bytes of a
// key beyond a whole number of words are left untouched.
static void fillRandomKeys ( key_t *keys , long nk ) {
	unsigned int *p = (unsigned int *) keys;
	long wordsPerKey = (long)( sizeof(key_t) / sizeof(unsigned int) );
	long numWords    = nk * wordsPerKey;
	for ( long i = 0 ; i < numWords ; i++ ) *p++ = rand() ;
}

// Benchmark driver: builds four sorted lists of random keys, merges the
// first <numToMerge> of them with the new superMerge2()/superMerge3()
// routines and with the old merge_r() routine, and prints timings to stderr.
//
// usage: superMergeTest <numToMerge>
//   numToMerge: how many of the four lists to merge (1-4). The new merge
//               is only run for 2 or 3; the old merge runs for any count.
// Returns/exits 0 on success, 1 on bad arguments or allocation failure.
int main ( int argc , char *argv[] ) {
	// how many list pointers we hand to the merge routines
	const long MAX_LISTS = 4;
	// seed with same value so we get same rand sequence for all
	srand ( 1945687 );
	// # of keys in each list
	long nk = 200000;
	// # keys wanted
	long numKeysWanted = 200000;
	// the list count is required on the command line
	if ( argc < 2 ) {
		fprintf (stderr,"usage: superMergeTest <# lists to merge>\n");
		return 1;
	}
	// get # lists to merge
	long numToMerge = atoi ( argv[1] );
	// only MAX_LISTS lists exist, do not let the mergers read past them
	if ( numToMerge < 1 || numToMerge > MAX_LISTS ) {
		fprintf (stderr,"smt:: # lists to merge must be from 1 to %li\n",
			 MAX_LISTS);
		return 1;
	}
	// print start time
	fprintf (stderr,"smt:: randomizing begin. %li lists of %li keys.\n",
		 numToMerge, nk);
	// allocate one buffer of nk keys per list
	key_t *keys0 = (key_t *) malloc ( sizeof(key_t) * nk );
	key_t *keys1 = (key_t *) malloc ( sizeof(key_t) * nk );
	key_t *keys2 = (key_t *) malloc ( sizeof(key_t) * nk );
	key_t *keys3 = (key_t *) malloc ( sizeof(key_t) * nk );
	if ( ! keys0 || ! keys1 || ! keys2 || ! keys3 ) {
		fprintf (stderr,"smt:: could not allocate key buffers\n");
		return 1;
	}
	// store random keys in each buffer, in the same order as before so
	// the rand() sequence is consumed identically
	fillRandomKeys ( keys0 , nk );
	fillRandomKeys ( keys1 , nk );
	fillRandomKeys ( keys2 , nk );
	fillRandomKeys ( keys3 , nk );
	// sort em up
	gbsort ( keys0 , nk , sizeof(key_t) , cmp );
	gbsort ( keys1 , nk , sizeof(key_t) , cmp );
	gbsort ( keys2 , nk , sizeof(key_t) , cmp );
	gbsort ( keys3 , nk , sizeof(key_t) , cmp );
	// set lists
	RdbList list0;
	RdbList list1;
	RdbList list2;
	RdbList list3;
	key_t minKey; minKey.n0 = 0LL; minKey.n1 = 0LL;
	key_t maxKey; maxKey.setMax();
	// each list is pointed at its sorted key buffer; the arguments are
	// passed exactly as the original test passed them
	list0.set ( (char *)keys0 ,
		    nk * sizeof(key_t),
		    nk * sizeof(key_t),
		    minKey ,
		    maxKey ,
		    0 ,
		    false );
	list1.set ( (char *)keys1 ,
		    nk * sizeof(key_t),
		    nk * sizeof(key_t),
		    minKey ,
		    maxKey ,
		    0 ,
		    false );
	list2.set ( (char *)keys2 ,
		    nk * sizeof(key_t),
		    nk * sizeof(key_t),
		    minKey ,
		    maxKey ,
		    0 ,
		    false );
	list3.set ( (char *)keys3 ,
		    nk * sizeof(key_t),
		    nk * sizeof(key_t),
		    minKey ,
		    maxKey ,
		    0 ,
		    false );
	// merge target for the new routines
	RdbList list;
	// must hold all four list pointers (was declared with only 2 slots
	// but written at indices 2 and 3)
	RdbList *lists[4];
	lists[0] = &list0;
	lists[1] = &list1;
	lists[2] = &list2;
	lists[3] = &list3;
	list.prepareForMerge (lists,numToMerge,numKeysWanted * sizeof(key_t));
	// start time
	fprintf(stderr,"starting merge\n");
	long long t = gettimeofdayInMilliseconds();
	// do it
	if ( numToMerge == 2 )
		list.superMerge2 ( &list0 ,
				   &list1 ,
				   minKey ,
				   maxKey ,
				   false );
	if ( numToMerge == 3 )
		list.superMerge3 ( &list0 ,
				   &list1 ,
				   &list2 ,
				   minKey ,
				   maxKey );
	// completed
	long long now     = gettimeofdayInMilliseconds();
	long long elapsed = now - t;
	fprintf(stderr,"smt:: %li list NEW MERGE took %lli ms\n",
		numToMerge,elapsed);
	// # of keys in the merged list
	long size = list.getListSize() / sizeof(key_t);
	// the per-key numbers divide by both the key count and the elapsed
	// time, so only print them when both are non-zero
	if ( size > 0 && elapsed > 0 ) {
		// time per key
		double tt = ((double)elapsed)*1000000.0 / ((double)size);
		fprintf (stderr,"smt:: %f nanoseconds per key\n", tt);
		// keys merged per second
		double d = (1000.0*(double)(size)) / ((double)elapsed);
		// NOTE(review): 400000000.0 looks like an assumed 400MHz
		// clock rate -- confirm
		fprintf (stderr,"smt:: %f cycles per final key\n" ,
			 400000000.0 / d );
		fprintf (stderr,"smt:: we can do %li adds per second\n" ,
			 (long)d);
	}
	else {
		fprintf (stderr,"smt:: no keys merged or 0 ms elapsed, "
			 "skipping per key stats\n");
	}
	fprintf (stderr,"smt:: final list size = %li\n",list.getListSize());
	// now get list from the old merge routine
	RdbList listOld;
	listOld.prepareForMerge (lists,numToMerge,numKeysWanted*sizeof(key_t));
	t = gettimeofdayInMilliseconds();
	listOld.merge_r ( lists , numToMerge , true , minKey , maxKey , false ,
			  numKeysWanted * sizeof(key_t));
	now = gettimeofdayInMilliseconds();
	fprintf(stderr,"smt:: %li list OLD MERGE took %lli ms\n",
		numToMerge,now-t);
	// TODO: compare the output of the new merge against the old one
	// exit gracefully
	exit ( 0 );
}
int cmp (const void *h1, const void *h2) {
if ( *(key_t *)h1 < *(key_t *)h2 ) return -1;
if ( *(key_t *)h1 > *(key_t *)h2 ) return 1;
return 0;
}