-
Notifications
You must be signed in to change notification settings - Fork 0
/
htable.c
224 lines (167 loc) · 6.22 KB
/
htable.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#include "htable.h"
// Initialize a hash table context: allocate the thread-local chunk buffer,
// the per-chunk transfer-handle array, and the globally shared bucket table.
// upc_all_alloc is collective, so every thread must call this function.
// Aborts the program on allocation failure (the table is unusable without
// any one of these allocations).
void htable_init(htable_ctx_t *ctx) {
    int i;

    // allocate cache-line aligned memory for the local chunks array
    int r = posix_memalign((void**)&ctx->chunks, HTABLE_CACHE_LINE_SIZE,
        HTABLE_MAX_NR_OF_CHUNKS * HTABLE_CHUNK_SIZE * sizeof(bucket_t));
    if (r != 0) {
        fprintf(stderr, "htable_init: posix_memalign failed (error %i)\n", r);
        exit(EXIT_FAILURE);
    }

    // allocate handle storage for chunk retrieval; two handles per chunk,
    // because a chunk query may be split across two owner blocks.
    // (fix: element size is sizeof(upc_handle_t), not sizeof(upc_handle_t*) —
    // the original only worked because the handle type is pointer-sized)
    ctx->handle = malloc(2 * HTABLE_MAX_NR_OF_CHUNKS * sizeof(upc_handle_t));
    if (ctx->handle == NULL) {
        fprintf(stderr, "htable_init: malloc of handle array failed\n");
        exit(EXIT_FAILURE);
    }

    // allocate the shared table (collective allocation across all threads)
    ctx->table = (shared [HTABLE_BLOCK_SIZE] bucket_t*)
        upc_all_alloc(THREADS, HTABLE_BLOCK_SIZE * sizeof(bucket_t));
    if (ctx->table == NULL) {
        fprintf(stderr, "htable_init: upc_all_alloc failed\n");
        exit(EXIT_FAILURE);
    }

    // mark every handle as inactive; sync_on_chunk treats NULL as "no
    // outstanding operation"
    for (i = 0; i < 2 * HTABLE_MAX_NR_OF_CHUNKS; i++) {
        ctx->handle[i] = NULL;
    }
}
// Tear down a hash table context. All outstanding asynchronous transfers on
// ctx->table are drained first, so no in-flight operation can touch a freed
// buffer.
void htable_free(htable_ctx_t *ctx) {
    uint64_t k;

    // wait for every still-pending async operation before releasing memory
    for (k = 0; k < 2 * HTABLE_MAX_NR_OF_CHUNKS; k++) {
        if (ctx->handle[k] == NULL) {
            continue;
        }
        upc_sync(ctx->handle[k]);
    }

    // NOTE(review): ctx->table came from upc_all_alloc; presumably only one
    // thread calls htable_free (or the runtime tolerates multiple upc_free
    // calls) — verify against callers.
    upc_free(ctx->table);
    free(ctx->handle);
    free(ctx->chunks);
}
// This function blocks further execution of the thread until all networking
// operations on chunk 'n' have been completed..
// This function blocks further execution of the thread until all networking
// operations on chunk 'n' have been completed, then marks the chunk's
// handles as inactive (NULL) so they are not synced twice.
static inline void sync_on_chunk(htable_ctx_t *ctx, uint64_t n) {
    int i;
    // two handles per chunk: a query may have been split across two blocks
    for (i = 0; i < 2; i++) {
        // fix: keep the index 64-bit — the original stored 2 * n + i in an
        // int, silently truncating for large n
        uint64_t index = 2 * n + i;
        if (ctx->handle[index] != NULL) {
            upc_sync(ctx->handle[index]);
            ctx->handle[index] = NULL;
        }
    }
}
// Start an asynchronous query to chunk 'n', starting at bucket 'h'...
// Every call to query_chunk(n) needs a matching call to sync_on_chunk(n)
// Start an asynchronous query to chunk 'n', starting at bucket 'h'...
// Every call to query_chunk(n) needs a matching call to sync_on_chunk(n).
// The chunk is fetched from the shared table into the thread-local buffer
// ctx->chunks[n * HTABLE_CHUNK_SIZE ..]; because a chunk may straddle the
// boundary between two threads' blocks, the transfer is issued as either
// one or two non-blocking upc_memget_nb calls, whose handles are stored in
// ctx->handle[2n] and ctx->handle[2n + 1].
static inline void query_chunk(htable_ctx_t *ctx, uint64_t h, uint64_t n) {
    // make sure that all previous operations on chunk 'n' have been completed..
    // note: previous operations from previous calls to find-or-put
    sync_on_chunk(ctx, n);
    // calculate the indices of the begin- and ending elements of the chunk
    uint64_t index1 = h + (n * HTABLE_CHUNK_SIZE);
    uint64_t index2 = index1 + HTABLE_CHUNK_SIZE - 1;
    // determine the owning thread for the first and last buckets of the chunk
    // (HTABLE_THREAD presumably maps a logical bucket index to its owner —
    //  defined in htable.h, not visible here)
    uint64_t owner1 = HTABLE_THREAD(index1);
    uint64_t owner2 = HTABLE_THREAD(index2);
    // if the two owners differ, the chunk straddles a block boundary and the
    // query needs to be split up into two transfers...
    if (owner1 != owner2) {
        // size1 = buckets remaining in the first owner's block from index1;
        // size2 = the rest of the chunk, which lives in the next block
        uint64_t size1 = HTABLE_BLOCK_SIZE - HTABLE_BLOCK(index1);
        uint64_t size2 = HTABLE_CHUNK_SIZE - size1;
        // perform the actual queries: both halves land contiguously in the
        // local chunk buffer
        ctx->handle[2 * n] = upc_memget_nb(&ctx->chunks[n * HTABLE_CHUNK_SIZE],
            &ctx->table[HTABLE_ADDR(index1)], sizeof(bucket_t) * size1);
        ctx->handle[2 * n + 1] = upc_memget_nb(&ctx->chunks[n * HTABLE_CHUNK_SIZE + size1],
            &ctx->table[HTABLE_ADDR(index1 + size1)], sizeof(bucket_t) * size2);
        ADD_TO_ACTUAL_RTRIPS(2);
    }
    // otherwise, the query can simply be performed without needing to split
    else {
        ctx->handle[2 * n] = upc_memget_nb(&ctx->chunks[n * HTABLE_CHUNK_SIZE],
            &ctx->table[HTABLE_ADDR(index1)], sizeof(bucket_t) * HTABLE_CHUNK_SIZE);
        // second handle unused in the unsplit case; sync_on_chunk skips NULL
        ctx->handle[2 * n + 1] = NULL;
        ADD_TO_ACTUAL_RTRIPS(1);
    }
}
// Look up 'data' in the table, inserting it if absent. Returns:
//   HTABLE_INSERTED - 'data' was not present and this thread claimed a bucket
//   HTABLE_FOUND    - 'data' was already present (possibly inserted
//                     concurrently by another thread during our CAS attempt)
//   HTABLE_FULL     - all probed buckets were occupied by other values
// Probing is pipelined: while chunk i is being scanned locally, the fetch of
// chunk i+1 is already in flight (software prefetching of remote buckets).
char htable_find_or_put(htable_ctx_t *ctx, uint64_t data) {
    // the upper bits of a bucket are flag bits (e.g. HTABLE_MASK_OCCUPIED),
    // so only the data portion of the argument is stored/compared
    data &= HTABLE_MASK_DATA;
    uint64_t h = hash(data);
    uint64_t i, j;
    // kick off the fetch of the first chunk before entering the loop
    query_chunk(ctx, h, 0);
    for (i = 0; i < HTABLE_MAX_NR_OF_CHUNKS; i++) {
        // prefetch the next chunk while we scan the current one
        if (i + 1 < HTABLE_MAX_NR_OF_CHUNKS) {
            query_chunk(ctx, h, i + 1);
        }
        ADD_TO_REQUIRED_RTRIPS(1);
        // wait until chunk i has fully arrived in the local buffer
        sync_on_chunk(ctx, i);
        for (j = 0; j < HTABLE_CHUNK_SIZE; j++) {
            uint64_t index = i * HTABLE_CHUNK_SIZE + j;
            if (!(ctx->chunks[index] & HTABLE_MASK_OCCUPIED)) {
                // the bucket looked empty when fetched; try to claim it with
                // CAS, using the fetched value as the expected old value
                bucket_t result = CAS(&ctx->table[HTABLE_ADDR(h + index)],
                    ctx->chunks[index], data | HTABLE_MASK_OCCUPIED);
                // check if the CAS operation succeeded..
                // (CAS presumably returns the bucket's previous value —
                //  defined in htable.h, not visible here)
                if (ctx->chunks[index] == result) {
                    return HTABLE_INSERTED;
                }
                // if not, check if some other thread has inserted 'data' in the bucket we wanted to claim..
                else if ((result & HTABLE_MASK_DATA) == data) {
                    return HTABLE_FOUND;
                }
                // otherwise another thread claimed the bucket with different
                // data; fall through and keep probing
            }
            // occupied bucket: a match means 'data' is already stored
            else if ((ctx->chunks[index] & HTABLE_MASK_DATA) == data) {
                return HTABLE_FOUND;
            }
        }
    }
    // every probed bucket is occupied by other values
    return HTABLE_FULL;
}
// Print a summary of the table geometry (sizes, bucket counts, block layout)
// prefixed with the calling thread's rank. Purely informational; does not
// touch table contents (ctx is accepted for interface symmetry only).
void htable_print_info(htable_ctx_t *ctx) {
    const uint64_t buckets_total = HTABLE_BLOCK_SIZE * THREADS;
    const uint64_t bytes_total = sizeof(bucket_t) * buckets_total;
    const uint64_t bytes_per_block = HTABLE_BLOCK_SIZE * sizeof(bucket_t);

    printf("%i/%i - htable initialized\n", MYTHREAD, THREADS);
    printf("%i/%i - table size: %lu bytes (%lu MB)\n", MYTHREAD, THREADS,
        bytes_total, bytes_total / (1024 * 1024));
    printf("%i/%i - total number of buckets: %lu\n", MYTHREAD, THREADS,
        buckets_total);
    printf("%i/%i - block size: %lu (%lu MB)\n", MYTHREAD, THREADS,
        bytes_per_block, bytes_per_block / (1024 * 1024));
    printf("%i/%i - number of buckets in block: %lu\n", MYTHREAD, THREADS,
        HTABLE_BLOCK_SIZE);
    printf("%i/%i - number of blocks: %i\n", MYTHREAD, THREADS, THREADS);
}
// Return the rank of the thread that owns the home bucket of 'data'
// (i.e. the first bucket a find-or-put for 'data' would probe).
size_t htable_owner(htable_ctx_t *ctx, uint64_t data) {
    const uint64_t home = hash(data & HTABLE_MASK_DATA);
    return upc_threadof(&ctx->table[HTABLE_ADDR(home)]);
}
// Debug helper: walk the entire shared table and print, for each contiguous
// run of buckets owned by the same thread, the index range and the owner.
// Used to visually verify the blocked data distribution.
void htable_test_ownership(htable_ctx_t *ctx) {
    uint64_t limit = THREADS * HTABLE_BLOCK_SIZE;
    // fix: initialize — the original read prev/prev_i/curr uninitialized
    // when limit == 0
    uint64_t i, prev = 0, prev_i = 0, curr = 0;
    if (limit == 0) {
        return; // empty table: nothing to report
    }
    for (i = 0; i < limit; i++) {
        curr = upc_threadof(&ctx->table[i]);
        if (i == 0) {
            prev = curr;
            prev_i = 0;
        }
        if (prev != curr) {
            printf("table[%lu] .. table[%lu] is owned by thread %lu\n", prev_i, i - 1, prev);
            prev = curr;
            prev_i = i;
        }
    }
    // fix: the last valid index is limit - 1; the original printed 'limit',
    // one past the end of the table
    printf("table[%lu] .. table[%lu] is owned by thread %lu\n", prev_i, limit - 1, curr);
}
// Debug helper: fill the shared table with its own indices, then issue one
// chunk query at an index well inside the first block (no boundary split)
// and print each fetched bucket together with its owner thread.
// NOTE(review): every calling thread writes the whole shared table here —
// presumably this test is run single-threaded or the races are benign.
void htable_test_query_single(htable_ctx_t *ctx) {
    const uint64_t total = THREADS * HTABLE_BLOCK_SIZE;
    const uint64_t start = 27;
    uint64_t k;

    // seed the table so fetched values are easy to verify by eye
    for (k = 0; k < total; k++) {
        ctx->table[k] = k;
    }

    // one query into chunk slot 0, then wait for it to arrive
    query_chunk(ctx, start, 0);
    sync_on_chunk(ctx, 0);

    printf("query for index %lu: ", start);
    for (k = 0; k < HTABLE_CHUNK_SIZE; k++) {
        printf("%lu (%lu) - ", ctx->chunks[k],
            upc_threadof(&ctx->table[start + k]));
    }
    printf("\n");
}
// Debug helper: fill the shared table with its own indices, then issue one
// chunk query starting just before a block boundary so the transfer must be
// split across two owner threads, and print each fetched bucket with its
// owner. Complements htable_test_query_single (the unsplit case).
// NOTE(review): every calling thread writes the whole shared table here —
// presumably this test is run single-threaded or the races are benign.
void htable_test_query_splitting(htable_ctx_t *ctx) {
    const uint64_t total = THREADS * HTABLE_BLOCK_SIZE;
    const uint64_t start = HTABLE_BLOCK_SIZE - 7;
    uint64_t k;

    // seed the table so fetched values are easy to verify by eye
    for (k = 0; k < total; k++) {
        ctx->table[k] = k;
    }

    // one query into chunk slot 0 straddling the block boundary, then wait
    query_chunk(ctx, start, 0);
    sync_on_chunk(ctx, 0);

    printf("query for index %lu: ", start);
    for (k = 0; k < HTABLE_CHUNK_SIZE; k++) {
        printf("%lu (%lu) - ", ctx->chunks[k],
            upc_threadof(&ctx->table[start + k]));
    }
    printf("\n");
}