/
hashmap.hpp
500 lines (396 loc) · 15.1 KB
/
hashmap.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
/*
================================================================================
Class: hash_map<K,V>
* Version: 3
* Author: Ian Clement <ian.clement@johnabbott.qc.ca>
* For: cs406
* Date: 2014/04/09
* Purpose: hash_map implementation of the map ADT.
* Changes from version 2:
1. moved struct back into the hash_map class but using the templates
provided to hash_map.
* Changes from Version 1:
1. added serialization (methods).
2. moved Link struct into its own header file. Used capital 'L' to
differentiate from link() function.
3. moved exceptions to their own header file.
================================================================================
*/
#ifndef __HASHMAP_HPP__
#define __HASHMAP_HPP__
#include <algorithm>
#include <fstream>
#define WARN_OBJECT_COPY
#ifdef WARN_OBJECT_COPY
#include <iostream>
#endif
#include "exceptions.hpp"
/* forward declarations to allow (de)serialize(..) to be friend functions */
/* hash_map must be declared first because both function templates name it in
   their parameter lists; the class body later befriends the <K,V>
   specialisations of these two templates. */
template <class K, class V>
class hash_map;
/* Writes `map` to `out`; defined at the end of this header. */
template <class K, class V>
int serialize(hash_map<K,V>& map, std::fstream& out);
/* Reads `map` back from `in`; defined at the end of this header. */
template <class K, class V>
int deserialize(hash_map<K,V>& map, std::fstream& in);
/* constant for serialization */
/* NOTE(review): not referenced anywhere else in this header; presumably a tag
   identifying a serialized hash_map in a file -- confirm once (de)serialize
   are implemented. */
const long HASHMAP_SERIAL_ID = 116759315;
/*
  Struct: entry<K,V>
  Purpose: Plain key/value pair; the unit stored in each hash_map link and the
           type handed back by hash_map::next().
*/
template <class K, class V>
struct entry {
    K key;
    V value;

    /* Value-initialises both members, so built-in types (int, pointers, ...)
       start at zero instead of holding an indeterminate value. */
    entry(): key(), value() {}

    /* Builds an entry holding copies of `key` and `value`. */
    entry(K key, V value): key(key), value(value) {}
};
/* Default for the hash_map constructor's `total_buckets` argument: the
   initial length of the bucket array. */
const int DEFAULT_TOTAL_BUCKETS = 100;
/* Default for the hash_map constructor's `factor` argument: once
   size / total_buckets is no longer below this value, rehash() doubles the
   bucket array. */
const double DEFAULT_FACTOR = 0.75;
/*
  Class: hash_map<K,V>
  Purpose: Map ADT stored as an array of singly linked lists ("buckets").
           Keys are hashed with the global ::hash function and compared
           with `!=`.
           The class also carries a single built-in traversal cursor driven by
           reset() / has_next() / next().
*/
template <class K, class V>
class hash_map {
public:
    /*
      Constructor
      Purpose: Create an empty map.
      Parameters: total_buckets - initial length of the bucket array.
                  factor        - size/buckets ratio that triggers growth of
                                  the bucket array.
    */
    hash_map(int total_buckets = DEFAULT_TOTAL_BUCKETS, double factor = DEFAULT_FACTOR);
    ~hash_map();

    /* copy constructor and `=` operator.
       Copying prints a warning on std::cerr while WARN_OBJECT_COPY is defined. */
    hash_map(const hash_map<K,V>& rhs);
    hash_map<K,V>& operator=(const hash_map<K,V>& rhs);

    /*
      Method: put
      Purpose: Associates the specified value with the specified key in this map.
      Pre-conditions: None.
      Post-conditions: The value to which key is mapped is set to value. If the map previously contained an entry
                       for key (i.e.: contains_key(key) is true), the old value is replaced by value.
                       Adding a new entry may grow the bucket array.
    */
    void put(K key, V value);

    /*
      Method: get
      Purpose: Retrieve the value mapped to by the specified key.
      Pre-conditions: The map contains an entry for key.
      Post-conditions: Returns the value to which the specified key is mapped.
      Throws: key_not_found_exception if the map has no entry for key.
    */
    V get(K key);

    /*
      Method: remove
      Purpose: From this map, remove the entry for the specified key.
      Pre-conditions: The map contains an entry for key.
      Post-conditions: The entry for key is removed and the value it held is returned.
      Throws: key_not_found_exception if the map has no entry for key.
    */
    V remove(K key);

    /*
      Method: clear
      Purpose: Removes all of the entries from this map.
      Pre-conditions: None.
      Post-conditions: All entries are removed from this map.
    */
    void clear();

    /*
      Method: contains_key
      Purpose: Determine if the map contains an entry for the specified key.
      Pre-conditions: None.
      Post-conditions: Returns true if this map contains an entry for the specified key, false otherwise.
    */
    bool contains_key(K key);

    /*
      Method: reset
      Purpose: Initialize a traversal of the map.
      Pre-conditions: None.
      Post-conditions: If the map contains entries, the traversal cursor is positioned on the first entry.
                       Otherwise, the traversal is complete (trivially).
    */
    void reset();

    /*
      Method: has_next
      Purpose: Determine if a traversal can continue.
      Pre-conditions: The traversal has been initialized and no put or remove operations have been performed
                      since the initialization.
      Post-conditions: Returns true if there are elements left in the traversal, false otherwise.
    */
    bool has_next();

    /*
      Method: next
      Purpose: Return the current entry in the traversal, and then advance the traversal
               cursor to the next entry in the map.
      Pre-conditions: The traversal has been initialized and no put or remove operations have been performed
                      since the initialization. The traversal still has at least one entry left.
      Post-conditions: The current entry is returned (by value) and the cursor has advanced to the following
                       entry. After the last entry the cursor no longer refers to an entry.
      Throws: map_traversal_exception if no entry is left or the map was modified since reset().
    */
    entry<K,V> next();

    /*
      Method: is_empty
      Purpose: Determine if the map is empty: it contains no key-value entries.
      Pre-conditions: None.
      Post-conditions: Returns true if this map contains no key-value entries, false otherwise.
    */
    bool is_empty() const { return map_size == 0; }

    /*
      Method: size
      Purpose: Determine the number of key-value entries in this map.
      Pre-conditions: None.
      Post-conditions: Returns the number of key-value entries in this map.
    */
    int size() const { return map_size; }

    /*
      Method: get_total_buckets
      Purpose: Determine the size of the bucket array.
      Pre-conditions: None.
      Post-conditions: Returns the size of the bucket array.
    */
    int get_total_buckets() const { return total_buckets; }

    /*
      Function: int serialize(hash_map<K,V> &map, fstream &file)
      Purpose: Create a binary serial representation of `map` and write it to `file`.
      Pre-conditions: The `file` is open and ready for binary writing.
                      The `put` cursor is positioned at the end of the file for appending.
      Post-condition: The binary serial representation of `map` is written to `file`.
                      The `put` cursor is positioned at the end of the file.
                      The total number of bytes written to the file is returned.
    */
    friend int serialize<K,V>(hash_map<K,V> &map, std::fstream &file);

    /*
      Function: int deserialize(hash_map<K,V> &map, fstream &file)
      Purpose: Read the binary serial representation of `map` and recreate the hash_map.
      Pre-conditions: The `file` is open and ready for binary reading.
                      The `get` cursor is positioned at the beginning of the serial representation.
      Post-condition: The binary serial representation is read from `file` and the hash_map recreated without rehashing each entry.
                      The `get` cursor is positioned after the serial representation.
                      The total number of bytes read from the file is returned.
    */
    friend int deserialize<K,V>(hash_map<K,V> &map, std::fstream &file);

private:
    /* one node of a bucket's singly linked list */
    struct link {
        link* next;
        entry<K,V> element;
    };

    /* grows the bucket array once the load factor is reached */
    void rehash();

    /* store as array of link pointers */
    link** buckets;
    /* number of entries currently stored */
    int map_size;
    /* length of the `buckets` array */
    int total_buckets;
    /* size/buckets ratio at which rehash() grows the array */
    double factor;

    /* stores the current position in the traversal */
    int current_bucket;
    link* current;

    /* for checking that the map isn't modified (map size) during a traversal */
    bool modified;
};
/*
  Constructor
  Purpose: Create an empty map with `total_buckets` empty bucket lists.
  Parameters: total_buckets - requested length of the bucket array; a value
                              below 1 is raised to 1 so that the
                              `hash % total_buckets` computations stay defined.
              factor        - load factor compared against size/buckets in rehash().
  Post-conditions: The map is empty and no traversal is in progress
                   (has_next() is false until reset() is called).
*/
template <class K, class V>
hash_map<K,V>::hash_map(int total_buckets, double factor):
    // initialisers listed in member declaration order
    buckets(NULL),
    map_size(0),
    total_buckets(total_buckets > 0 ? total_buckets : 1),
    factor(factor),
    current_bucket(0),
    current(NULL),
    modified(true)
{
    // `this->` is required: the parameter of the same name hides the member.
    // The trailing `()` value-initialises every slot to a null pointer,
    // i.e. every bucket starts as an empty list.
    buckets = new link*[this->total_buckets]();
}
/*
  Destructor
  Purpose: Free every link of every bucket list, then the bucket array itself.
           Does nothing when `buckets` is NULL.
*/
template <class K, class V>
hash_map<K,V>::~hash_map() {
    if(buckets == NULL)
        return;

    for(int slot = 0; slot < total_buckets; ++slot) {
        // walk the list, remembering the successor before freeing each link
        link* node = buckets[slot];
        while(node != NULL) {
            link* following = node->next;
            delete node;
            node = following;
        }
        buckets[slot] = NULL;
    }

    delete [] buckets;
}
/*
  Method: clear
  Purpose: Remove every entry from the map.
  Post-conditions: All links are freed and every bucket is an empty list; the
                   bucket array keeps its current length. The entry count is
                   0, and any traversal in progress is invalidated.
*/
template <class K, class V>
void hash_map<K,V>::clear() {
    for(int i = 0; i < total_buckets; i++) {
        link* node = buckets[i];
        while(node != NULL) {
            link* following = node->next;
            delete node;
            node = following;
        }
        buckets[i] = NULL;
    }

    // keep the bookkeeping in step with the now-empty lists
    map_size = 0;
    // the cursor may have pointed at a link that was just freed
    current = NULL;
    modified = true;
}
/*
  Copy constructor
  Purpose: Build an independent deep copy of `rhs`: same bucket count, same
           load factor, same entry count, and a freshly allocated copy of
           every bucket list with the links in the same order.
  Post-conditions: No traversal is in progress on the new map.
  Note: prints a warning on std::cerr while WARN_OBJECT_COPY is defined.
*/
template <class K, class V>
hash_map<K,V>::hash_map(const hash_map& rhs):
    // initialisers listed in member declaration order
    buckets(NULL),
    map_size(rhs.map_size),
    total_buckets(rhs.total_buckets),
    factor(rhs.factor),
    current_bucket(0),
    current(NULL),
    modified(true)
{
#ifdef WARN_OBJECT_COPY
    std::cerr << "[Warning] `hash_map` object copied." << std::endl;
#endif
    // the trailing `()` makes every bucket start as NULL (an empty list)
    buckets = new link*[total_buckets]();

    // copy each linked list
    for(int i = 0; i < total_buckets; i++) {
        // `tail` always addresses the pointer the next copied link must be
        // stored in: first the bucket head, then the previous copy's `next`.
        link** tail = &buckets[i];
        for(const link* source = rhs.buckets[i]; source != NULL; source = source->next) {
            link* copy = new link;
            copy->next = NULL;
            // attach before filling in, so the destructor can reach the link
            *tail = copy;
            tail = &copy->next;
            (copy->element).key = (source->element).key;
            (copy->element).value = (source->element).value;
        }
    }
}
/*
  Operator: =
  Purpose: Replace the contents of this map with a deep copy of `rhs`.
  Post-conditions: This map holds copies of rhs's entries; the previous
                   contents are released; any traversal in progress is
                   invalidated. Self-assignment is safe.
  Returns: *this.
*/
template <class K, class V>
hash_map<K, V>& hash_map<K,V>::operator=(const hash_map& rhs) {
    /* uses "copy-swap" idiom: http://www.cplusplus.com/articles/y8hv0pDG/ */

    // use the copy constructor to create 'tmp'
    hash_map<K, V> tmp(rhs);

    // swap tmp's storage with the current object's; tmp's destructor then
    // frees the old bucket lists when it goes out of scope
    std::swap(total_buckets, tmp.total_buckets);
    std::swap(factor, tmp.factor);
    std::swap(buckets, tmp.buckets);

    // take the entry count straight from rhs, so it does not depend on what
    // the temporary recorded for itself
    map_size = rhs.map_size;

    // the cursor referred to the old lists, which are about to be freed
    current = NULL;
    current_bucket = 0;
    modified = true;

    // return the current object
    return *this;
}
/*
  Method: contains_key
  Purpose: Determine whether the map holds an entry for `key`.
  Returns: true if a link whose key is not `!=` to `key` exists in the key's
           bucket, false otherwise.
*/
template <class K, class V>
bool hash_map<K,V>::contains_key(K key) {
    // get index by hashing
    int hash_code = ::hash(key);
    int index = hash_code % total_buckets;
    // `%` keeps the sign of its left operand, so a negative hash code would
    // give a negative index; shift it into [0, total_buckets)
    if(index < 0)
        index += total_buckets;

    // search list for link containing key
    for(link* node = buckets[index]; node != NULL; node = node->next) {
        if(!((node->element).key != key))
            return true;
    }
    return false;
}
/*
  Method: put
  Purpose: Map `key` to `value`.
  Post-conditions: If `key` was already present its value is overwritten and
                   nothing else changes. Otherwise a new link is added at the
                   head of the key's bucket, the entry count grows by one, any
                   traversal in progress is invalidated, and rehash() is given
                   the chance to grow the bucket array.
*/
template <class K, class V>
void hash_map<K,V>::put(K key, V value) {
    // get index by hashing
    int hash_code = ::hash(key);
    int index = hash_code % total_buckets;
    // `%` keeps the sign of its left operand, so a negative hash code would
    // give a negative index; shift it into [0, total_buckets)
    if(index < 0)
        index += total_buckets;

    // search for the key in the list
    link* node = buckets[index];
    while(node != NULL && (node->element).key != key)
        node = node->next;

    // key found: replace value
    if(node != NULL) {
        (node->element).value = value;
        return;
    }

    // key not found: add an entry to the "head" of the list
    link* added = new link;
    (added->element).key = key;
    (added->element).value = value;
    added->next = buckets[index];
    buckets[index] = added;
    map_size++;
    modified = true;

    // added an entry -> rehash (a no-op while below the load factor)
    rehash();
}
/*
  Method: get
  Purpose: Retrieve the value mapped to by `key`.
  Returns: A copy of the stored value.
  Throws: key_not_found_exception if the map has no entry for `key`
          (precondition violated).
*/
template <class K, class V>
V hash_map<K,V>::get(K key) {
    // get index by hashing
    int hash_code = ::hash(key);
    int index = hash_code % total_buckets;
    // `%` keeps the sign of its left operand, so a negative hash code would
    // give a negative index; shift it into [0, total_buckets)
    if(index < 0)
        index += total_buckets;

    // search list for the link containing key
    link* node = buckets[index];
    while(node != NULL && (node->element).key != key)
        node = node->next;

    // throw if key not found: precondition violated.
    if(node == NULL)
        throw key_not_found_exception();

    return (node->element).value;
}
/*
  Method: remove
  Purpose: Remove the entry for `key` and hand back the value it held.
  Post-conditions: The link is unlinked from its bucket and freed, the entry
                   count drops by one, and any traversal in progress is
                   invalidated.
  Returns: A copy of the removed value.
  Throws: key_not_found_exception if the map has no entry for `key`.
*/
template <class K, class V>
V hash_map<K,V>::remove(K key) {
    // get index by hashing
    int hash_code = ::hash(key);
    int index = hash_code % total_buckets;
    // `%` keeps the sign of its left operand, so a negative hash code would
    // give a negative index; shift it into [0, total_buckets)
    if(index < 0)
        index += total_buckets;

    // search list for link containing key, keep track of previous link for deletion.
    link* previous = NULL;
    link* node = buckets[index];
    while(node != NULL && (node->element).key != key) {
        previous = node;
        node = node->next;
    }

    // key not found: precondition violated.
    if(node == NULL)
        throw key_not_found_exception();

    // copy the value out before the link is freed
    V removed = node->element.value;

    if(previous == NULL)    // the link is the head of the bucket
        buckets[index] = node->next;
    else                    // the link sits further down the list
        previous->next = node->next;

    delete node;
    map_size--;
    modified = true;

    return removed;
}
/*
  Method: rehash (private)
  Purpose: Double the bucket array once size/total_buckets is no longer below
           `factor`, redistributing the existing links.
  Post-conditions: Below the load factor nothing changes. Otherwise
                   total_buckets is doubled, every existing link has been
                   relinked (not reallocated) into the bucket chosen by its
                   hash in the new array, the old array is freed, and any
                   traversal in progress is invalidated.
*/
template <class K, class V>
void hash_map<K,V>::rehash() {
    // only rehash when the load factor reaches the limit
    if(static_cast<double>(map_size) / static_cast<double>(total_buckets) < factor)
        return;

    // Allocate the larger array before touching any member: if `new` throws,
    // the map is left exactly as it was. `()` initialises every slot to NULL.
    const int grown_total = total_buckets * 2;
    link** grown = new link*[grown_total]();

    // loop through the current buckets and move all links to the new array.
    for(int i = 0; i < total_buckets; i++) {
        // recycle the memory by relinking each link.
        // careful: keys that shared a bucket under the old size will not
        // necessarily share one under the new size.
        link* node = buckets[i];
        while(node != NULL) {
            link* following = node->next;   // saved: `node->next` is overwritten below

            // go through an int first, exactly like put/get/remove/contains_key,
            // so lookups compute the same bucket as this redistribution
            int hash_code = ::hash((node->element).key);
            int index = hash_code % grown_total;
            // `%` keeps the sign of its left operand; shift into [0, grown_total)
            if(index < 0)
                index += grown_total;

            // place at the head of the list
            node->next = grown[index];
            grown[index] = node;

            node = following;
        }
    }

    // the old array held only pointers to links that now live in `grown`
    delete [] buckets;
    buckets = grown;
    total_buckets = grown_total;
    modified = true;
}
/*
  Method: reset
  Purpose: Start a traversal.
  Post-conditions: The cursor sits on the head of the first non-empty bucket,
                   or is NULL when every bucket is empty; the "modified" flag
                   is cleared.
*/
template <class K, class V>
void hash_map<K,V>::reset() {
    // skip leading empty buckets
    current_bucket = 0;
    while(current_bucket < total_buckets && buckets[current_bucket] == NULL)
        ++current_bucket;

    // ran off the end of the array: nothing to traverse
    if(current_bucket < total_buckets)
        current = buckets[current_bucket];
    else
        current = NULL;

    modified = false;
}
/*
  Method: has_next
  Purpose: Report whether the traversal cursor still refers to a link.
  Returns: true while the cursor is not NULL, false otherwise.
*/
template <class K, class V>
bool hash_map<K,V>::has_next() {
    const bool cursor_on_link = !(current == NULL);
    return cursor_on_link;
}
/*
  Method: next
  Purpose: Return the entry under the traversal cursor and advance the cursor.
  Returns: A copy of the entry the cursor was on when the call was made.
  Throws: map_traversal_exception if the cursor is NULL or the map was
          modified since the last reset().
*/
template <class K, class V>
entry<K,V> hash_map<K,V>::next() {
    if(modified || current == NULL)
        throw map_traversal_exception();

    // the link whose entry is returned
    link* visited = current;

    // step along the current list first
    current = visited->next;
    if(current == NULL) {
        // list exhausted: look for the next bucket containing links
        do {
            ++current_bucket;
        } while(current_bucket < total_buckets && buckets[current_bucket] == NULL);

        // stays NULL when there are no links left
        if(current_bucket < total_buckets)
            current = buckets[current_bucket];
    }

    return visited->element;
}
/*
  Function: serialize
  Purpose: Placeholder for writing `map` to `out`; the writing itself is not
           implemented yet.
  Returns: -1 for an open stream (nothing is written).
  Throws: serialize_exception if `out` is not open.
*/
template <class K, class V>
int serialize(hash_map<K,V>& map, std::fstream& out) {
    const bool stream_ready = out.is_open();
    if(!stream_ready)
        throw serialize_exception();

    // TODO
    return -1;
}
/*
  Function: deserialize
  Purpose: Placeholder for rebuilding `map` from `in`; the reading itself is
           not implemented yet.
  Returns: -1 for an open stream (nothing is read, `map` is untouched).
  Throws: serialize_exception if `in` is not open.
*/
template <class K, class V>
int deserialize(hash_map<K,V> &map, std::fstream& in) {
    const bool stream_ready = in.is_open();
    if(!stream_ready)
        throw serialize_exception();

    // TODO
    return -1;
}
#endif