This repository has been archived by the owner on Mar 15, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.c
825 lines (683 loc) · 24.6 KB
/
index.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include "index.h"
#include "stemmer.h"
#include "util.h"
// capacity of the result document array allocated by search_index()
#define MAX_SEARCH_RESULTS 10
// forward declarations of helpers that are defined further down in this file
void write_index_to_file(index_p index);
void parse_file_for_index(index_p index, char *file);
void load_stopwords();
void release_stopwords();
int is_stopword(char *word);
int find_str(void *objs, int struct_len, char *str, int min, int max);
int find_int(void *objs, int struct_len, int i, int min, int max);
int cmp_doc_found_desc(const void *a, const void *b);
// stopword list filled by load_stopwords(): number of entries and the entries themselves
static int nr_stopwords = 0;
static char **stopwords = NULL;
// one candidate document of a search, as collected and sorted by search_index()
typedef struct doc_found {
int doc_id; // document id (index into the documents array of the searched index)
double dist; // Euclidean distance between the TF-IDF vector of the document and that of the query
unsigned long flag; // if the n-th most significant bit is set, the n-th search term was found in this document (ignoring stopwords)
} doc_found_t, *doc_found_p;
/*
 * Loads the stopword list from the file "stopwords" (looked up relative to the
 * current working directory) into the file-scope `stopwords` array and sets
 * `nr_stopwords` to the number of entries that were actually read.
 *
 * The number of entries is derived from the number of '\n' characters in the
 * file, so a final line without a terminating newline is not loaded.
 * is_stopword() binary-searches the array, so the file has to be sorted in
 * strcmp() order.
 *
 * If the file cannot be opened a message is printed and nothing is changed.
 * If the file is empty or memory runs out the list is left empty.
 */
void load_stopwords() {
    // open file or print error message
    FILE *sw_file = fopen("stopwords", "r");
    if (!sw_file) {
        printf("stopwords file not found.\nCan't remove stopwords!\n");
        return;
    }

    // count number of stopwords
    // getc() returns an int: storing it in a char makes the EOF test either
    // never succeed (unsigned char) or trigger early on a 0xFF byte (signed char)
    int line_count = 0;
    int c;
    while ((c = getc(sw_file)) != EOF) {
        if (c == '\n') {
            line_count++;
        }
    }

    nr_stopwords = 0;
    stopwords = NULL;
    if (line_count > 0) {
        stopwords = malloc(sizeof *stopwords * (size_t) line_count);
        if (!stopwords) {
            printf("Error: out of memory.\nCan't remove stopwords!\n");
        }
    }

    if (stopwords) {
        rewind(sw_file);
        // load stopwords into array; stop early if a line cannot be read so
        // that nr_stopwords never counts entries which were not stored
        while (nr_stopwords < line_count) {
            char *line = read_line(sw_file);
            if (!line) {
                break;
            }
            stopwords[nr_stopwords++] = line;
        }
    }

    fclose(sw_file);
}
/*
 * Releases the memory allocated for the stopwords array.
 *
 * The list is reset to the "not loaded" state afterwards (NULL pointer, zero
 * entries), so a later is_stopword() call reloads it instead of reading freed
 * memory, and calling this function twice is harmless.
 */
void release_stopwords() {
    if (!stopwords) {
        return;
    }
    for (int i = 0; i < nr_stopwords; i++) {
        free(stopwords[i]);
    }
    free(stopwords);
    stopwords = NULL;
    nr_stopwords = 0;
}
/*
 * Checks whether a word is a stopword.
 *
 * The stopword list is loaded on first use. If it is not available (file
 * missing, empty or not loadable) no word is treated as a stopword.
 *
 * word: NUL-terminated word to look up
 * returns: 1 if the word is in the stopword list, 0 otherwise
 */
int is_stopword(char *word) {
    if (!stopwords) {
        load_stopwords();
    }
    // nothing to search: don't hand an empty range to the binary search,
    // which would otherwise inspect element 0 of a missing array
    if (!stopwords || nr_stopwords <= 0) {
        return 0;
    }
    return find_str(stopwords, sizeof(char *), word, 0, nr_stopwords - 1) != -1;
}
/*
 * Adds a file to the index.
 *
 * The document list is kept in strcmp() order of the file names; the only
 * exception is the temporary search document "._tmp_search_doc", which is
 * always inserted at position 0. Ids stored in the word lists are shifted to
 * match the new positions, the file is parsed and the index is written to disk.
 *
 * index: index to extend; may be moved by realloc()
 * file: path of the file to add
 * returns: the (possibly relocated) index. Callers must continue with the
 *          returned pointer. If the file cannot be read, is already part of
 *          the filebase or memory runs out, the index is returned unchanged.
 */
index_p add_file(index_p index, char *file) {
    // check if file exists and can be read
    FILE *f = fopen(file, "r");
    if (!f) {
        printf("Cannot open %s!\nIndex not updated.\n", file);
        return index;
    }
    fclose(f);

    // find position in file list (alphabetically ordered);
    // the temporary search document always goes to the beginning
    int doc_id = 0;
    if (strcmp(file, "._tmp_search_doc")) {
        for (doc_id = 0; doc_id < index->nr_docs; doc_id++) {
            int cmp = strcmp(index->documents[doc_id].name, file);
            if (!cmp) {
                printf("%s is already in the filebase.\n", file);
                return index;
            }
            if (cmp > 0) {
                // right position in list found
                break;
            }
        }
    }

    // acquire all memory before touching the index so that a failure leaves it intact
    size_t name_size = strlen(file) + 1;
    char *name = malloc(name_size);
    if (!name) {
        printf("Error: out of memory.\nIndex not updated.\n");
        return index;
    }
    // keep the old pointer until realloc() has succeeded
    index_p grown = realloc(index, sizeof(index_t) + sizeof(indexed_document_t) * (index->nr_docs + 1));
    if (!grown) {
        free(name);
        printf("Error: out of memory.\nIndex not updated.\n");
        return index;
    }
    index = grown;
    memcpy(name, file, name_size);

    // insert document in list
    memmove(&index->documents[doc_id + 1], &index->documents[doc_id], sizeof(indexed_document_t) * (index->nr_docs - doc_id));
    index->documents[doc_id].name = name;
    index->documents[doc_id].nr_words = 0;
    index->nr_docs++;

    // update indices: increase indices which are greater or equal to doc_id of added document
    for (indexed_word_p w = index->words; w; w = w->next) {
        for (int i = 0; i < w->nr_docs; i++) {
            if (w->documents[i].id >= doc_id) {
                w->documents[i].id++;
            }
        }
    }

    // parse file contents and add words to index
    parse_file_for_index(index, file);
    write_index_to_file(index);
    return index;
}
/*
 * Removes a file from the index.
 *
 * The document is taken out of the document list, its entry is removed from
 * every word list, ids of all documents behind it are decreased by one and
 * words which only occurred in the removed document are dropped. The result
 * is written to disk.
 *
 * index: index to modify (the struct is not reallocated)
 * doc_id: position of the document in index->documents; an empty filebase or
 *         an id outside of [0, nr_docs) prints an error and changes nothing
 */
void remove_file(index_p index, int doc_id) {
    if (!index->nr_docs) {
        printf("Filebase empty!\n");
        return;
    }
    if (doc_id < 0 || doc_id >= index->nr_docs) {
        printf("Error: illegal document id. No document removed!\n");
        // without this return the code below would free and move memory
        // outside of the document array
        return;
    }

    // remove document from list in index
    free(index->documents[doc_id].name);
    memmove(&index->documents[doc_id], &index->documents[doc_id + 1], sizeof(indexed_document_t) * (index->nr_docs - 1 - doc_id));
    index->nr_docs--;

    indexed_word_p w = index->words; // current word
    indexed_word_p p = NULL;         // previous word

    // remove document from the list of each indexed word
    while (w) {
        // find index of removed document in list (or of first document with higher id)
        int i;
        int found = 0;
        for (i = 0; i < w->nr_docs; i++) {
            if (w->documents[i].id == doc_id) {
                // document found in list: one entry less, shift the rest below
                w->nr_docs--;
                found = 1;
                break;
            }
            if (w->documents[i].id > doc_id) {
                break;
            }
        }

        // reduce document id of all documents with id > removed document id
        // and shift array items over the removed entry if necessary
        for (; i < w->nr_docs; i++) {
            w->documents[i] = w->documents[i + found];
            w->documents[i].id--;
        }

        if (w->nr_docs == 0) {
            // only occurrence of this word was in the removed document -> remove word from index
            indexed_word_p next = w->next;
            if (!p) {
                index->words = next;
            } else {
                p->next = next;
            }
            index->nr_words--;
            free(w->stem);
            free(w);
            w = next;
        } else {
            // get next indexed word
            p = w;
            w = w->next;
        }
    }

    // commit changes to file
    write_index_to_file(index);
}
/*
 * Returns the bit which marks the k-th search term in doc_found_t.flag
 * (k = 0 is the most significant bit). Terms which do not fit into an
 * unsigned long yield 0 instead of an undefined shift.
 */
static unsigned long search_term_bit(int k) {
    const int width = (int) (sizeof(unsigned long) * CHAR_BIT);
    if (k < 0 || k >= width) {
        return 0;
    }
    // 1UL: a plain int 1 must not be shifted by 31 or more bits
    return 1UL << (width - 1 - k);
}

/*
 * Builds the label of a result group: all search terms whose bit is set in
 * flag, separated by ", ". Returns a malloc()ed string or NULL if out of memory.
 */
static char *search_terms_label(unsigned long flag, char **words, int nr_terms) {
    char *label = malloc(1);
    if (!label) {
        return NULL;
    }
    *label = '\0';
    size_t len = 0;
    for (int k = 0; k < nr_terms; k++) {
        if (!words[k] || !(flag & search_term_bit(k))) {
            continue;
        }
        size_t word_len = strlen(words[k]);
        char *grown = realloc(label, len + word_len + 3);
        if (!grown) {
            free(label);
            return NULL;
        }
        label = grown;
        memcpy(label + len, words[k], word_len);
        memcpy(label + len + word_len, ", ", 3);
        len += word_len + 2;
    }
    // remove final ', ' (only if at least one term was appended)
    if (len >= 2) {
        label[len - 2] = '\0';
    }
    return label;
}

/*
 * Searches index for indexed words and returns documents containing these words.
 *
 * The query is written to the temporary file "._tmp_search_doc", which is
 * added to the index as document 0. Every other document is then compared to
 * it by the Euclidean distance of the TF-IDF vectors. A document is a hit if
 * it contains at least one search term and is closer to the query than the
 * empty document. The temporary document is removed again before returning.
 *
 * in: pointer to the index pointer; updated because adding the temporary
 *     document may move the index
 * query: search terms; modified in place by nonalpha_to_space()
 * returns: a newly allocated index holding at most MAX_SEARCH_RESULTS hits
 *          sorted by distance, or NULL on error. documents[i].name is
 *          "<distance> <file name>"; each entry of the word list is a group of
 *          consecutive hits containing the same set of search terms, with the
 *          matched terms in `stem` and positions into documents[] as ids.
 *          The caller owns the returned index.
 */
index_p search_index(index_p *in, char *query) {
    nonalpha_to_space(query);

    FILE *search_file = fopen("._tmp_search_doc", "w");
    if (!search_file) {
        printf("Error: couldn't create temporary file to write.\nUnable to search\n");
        return NULL;
    }
    fprintf(search_file, "%s\n", query);
    fclose(search_file);

    *in = add_file(*in, "._tmp_search_doc");
    index_p index = *in;

    // everything below relies on the query being document 0; if it was not
    // added, removing document 0 afterwards would delete a real document
    if (index->nr_docs < 1 || strcmp(index->documents[0].name, "._tmp_search_doc")) {
        printf("Error: couldn't index the search query.\nUnable to search\n");
        remove("._tmp_search_doc");
        return NULL;
    }

    // number of (non-stopword) words in the query: upper bound for the number of distinct search terms
    const int nr_terms = index->documents[0].nr_words;

    // TF-IDF vector of the search document
    double *q_tfidf = calloc((size_t) index->nr_words + 1, sizeof *q_tfidf);
    // document offset for each word where we need to continue searching (make use of sorted document ids)
    int *w_offset = calloc((size_t) index->nr_words + 1, sizeof *w_offset);
    // euclidian distance of all documents to the search term (based on TF-IDF)
    doc_found_p euclid_dist = calloc((size_t) index->nr_docs, sizeof *euclid_dist);
    // stems of all search terms without stopwords (heap instead of a VLA, which must not have length 0)
    char **words = calloc((size_t) nr_terms + 1, sizeof *words);
    // result index
    index_p result = malloc(sizeof(index_t) + sizeof(indexed_document_t) * MAX_SEARCH_RESULTS);
    if (!q_tfidf || !w_offset || !euclid_dist || !words || !result) {
        printf("Error: out of memory.\nUnable to search\n");
        free(q_tfidf);
        free(w_offset);
        free(euclid_dist);
        free(words);
        free(result);
        remove_file(index, 0);
        remove("._tmp_search_doc");
        return NULL;
    }
    result->nr_docs = 0;
    result->nr_words = 0;
    result->words = NULL;

    // calculate TF-IDF of the query, threshold and offsets
    double euclid_threshold = 0;
    indexed_word_p w;
    int wid = 0;
    for (w = index->words; w; w = w->next, wid++) {
        if (!w->documents[0].id) {
            // word is part of the query; divide as double, an integer division would truncate the ratio
            q_tfidf[wid] = w->documents[0].tf * log((double) index->nr_docs / w->nr_docs);
            euclid_threshold += q_tfidf[wid] * q_tfidf[wid];
            w_offset[wid] = 1;
        } else {
            q_tfidf[wid] = 0;
            w_offset[wid] = 0;
        }
    }
    // threshold is the euclidian distance to the empty document
    euclid_threshold = sqrt(euclid_threshold);

    // compute euclidian distance for all documents; ignore temporary search document at index 0
    int nr_results = 0;
    for (int d = 1; d < index->nr_docs; d++) {
        // documents which are no hit are overwritten by the next candidate
        doc_found_p cur = &euclid_dist[nr_results];
        cur->dist = 0;
        cur->flag = 0;
        cur->doc_id = d;

        int qid = 0; // position of the current word within the search terms
        wid = 0;
        for (w = index->words; w; w = w->next, wid++) {
            const int in_query = !w->documents[0].id;
            const int i = w_offset[wid];
            if (i < w->nr_docs && w->documents[i].id == d) {
                // word occurs in document -> squared difference of the TF-IDF values
                double diff = w->documents[i].tf * log((double) index->nr_docs / w->nr_docs) - q_tfidf[wid];
                cur->dist += diff * diff;
                if (in_query) {
                    cur->flag |= search_term_bit(qid);
                }
                w_offset[wid]++;
            } else {
                // word doesn't occur in document -> TF-IDF = 0 -> just square TF-IDF of query
                cur->dist += q_tfidf[wid] * q_tfidf[wid];
            }
            if (in_query) {
                if (qid < nr_terms && !words[qid]) {
                    words[qid] = w->stem;
                }
                qid++;
            }
        }
        cur->dist = sqrt(cur->dist);

        // keep only documents below the threshold with at least one hit
        if (cur->flag && cur->dist < euclid_threshold) {
            nr_results++;
        }
    }
    free(q_tfidf);
    free(w_offset);

    // sort documents by euclidian distance to query
    qsort(euclid_dist, nr_results, sizeof(doc_found_t), cmp_doc_found_desc);

    // create the result: each 'word' represents a group of documents which
    // contain the same (sub-)set of search terms
    unsigned long last_flag = 0;      // flag of last processed document
    indexed_word_p group = NULL;      // current group of documents
    indexed_word_p prev_group = NULL; // group before the current one
    // '<' and not '<=': result->documents has exactly MAX_SEARCH_RESULTS slots
    for (int i = 0; i < MAX_SEARCH_RESULTS && i < nr_results; i++) {
        const doc_found_t *hit = &euclid_dist[i];

        // "<distance> <name>"; let snprintf() measure the text so that large
        // distances cannot overflow the buffer
        const char *doc_name = index->documents[hit->doc_id].name;
        int display_len = snprintf(NULL, 0, "%08.5f %s", hit->dist, doc_name);
        char *display = display_len < 0 ? NULL : malloc((size_t) display_len + 1);
        if (!display) {
            break;
        }
        snprintf(display, (size_t) display_len + 1, "%08.5f %s", hit->dist, doc_name);

        if (i == 0 || hit->flag != last_flag) {
            // the flag is not equal to previous one => create new group with room for one document
            indexed_word_p w_new = malloc(sizeof(indexed_word_t) + sizeof(doc_t));
            char *label = w_new ? search_terms_label(hit->flag, words, nr_terms) : NULL;
            if (!w_new || !label) {
                free(w_new);
                free(label);
                free(display);
                break;
            }
            w_new->stem = label;
            w_new->next = NULL;
            w_new->nr_docs = 0;
            if (!group) {
                // first result document: set as first element of linked list
                result->words = w_new;
            } else {
                group->next = w_new;
            }
            prev_group = group;
            group = w_new;
            last_flag = hit->flag;
            result->nr_words++;
        } else {
            // make room for another document in the current group
            indexed_word_p grown = realloc(group, sizeof(indexed_word_t) + sizeof(doc_t) * (group->nr_docs + 1));
            if (!grown) {
                free(display);
                break;
            }
            group = grown;
            // update pointer to this group (needed after realloc)
            if (!prev_group) {
                result->words = group;
            } else {
                prev_group->next = group;
            }
        }

        // add document to group
        group->documents[group->nr_docs].id = i;
        group->documents[group->nr_docs].tf = 0;
        group->nr_docs++;
        result->documents[i].name = display;
        result->documents[i].nr_words = 0;
        result->nr_docs++;
    }

    free(euclid_dist);
    free(words);
    remove_file(index, 0);
    remove("._tmp_search_doc");
    return result;
}
/*
* Compares two doc_found structs based on euclidian distance to the search term (1st priority) and the document id (2nd priority; which is the same as comparing the names)
*/
int cmp_doc_found_desc(const void *a, const void *b) {
doc_found_p aa = (doc_found_p) a;
doc_found_p bb = (doc_found_p) b;
if (aa->dist == bb->dist) {
return (aa->doc_id < bb->doc_id) ? -1 : (aa->doc_id > bb->doc_id);
} else {
return (aa->dist < bb->dist) ? -1 : (aa->dist > bb->dist);
}
}
/*
 * Throws away all indexed words and builds them again by parsing every
 * document of the filebase; the document list itself is kept. The new index
 * is written to disk afterwards.
 */
void rebuild_index(index_p index) {
    // drop the whole word list
    indexed_word_p node = index->words;
    while (node) {
        indexed_word_p following = node->next;
        free(node->stem);
        free(node);
        node = following;
    }
    index->words = NULL;
    index->nr_words = 0;

    // parse the documents again, starting each word counter from zero
    for (int d = 0; d < index->nr_docs; d++) {
        index->documents[d].nr_words = 0;
        parse_file_for_index(index, index->documents[d].name);
    }

    // persist the result
    write_index_to_file(index);
}
/*
 * Parses a file and adds its words to the index.
 *
 * Every line is split at non-alphabetic characters, stopwords are skipped and
 * the remaining words are stemmed. For each stem the number of occurrences in
 * this document is counted; once the whole file is read the counts are divided
 * by the number of indexed words of the document (term frequency).
 *
 * index: index to update; the file has to be part of index->documents already
 * file: path of the file, identical to the name stored in the document list
 *
 * Errors (unreadable file, file not in the filebase, out of memory) are
 * reported on stdout.
 */
void parse_file_for_index(index_p index, char *file) {
    // open file or print error message
    FILE *f = fopen(file, "r");
    if (!f) {
        printf("Cannot open %s!\nIndex not updated.\n", file);
        return;
    }

    // document id = index of document in list of all documents in filebase (alphabetically ordered)
    int doc_id = -1;
    if (index->nr_docs > 0) {
        if (!strcmp(index->documents[0].name, file)) {
            // the temporary search document sits at position 0 regardless of
            // the alphabetical order, where the binary search may miss it
            doc_id = 0;
        } else {
            doc_id = find_str(&index->documents[0].name, sizeof(indexed_document_t), file, 0, index->nr_docs - 1);
        }
    }
    if (doc_id < 0) {
        printf("Error: %s is not in the filebase!\n", file);
        fclose(f);
        return;
    }

    char *l;
    while ((l = read_line(f))) {
        // turn non alpha characters into spaces
        nonalpha_to_space(l);

        for (char *word = strtok(l, " "); word; word = strtok(NULL, " ")) {
            // ignore stopwords
            if (is_stopword(word)) {
                continue;
            }
            char *word_stem = stem(word);
            if (!word_stem) {
                continue;
            }
            if (!*word_stem) {
                // nothing left after stemming; the stem is heap memory (it is
                // free()d or stored in the index below), so release it here too
                free(word_stem);
                continue;
            }

            // find the stem or the position where it has to be inserted (list is sorted)
            indexed_word_p w = index->words; // current word
            indexed_word_p p = NULL;         // previous word
            int cmp = -1;
            while (w && (cmp = strcmp(w->stem, word_stem)) < 0) {
                p = w;
                w = w->next;
            }

            if (w && !cmp) {
                // stem is already indexed: look for this document in its (sorted) list
                int i;
                int present = 0;
                for (i = 0; i < w->nr_docs; i++) {
                    if (w->documents[i].id == doc_id) {
                        present = 1;
                        break;
                    }
                    if (w->documents[i].id > doc_id) {
                        break;
                    }
                }

                if (present) {
                    // increase counter for number of occurrences of this word in this document
                    w->documents[i].tf++;
                } else {
                    // first occurrence in this document: insert it at position i
                    indexed_word_p grown = realloc(w, sizeof(indexed_word_t) + sizeof(doc_t) * (w->nr_docs + 1));
                    if (!grown) {
                        printf("Error: out of memory while indexing %s!\n", file);
                        free(word_stem);
                        continue;
                    }
                    w = grown;
                    // update pointer to this word (needed after realloc)
                    if (!p) {
                        index->words = w;
                    } else {
                        p->next = w;
                    }
                    memmove(&w->documents[i + 1], &w->documents[i], sizeof(doc_t) * (w->nr_docs - i));
                    w->documents[i].id = doc_id;
                    w->documents[i].tf = 1;
                    w->nr_docs++;
                }
                free(word_stem);
            } else {
                // stem is not indexed, add it to index; the new entry takes ownership of word_stem
                indexed_word_p fresh = malloc(sizeof(indexed_word_t) + sizeof(doc_t));
                if (!fresh) {
                    printf("Error: out of memory while indexing %s!\n", file);
                    free(word_stem);
                    continue;
                }
                fresh->stem = word_stem;
                fresh->nr_docs = 1;
                fresh->documents[0].id = doc_id;
                fresh->documents[0].tf = 1;
                index->nr_words++;
                // insert this word in linked list
                if (!p) {
                    fresh->next = index->words;
                    index->words = fresh;
                } else {
                    fresh->next = p->next;
                    p->next = fresh;
                }
            }

            // increase counter for total number of words in this document
            index->documents[doc_id].nr_words++;
        }
        free(l);
    }
    fclose(f);

    // finalize computation of TF: turn the counts of this document into frequencies
    for (indexed_word_p w = index->words; w; w = w->next) {
        int i = find_int(&w->documents[0].id, sizeof(doc_t), doc_id, 0, w->nr_docs - 1);
        if (i >= 0) {
            w->documents[i].tf /= index->documents[doc_id].nr_words;
        }
    }
}
/*
 * Saves the index in two text files in the current working directory:
 *
 * "filebase": one line per document, format <path/to/file>|<nr_words>
 * "index":    one line per stem, format
 *             <stem>:<n>:<doc_id_1>/<tf_1>|<doc_id_2>/<tf_2>|..|<doc_id_n>/<tf_n>
 *
 * If one of the files cannot be opened an error is printed and the function
 * returns; the filebase file may already have been written at that point.
 */
void write_index_to_file(index_p index) {
    // STEP 1: list of all documents
    FILE *fb_file = fopen("filebase", "w");
    if (fb_file == NULL) {
        printf("Error: couldn't open filebase file to write.\nUnable to write index to file\n");
        return;
    }
    for (int d = 0; d < index->nr_docs; d++) {
        fprintf(fb_file, "%s|%d\n", index->documents[d].name, index->documents[d].nr_words);
    }
    fclose(fb_file);

    // STEP 2: list of all stems together with the documents they occur in
    FILE *index_file = fopen("index", "w");
    if (index_file == NULL) {
        printf("Error: couldn't open index file to write.\nUnable to write index to file\n");
        return;
    }
    for (indexed_word_p entry = index->words; entry != NULL; entry = entry->next) {
        // the first document is written together with the head of the line ...
        fprintf(index_file, "%s:%i:%i/%f", entry->stem, entry->nr_docs, entry->documents[0].id, entry->documents[0].tf);
        // ... all further documents are appended with a leading separator
        for (int k = 1; k < entry->nr_docs; k++) {
            fprintf(index_file, "|%i/%f", entry->documents[k].id, entry->documents[k].tf);
        }
        fprintf(index_file, "\n");
    }
    fclose(index_file);
}
/*
* Parses and loads contents of the index file into a index struct
*/
index_p load_index() {
// create index struct
index_p index = (index_p) malloc(sizeof(index_t));
index->words = NULL;
index->nr_docs = 0;
index->nr_words = 0;
// STEP 1: populate list of all documents
FILE *fb_file = fopen("filebase", "r");
if (!fb_file) {
printf("Error: filebase file not found.\nIndex not loaded!\n");
return index;
}
// count total number of documents
char c;
int nr_docs = 0;
while ((c = fgetc(fb_file)) != EOF) {
if (c == '\n') {
nr_docs++;
}
}
rewind(fb_file);
// create index struct
index = (index_p) realloc(index, sizeof(index_t) + sizeof(indexed_document_t) * nr_docs);
// load all documents in a list
int i;
for (i = 0; i < nr_docs; i++) {
char *line = read_line(fb_file);
// copy name to index
char *tmp;
char *doc = strtok(line, "|");
index->documents[i].name = malloc(sizeof(char) * strlen(doc) + 1);
memcpy(index->documents[i].name, doc, strlen(doc) + 1);
// copy number of words to index
doc = strtok(NULL, "|");
index->documents[i].nr_words = strtol(doc, &tmp, 10);
index->nr_docs++;
free(line);
}
fclose(fb_file);
// STEP 2: populate list of all words
FILE * index_file = fopen("index", "r");
if (!index_file) {
printf("Error: index file not found.\nIndex not loaded!\n");
return index;
}
indexed_word_p p = NULL;
char *line, *stem, *docs, *doc, *tmp;
while ((line = read_line(index_file))) {
// get the stem
stem = strtok(line, ":");
// ignore empty lines
if (!stem) {
continue;
}
// get number of documents for this word
int nr_docs = strtol(strtok(NULL, ":"), &tmp, 10);
// create struct for stem
indexed_word_p w = (indexed_word_p) malloc(sizeof(indexed_word_t) + sizeof(doc_t) * nr_docs);
w->stem = (char *) malloc(sizeof(char) * strlen(stem) + 1);
memcpy(w->stem, stem, strlen(stem) + 1);
w->nr_docs = nr_docs;
// insert into index
if (!p) {
index->words = w;
w->next = NULL;
} else {
w->next = p->next;
p->next = w;
}
p = w;
// get list of documents containing this stem
docs = strtok(NULL, ":");
// read each document
doc = strtok(docs, "|");
int i = 0;
while(doc != NULL) {
sscanf(doc, "%i/%lf", &w->documents[i].id, &w->documents[i].tf);
// get next document
doc = strtok(NULL, "|");
i++;
}
index->nr_words++;
free(line);
}
fclose(index_file);
return index;
}
/*
 * Releases an index struct: the names of all documents, every entry of the
 * word list together with its stem, and finally the struct itself.
 * The pointer must not be used afterwards.
 */
void close_index(index_p index) {
    // document names
    for (int d = 0; d < index->nr_docs; d++) {
        free(index->documents[d].name);
    }

    // word list
    indexed_word_p node = index->words;
    while (node != NULL) {
        indexed_word_p following = node->next;
        free(node->stem);
        free(node);
        node = following;
    }

    free(index);
}
/*
 * Binary search an array for a string
 * objs: pointer to an array of pointers to strings
 *       or array of fixed length structs containing a pointer to a string
 *       (in this case the objs pointer should point to the string pointer in the first struct)
 * struct_len: length of one object in the array in bytes
 * str: search string
 * min: minimal index to search
 * max: maximal index to search
 *
 * The array has to be sorted in strcmp() order.
 * returns: index of the matching element, or -1 if the string is not found.
 *          An empty range (max < min), a negative min or a NULL argument
 *          yields -1 without touching the array.
 */
int find_str(void *objs, int struct_len, char *str, int min, int max) {
    if (!objs || !str || min < 0) {
        return -1;
    }
    while (min <= max) {
        int middle = min + (max - min) / 2;
        char *candidate = *((char **) (((char *) objs) + (size_t) middle * struct_len));
        int cmp = strcmp(candidate, str);
        if (!cmp) {
            // string found
            return middle;
        }
        if (min == max) {
            break;
        }
        if (cmp > 0 && min != middle) {
            // continue search in left half
            max = middle - 1;
        } else {
            // continue search in right half; this also covers the case of an
            // empty left half, where the single remaining element is still probed
            min = middle + 1;
        }
    }
    // finished searching
    return -1;
}
/*
 * Binary search an array for an integer
 * objs: pointer to an array of integers
 *       or array of fixed length structs containing an integer
 *       (in this case the objs pointer should point to the integer in the first struct)
 * struct_len: length of one object in the array in bytes
 * i: search integer
 * min: minimal index to search
 * max: maximal index to search
 *
 * The array has to be sorted in ascending order.
 * returns: index of the matching element, or -1 if the integer is not found.
 *          An empty range (max < min), a negative min or a NULL array
 *          yields -1 without touching the array.
 */
int find_int(void *objs, int struct_len, int i, int min, int max) {
    if (!objs || min < 0) {
        return -1;
    }
    while (min <= max) {
        int middle = min + (max - min) / 2;
        int cmp_val = *((int *) (((char *) objs) + (size_t) middle * struct_len));
        if (cmp_val == i) {
            // integer found
            return middle;
        }
        if (min == max) {
            break;
        }
        if (cmp_val > i && min != middle) {
            // continue search in left half
            max = middle - 1;
        } else {
            // continue search in right half; this also covers the case of an
            // empty left half, where the single remaining element is still probed
            min = middle + 1;
        }
    }
    // finished searching
    return -1;
}