/**
 * Allocates memory for a blob of the requested size.
 *
 * Configures the MemBlob to hold nr_items items of item_size each and
 * allocates the underlying byte buffer (optionally zero-initialised).
 *
 * @param blob        The MemBlob in which to place the memory.
 * @param nr_items    The number of items the MemBlob is to hold as data.
 * @param item_size   The size of one item; SIZE_BIT requests a packed bit vector.
 * @param clear_blob  boolean: if true, all bytes in the data space are initialised to 0.
 * @return            Always 1.
 */
int alloc_mblob(MemBlob *blob, int nr_items, int item_size, int clear_blob)
{
  int n_bytes;

  assert((blob != NULL) && "alloc_mblob(): You can't pass a NULL blob!");
  assert((item_size >= 0) && "alloc_mblob(): item_size must be >= 0!");
  assert((nr_items > 0) && "alloc_mblob(): nr_items must be > 0!");

  blob->item_size = item_size;
  blob->nr_items = nr_items;

  /* bit blobs pack 8 items per byte; round the byte count upwards */
  if (item_size == SIZE_BIT)
    n_bytes = nr_items / 8 + ((nr_items % 8) ? 1 : 0);
  else
    n_bytes = nr_items * item_size;

  blob->size = n_bytes;
  blob->data = clear_blob ? (int *) cl_calloc(n_bytes, 1)
                          : (int *) cl_malloc(n_bytes);

  blob->allocation_method = MALLOCED;
  blob->writeable = 1;
  blob->changed = 0;
  blob->fname = NULL;
  blob->fsize = 0;
  blob->offset = 0;

  return 1;
}
int evaluate_target(CorpusList *corp, /* the corpus */ FieldType t_id, /* the field to set */ FieldType base, /* where to start the search */ int inclusive, /* including or excluding the base */ SearchStrategy strategy, /* disambiguation rule: which item */ Constrainttree constr, /* the constraint */ enum ctxtdir direction, /* context direction */ int units, /* number of units */ char *attr_name) /* name of unit */ { Attribute *attr; int *table; Context context; int i, line, lbound, rbound; int excl_start, excl_end; int nr_evals; int percentage, new_percentage; /* for ProgressBar */ /* ------------------------------------------------------------ */ assert(corp); /* consistency check */ assert(t_id == TargetField || t_id == KeywordField || t_id == MatchField || t_id == MatchEndField); if (!constr) { cqpmessage(Error, "Constraing pattern missing in 'set target' command."); return 0; } if (corp->size <= 0) { cqpmessage(Error, "Corpus is empty."); return 0; } /* * check whether the base field specification is ok */ switch(base) { case MatchField: case MatchEndField: if (corp->range == NULL) { cqpmessage(Error, "No ranges for start of search"); return 0; } break; case TargetField: if (corp->targets == NULL) { cqpmessage(Error, "Can't start from base TARGET, none defined"); return 0; } break; case KeywordField: if (corp->keywords == NULL) { cqpmessage(Error, "Can't start from base KEYWORD, none defined"); return 0; } break; default: cqpmessage(Error, "Illegal base field (#%d) in 'set target' command.", base); return 0; } if (units <= 0) { cqpmessage(Error, "Invalid search space (%d units) in 'set target' command.", units); return 0; } /* THIS SHOULD BE UNNECESSARY, BECAUSE THE GRAMMAR MAKES SURE THE SUBCORPUS EXISTS & IS LOADED */ /* if (!access_corpus(corp)) { */ /* cqpmessage(Error, "Can't access named query %s.", corp->name); */ /* return 0; */ /* } */ context.size = units; context.direction = direction; if ((strcasecmp(attr_name, "word") == 0) || 
(strcasecmp(attr_name, "words") == 0)) { attr = find_attribute(corp->corpus, DEFAULT_ATT_NAME, ATT_POS, NULL); context.type = word; context.attrib = NULL; } else { attr = find_attribute(corp->corpus, attr_name, ATT_STRUC, NULL); context.type = structure; context.attrib = attr; } if (attr == NULL) { cqpmessage(Error, "Can't find attribute %s.%s", corp->mother_name, attr_name); return 0; } if (progress_bar) { progress_bar_clear_line(); progress_bar_message(1, 1, " preparing"); } table = (int *)cl_calloc(corp->size, sizeof(int)); EvaluationIsRunning = 1; nr_evals = 0; percentage = -1; for (line = 0; line < corp->size && EvaluationIsRunning; line++) { if (progress_bar) { new_percentage = floor(0.5 + (100.0 * line) / corp->size); if (new_percentage > percentage) { percentage = new_percentage; progress_bar_percentage(0, 0, percentage); } } table[line] = -1; switch(base) { case MatchField: excl_start = corp->range[line].start; excl_end = corp->range[line].end; if ((corp->range[line].start == corp->range[line].end) || inclusive) { if (calculate_ranges(corp, corp->range[line].start, context, &lbound, &rbound) == False) { Rprintf( "Can't compute boundaries for range #%d", line); lbound = rbound = -1; } } else { int dummy; if (calculate_ranges(corp, corp->range[line].start, context, &lbound, &dummy) == False) { Rprintf( "Can't compute left search space boundary match #%d", line); lbound = rbound = -1; } else if (calculate_ranges(corp, corp->range[line].end, context, &dummy, &rbound) == False) { Rprintf( "Can't compute right search space boundary match #%d", line); lbound = rbound = -1; } } break; case MatchEndField: excl_start = excl_end = corp->range[line].end; if (excl_start >= 0) { if (calculate_ranges(corp, corp->range[line].end, context, &lbound, &rbound) == False) { Rprintf( "Can't compute search space boundaries for match #%d", line); lbound = rbound = -1; } } else lbound = rbound = -1; break; case TargetField: excl_start = excl_end = corp->targets[line]; if 
(excl_start >= 0) { if (calculate_ranges(corp, corp->targets[line], context, &lbound, &rbound) == False) { Rprintf( "Can't compute search space boundaries for match #%d", line); lbound = rbound = -1; } } else lbound = rbound = -1; break; case KeywordField: excl_start = excl_end = corp->keywords[line]; if (excl_start >= 0) { if (calculate_ranges(corp, corp->keywords[line], context, &lbound, &rbound) == False) { Rprintf( "Can't compute search space boundaries for match #%d", line); lbound = rbound = -1; } } else lbound = rbound = -1; break; default: assert(0 && "Can't be"); return 0; } if ((lbound >= 0) && (rbound >= 0)) { int dist, maxdist; if (direction == left) { rbound = excl_start; if (strategy == SearchNearest) strategy = SearchRightmost; else if (strategy == SearchFarthest) strategy = SearchLeftmost; } else if (direction == right) { lbound = excl_start; if (strategy == SearchNearest) strategy = SearchLeftmost; else if (strategy == SearchFarthest) strategy = SearchRightmost; } switch (strategy) { case SearchFarthest: maxdist = MAX(excl_start - lbound, rbound - excl_start); assert(maxdist >= 0); for (dist = maxdist; dist >= 0; dist--) { i = excl_start - dist; if (i >= lbound && (inclusive || (i < excl_start))) if (eval_bool(constr, NULL, i)) { table[line] = i; break; } i = excl_start + dist; if (i <= rbound && (inclusive || (i > excl_end))) if (eval_bool(constr, NULL, i)) { table[line] = i; break; } nr_evals++; if (nr_evals == 1000) { CheckForInterrupts(); nr_evals = 0; } } break; case SearchNearest: maxdist = MAX(excl_start - lbound, rbound - excl_start); assert(maxdist >= 0); for (dist = 0; dist <= maxdist; dist++) { i = excl_start - dist; if (i >= lbound && (inclusive || (i < excl_start))) if (eval_bool(constr, NULL, i)) { table[line] = i; break; } i = excl_start + dist; if (i <= rbound && (inclusive || (i > excl_end))) if (eval_bool(constr, NULL, i)) { table[line] = i; break; } nr_evals++; if (nr_evals == 1000) { CheckForInterrupts(); nr_evals = 0; } } 
break; case SearchLeftmost: for (i = lbound; i <= rbound; i++) if (inclusive || (i < excl_start) || (i > excl_end)) { if (eval_bool(constr, NULL, i)) { table[line] = i; break; } nr_evals++; if (nr_evals == 1000) { CheckForInterrupts(); nr_evals = 0; } } break; case SearchRightmost: for (i = rbound; i >= lbound; i--) if (inclusive || (i < excl_start) || (i > excl_end)) { if (eval_bool(constr, NULL, i)) { table[line] = i; break; } nr_evals++; if (nr_evals == 1000) { CheckForInterrupts(); nr_evals = 0; } } break; default: break; } } } if (progress_bar) progress_bar_message(1, 1, " cleaning up"); switch (t_id) { case MatchField: for (i = 0; i < corp->size; i++) { if (table[i] >= 0) corp->range[i].start = table[i]; if (corp->range[i].start > corp->range[i].end) corp->range[i].start = corp->range[i].end; } cl_free(table); break; case MatchEndField: for (i = 0; i < corp->size; i++) { if (table[i] >= 0) corp->range[i].end = table[i]; if (corp->range[i].end < corp->range[i].start) corp->range[i].end = corp->range[i].start; } cl_free(table); break; case TargetField: cl_free(corp->targets); corp->targets = table; break; case KeywordField: cl_free(corp->keywords); corp->keywords = table; break; default: assert(0 && "Can't be"); break; } if (progress_bar) progress_bar_clear_line(); if ((t_id == MatchField) || (t_id == MatchEndField)) RangeSort(corp, 0); /* re-sort corpus if match regions were modified */ touch_corpus(corp); if (!EvaluationIsRunning) { cqpmessage(Warning, "Evaluation interruted: results may be incomplete."); if (which_app == cqp) install_signal_handler(); } EvaluationIsRunning = 0; return 1; }
/**
 * Validates the REVCORP component of the given attribute.
 *
 * This function validates a REVCORP (i.e. an uncompressed index).
 * It assumes that a lexicon, frequencies and (compressed or
 * uncompressed) token stream are available for CL access for the
 * given attribute.
 *
 * @param attr The attribute whose REVCORP should be checked.
 * @return True for all OK, false for a problem.
 */
int validate_revcorp(Attribute *attr)
{
  Component *revcorp = ensure_component(attr, CompRevCorpus, 0);
  int *next_slot;               /* next expected index slot for each lexicon id */
  int n_types, n_tokens;
  int k, running, position, token_id;

  printf(" ? validating %s ... ", cid_name(CompRevCorpus));
  fflush(stdout);

  if (!revcorp) {
    printf("FAILED (no data)\n");
    return 0;
  }

  n_types = cl_max_id(attr);
  n_tokens = cl_max_cpos(attr);
  if ((n_types <= 0) || (n_tokens <= 0)) {
    printf("FAILED (corpus access error)\n");
    return 0;
  }
  if (revcorp->size != n_tokens) {
    printf("FAILED (wrong size)\n");
    return 0;
  }

  /* each id's index region starts at the sum of the frequencies of all
     smaller ids (i.e. recompute the REVIDX offsets) */
  next_slot = (int *) cl_calloc(n_types, sizeof(int));
  running = 0;
  for (k = 0; k < n_types; k++) {
    next_slot[k] = running;
    running += cl_id2freq(attr, k);
  }

  /* walk the token stream: each occurrence of an id must appear at the
     next slot of that id's region in the (network byte order) index */
  for (position = 0; position < n_tokens; position++) {
    token_id = cl_cpos2id(attr, position);
    if ((token_id < 0) || (token_id >= n_types)) {
      printf("FAILED (inconsistency in token stream)\n");
      cl_free(next_slot);
      return 0;
    }
    if (ntohl(revcorp->data.data[next_slot[token_id]]) != position) {
      printf("FAILED\n");
      cl_free(next_slot);
      return 0;
    }
    next_slot[token_id]++;
  }

  /* the final slot pointers must line up with the cumulative frequencies */
  running = 0;
  for (k = 0; k < n_types; k++) {
    running += cl_id2freq(attr, k);
    if (next_slot[k] != running) {
      printf("FAILED (token frequencies incorrect)\n");
      cl_free(next_slot);
      return 0;
    }
  }

  cl_free(next_slot);
  printf("OK\n");
  return 1;
}
/**
 * Compresses the token stream of a p-attribute.
 *
 * Builds a canonical Huffman code over the attribute's lexicon (following
 * Witten/Moffat/Bell, "Managing Gigabytes", pp. 335ff) and then encodes the
 * whole token stream with it.
 *
 * Three files are created: the compressed token stream, the descriptor block,
 * and a sync file.
 *
 * @param attr  The attribute to compress.
 * @param hc    Location for the resulting Huffmann code descriptor block.
 * @param fname Base filename for the resulting files; if NULL, the component
 *              path names registered for the attribute are used instead.
 * @return      Always 1; all failures abort via rcqp_receive_error().
 */
int compute_code_lengths(Attribute *attr, HCD *hc, char *fname)
{
  int id, i, h;

  int nr_codes = 0;

  int *heap = NULL;             /* 2*size cells: tree pointers + freqs/codes */
  unsigned *codelength = NULL;  /* was char[], probably to save space; but that's
                                   unnecessary and makes gcc complain */

  int issued_codes[MAXCODELEN];
  int next_code[MAXCODELEN];

  long sum_bits;

  Rprintf("COMPRESSING TOKEN STREAM of %s.%s\n", corpus_id_cwb_huffcode, attr->any.name);

  /* I need the following components:
   * - CompCorpus
   * - CompCorpusFreqs
   * - CompLexicon
   * - CompLexiconIdx
   * and want to force the CL to use them rather than compressed data.
   */
  {
    Component *comp;

    if ((comp = ensure_component(attr, CompCorpus, 0)) == NULL) {
      Rprintf( "Computation of huffman codes needs the CORPUS component\n");
      rcqp_receive_error(1);
    }
    /* NOTE(review): "LEXION" below looks like a typo for "LEXICON" in the
       user-facing message -- left untouched here */
    if ((comp = ensure_component(attr, CompLexicon, 0)) == NULL) {
      Rprintf( "Computation of huffman codes needs the LEXION component\n");
      rcqp_receive_error(1);
    }
    if ((comp = ensure_component(attr, CompLexiconIdx, 0)) == NULL) {
      Rprintf( "Computation of huffman codes needs the LEXIDX component\n");
      rcqp_receive_error(1);
    }
    if ((comp = ensure_component(attr, CompCorpusFreqs, 0)) == NULL) {
      Rprintf( "Computation of huffman codes needs the FREQS component.\n"
               "Run 'makeall -r %s -c FREQS %s %s' in order to create it.\n",
               corpus->registry_dir, corpus->registry_name, attr->any.name);
      rcqp_receive_error(1);
    }
  }

  /*
   * strongly follows Witten/Moffat/Bell: ``Managing Gigabytes'',
   * pp. 335ff.
   */

  hc->size = cl_max_id(attr);   /* the size of the attribute (nr of items) */
  if ((hc->size <= 0) || (cderrno != CDA_OK)) {
    cdperror("(aborting) cl_max_id() failed");
    rcqp_receive_error(1);
  }

  hc->length = cl_max_cpos(attr); /* the length of the attribute (nr of tokens) */
  if ((hc->length <= 0) || (cderrno != CDA_OK)) {
    cdperror("(aborting) cl_max_cpos() failed");
    rcqp_receive_error(1);
  }

  hc->symbols = NULL;
  hc->min_codelen = 100;        /* sentinel: any real code length is smaller */
  hc->max_codelen = 0;

  memset((char *)hc->lcount, '\0', MAXCODELEN * sizeof(int));
  memset((char *)hc->min_code, '\0', MAXCODELEN * sizeof(int));
  memset((char *)hc->symindex, '\0', MAXCODELEN * sizeof(int));
  memset((char *)issued_codes, '\0', MAXCODELEN * sizeof(int));

  codelength = (unsigned *)cl_calloc(hc->size, sizeof(unsigned));

  /* =========================================== make & initialize the heap */

  /* first half: pointer cells into the second half; second half: frequencies */
  heap = (int *)cl_malloc(hc->size * 2 * sizeof(int));

  for (i = 0; i < hc->size; i++) {
    heap[i] = hc->size + i;
    heap[hc->size+i] = get_id_frequency(attr, i) + 1;
    /* add-one trick needed to avoid unsupported Huffman codes > 31 bits for
       very large corpora of ca. 2 billion words: theoretical optimal code
       length for hapax legomena in such corpora is ca. 31 bits, and the
       Huffman algorithm sometimes generates 32-bit codes; with add-one trick,
       the theoretical optimal code length is always <= 30 bits */
  }

  /* ============================== PROTOCOL ============================== */
  if (do_protocol > 0)
    fprintf(protocol, "Allocated heap with %d cells for %d items\n\n",
            hc->size * 2, hc->size);
  if (do_protocol > 2)
    print_heap(heap, hc->size, "After Initialization");
  /* ============================== PROTOCOL ============================== */

  /* ================================================== Phase 1 */

  h = hc->size;

  /*
   * we address the heap in the following manner: when we start array
   * indices at 1, the left child is at 2i, and the right child is at
   * 2i+1. So we maintain this scheme and decrement just before
   * adressing the array.
   */

  /*
   * construct the initial min-heap
   */
  for (i = hc->size/2; i > 0; i--) {
    /* do:
     *   bottom up, left to right,
     *   for each root of each subtree, sift if necessary
     */
    sift(heap, h, i);
  }

  /* ============================== PROTOCOL ============================== */
  if (do_protocol > 2) {
    print_heap(heap, hc->size, "Initial Min-Heap");
    fprintf(protocol, "\n");
  }
  /* ============================== PROTOCOL ============================== */

  /* ================================================== Phase 2 */

  /* smallest item at top of heap now, remove the two smallest items
   * and sift, find second smallest by removing top and sifting, as
   * long as we have more than one root */
  while (h > 1) {

    int pos[2];

    for (i = 0; i < 2; i++) {
      /* remove topmost (i.e. smallest) item */
      pos[i] = heap[0];

      /* remove and sift, to reobtain heap integrity: move ``last''
       * item to top of heap and sift */
      heap[0] = heap[--h];
      sift(heap, h, 1);
    }

    /* ============================== PROTOCOL ============================== */
    if (do_protocol > 3) {
      fprintf(protocol, "Removed smallest item %d with freq %d\n",
              pos[0], heap[pos[0]]);
      fprintf(protocol, "Removed 2nd smallest item %d with freq %d\n",
              pos[1], heap[pos[1]]);
    }
    /* ============================== PROTOCOL ============================== */

    /*
     * pos[0] and pos[1] contain pointers to the two smallest items
     * now. since h was decremented twice, h and h+1 are now empty and
     * become the accumulated freq of pos[i]. The individual
     * frequencies are not needed any more, so pointers to h+1 (the
     * acc freq) are stored there instead (tricky, since freq cell
     * becomes pointer cell). So, what happens here, is to include a
     * new element in the heap.
     */

    heap[h] = h+1;
    heap[h+1] = heap[pos[0]] + heap[pos[1]]; /* accumulated freq */
    heap[pos[0]] = heap[pos[1]] = h+1;       /* pointers! */

    h++;                        /* we put a new element into heap */

    /*
     * now, swap it up until we reobtain heap integrity
     */
    {
      register int parent, current;

      current = h;
      parent = current >> 1;

      while ((parent > 0) &&
             (heap[heap[parent-1]] > heap[heap[current-1]])) {
        int tmp;
        tmp = heap[parent-1];
        heap[parent-1] = heap[current-1];
        heap[current-1] = tmp;

        current = parent;
        parent = current >> 1;
      }
    }
  }

  /* ============================== PROTOCOL ============================== */
  if (do_protocol > 3)
    fprintf(protocol, "\n");
  /* ============================== PROTOCOL ============================== */

  /* ================================================== Phase 3 */

  /* compute the code lengths. We don't have any freqs in heap any
   * more, only pointers to parents */

  heap[0] = -1U;                /* root has a depth of 0 */
  heap[1] = 0;

  /* walk parent pointers top-down: each node's depth is its parent's
     depth plus one -- "we trust in what they say on p. 345" */
  for (i = 2; i < hc->size * 2; i++)
    heap[i] = heap[heap[i]]+1;

  /* collect the lengths */
  sum_bits = 0L;
  for (i = 0; i < hc->size; i++) {

    int cl = heap[i+hc->size];  /* depth in the code tree == code length */

    sum_bits += cl * get_id_frequency(attr, i);

    codelength[i] = cl;
    if (cl == 0)
      continue;

    if (cl > hc->max_codelen)
      hc->max_codelen = cl;
    if (cl < hc->min_codelen)
      hc->min_codelen = cl;

    hc->lcount[cl]++;
  }

  /* ============================== PROTOCOL ============================== */
  if (do_protocol > 0) {
    fprintf(protocol, "Minimal code length: %3d\n", hc->min_codelen);
    fprintf(protocol, "Maximal code length: %3d\n", hc->max_codelen);
    fprintf(protocol, "Compressed code len: %10ld bits, %10ld (+1) bytes\n\n\n",
            sum_bits, sum_bits/8);
  }
  /* ============================== PROTOCOL ============================== */

  if (hc->max_codelen >= MAXCODELEN) {
    Rprintf( "Error: Huffman codes too long (%d bits, current maximum is %d bits).\n",
             hc->max_codelen, MAXCODELEN-1);
    Rprintf( "       Please contact the CWB development team for assistance.\n");
    rcqp_receive_error(1);
  }

  if ((hc->max_codelen == 0) && (hc->min_codelen == 100)) {
    /* no code lengths at all -- empty lexicon? */
    Rprintf( "Problem: No output generated -- no items?\n");
    nr_codes = 0;
  }
  else {

    /* canonical code: compute the smallest code value for each length ... */
    hc->min_code[hc->max_codelen] = 0;
    for (i = hc->max_codelen-1; i > 0; i--)
      hc->min_code[i] = (hc->min_code[i+1] + hc->lcount[i+1]) >> 1;

    /* ... and the start offset of each length's block in the symbol table */
    hc->symindex[hc->min_codelen] = 0;
    for (i = hc->min_codelen+1; i <= hc->max_codelen; i++)
      hc->symindex[i] = hc->symindex[i-1] + hc->lcount[i-1];

    /* ============================== PROTOCOL ============================== */
    if (do_protocol > 0) {
      int sum_codes = 0;

      fprintf(protocol, " CL #codes MinCode SymIdx\n");
      fprintf(protocol, "----------------------------------------\n");
      for (i = hc->min_codelen; i <= hc->max_codelen; i++) {
        sum_codes += hc->lcount[i];
        fprintf(protocol, "%3d %7d %7d %7d\n",
                i, hc->lcount[i], hc->min_code[i], hc->symindex[i]);
      }
      fprintf(protocol, "----------------------------------------\n");
      fprintf(protocol, " %7d\n", sum_codes);
    }
    /* ============================== PROTOCOL ============================== */

    for (i = 0; i < MAXCODELEN; i++)
      next_code[i] = hc->min_code[i];

    /* ============================== PROTOCOL ============================== */
    if (do_protocol > 1) {
      fprintf(protocol, "\n");
      fprintf(protocol, " Item f(item) CL Bits Code, String\n");
      fprintf(protocol, "------------------------------------"
              "------------------------------------\n");
    }
    /* ============================== PROTOCOL ============================== */

    /* compute and issue codes */

    hc->symbols = heap + hc->size; /* second half of heap becomes symbol table */

    for (i = 0; i < hc->size; i++) {

      /* we store the code for item i in heap[i] */
      heap[i] = next_code[codelength[i]];
      next_code[codelength[i]]++;

      /* ============================== PROTOCOL ============================== */
      if (do_protocol > 1) {
        fprintf(protocol, "%7d %7d %3d %10d ",
                i, get_id_frequency(attr, i), codelength[i],
                codelength[i] * get_id_frequency(attr, i));
        bprintf(heap[i], codelength[i], protocol);
        fprintf(protocol, " %7d %s\n",
                heap[i], get_string_of_id(attr, i));
      }
      /* ============================== PROTOCOL ============================== */

      /* and put the item itself in the second half of the table */
      heap[hc->size+hc->symindex[codelength[i]]+issued_codes[codelength[i]]] = i;
      issued_codes[codelength[i]]++;
    }

    /* ============================== PROTOCOL ============================== */
    if (do_protocol > 1) {
      fprintf(protocol, "------------------------------------"
              "------------------------------------\n");
    }
    /* ============================== PROTOCOL ============================== */

    /* The work itself -- encode the attribute data */

    {
      char *path;

      char hcd_path[CL_MAX_LINE_LENGTH];
      char huf_path[CL_MAX_LINE_LENGTH];
      char sync_path[CL_MAX_LINE_LENGTH];

      Component *corp;

      BFile bfd;
      FILE *sync;

      int cl, code, pos;

      corp = ensure_component(attr, CompCorpus, 0);
      assert(corp);

      if (fname) {
        path = fname;

        sprintf(hcd_path, "%s.hcd", path);
        sprintf(huf_path, "%s.huf", path);
        sprintf(sync_path, "%s.huf.syn", path);
      }
      else {
        path = component_full_name(attr, CompHuffSeq, NULL);
        assert(path); /* additonal condition (cderrno == CDA_OK) removed,
                         since component_full_name doesn't (re)set cderrno */
        strcpy(huf_path, path);

        path = component_full_name(attr, CompHuffCodes, NULL);
        assert(path); /* additonal condition (cderrno == CDA_OK) removed,
                         since component_full_name doesn't (re)set cderrno */
        strcpy(hcd_path, path);

        path = component_full_name(attr, CompHuffSync, NULL);
        assert(path); /* additonal condition (cderrno == CDA_OK) removed,
                         since component_full_name doesn't (re)set cderrno */
        strcpy(sync_path, path);
      }

      Rprintf("- writing code descriptor block to %s\n", hcd_path);
      if (!WriteHCD(hcd_path, hc)) {
        Rprintf( "ERROR: writing %s failed. Aborted.\n", hcd_path);
        rcqp_receive_error(1);
      }

      Rprintf("- writing compressed item sequence to %s\n", huf_path);
      if (!BFopen(huf_path, "w", &bfd)) {
        Rprintf( "ERROR: can't create file %s\n", huf_path);
        perror(huf_path);
        rcqp_receive_error(1);
      }

      Rprintf("- writing sync (every %d tokens) to %s\n",
              SYNCHRONIZATION, sync_path);
      if ((sync = fopen(sync_path, "w")) == NULL) {
        Rprintf( "ERROR: can't create file %s\n", sync_path);
        perror(sync_path);
        rcqp_receive_error(1);
      }

      for (i = 0; i < hc->length; i++) {

        /* SYNCHRONIZE: record the bit-file position every SYNCHRONIZATION tokens */
        if ((i % SYNCHRONIZATION) == 0) {
          if (i > 0)
            BFflush(&bfd);
          pos = BFposition(&bfd);
          NwriteInt(pos, sync);
        }

        id = cl_cpos2id(attr, i);
        if ((id < 0) || (cderrno != CDA_OK)) {
          cdperror("(aborting) cl_cpos2id() failed");
          rcqp_receive_error(1);
        }
        else {
          assert((id >= 0) && (id < hc->size) && "Internal Error");

          cl = codelength[id];
          code = heap[id];

          if (!BFwriteWord((unsigned int)code, cl, &bfd)) {
            Rprintf( "Error writing code for ID %d (%d, %d bits) at position %d. Aborted.\n",
                     id, code, cl, i);
            rcqp_receive_error(1);
          }
        }
      }

      fclose(sync);
      BFclose(&bfd);
    }
  }

  free(codelength);
  free(heap);

  return 1;
}
/****************************************************************************************
 * Function name - heap_init
 *
 * Description - Performs initialization of an allocated heap.
 *
 * Input -       *h - pointer to an allocated heap
 *               initial_heap_size - initial size to start
 *               increase_step - size to increase heap, when deciding to do so
 *               comparator - user-function, that compares user-objects, kept by the heap
 *               dumper - user-function, that dumps user-objects, kept by the heap
 *               nodes_prealloc - number of hnodes to be pre-allocated at initialization
 *
 * Return Code/Output - On success - 0, on error -1 (all partially-allocated
 *                      resources are released, unlike the previous version
 *                      which leaked them on the later error paths)
 ****************************************************************************************/
int heap_init (heap*const h,
               size_t initial_heap_size,
               size_t increase_step,
               heap_cmp_func comparator,
               node_dump_func dumper,
               size_t nodes_prealloc)
{
  size_t i = 0;

  if (!h)
    {
      fprintf(stderr, "%s -error: wrong input\n", __func__);
      return -1;
    }

  /* validate the comparator up front, before any allocation is made */
  if (0 == comparator)
    {
      fprintf(stderr, "%s - error: comparator function should be provided.\n", __func__);
      return -1;
    }

  memset ((void*)h, 0, sizeof (*h));

  if (! (h->heap = calloc (initial_heap_size, sizeof (hnode*))) )
    {
      fprintf(stderr, "%s - error: alloc heap failed\n", __func__);
      return -1;
    }

  /* Alloc array of node-ids */
  if (! (h->ids_arr = calloc (initial_heap_size, sizeof (long))) )
    {
      fprintf(stderr, "%s - error: alloc of nodes-ids array failed\n", __func__);
      goto error_cleanup;       /* fix: previously leaked h->heap */
    }

  /* Invalidate node-ids array indexes */
  for (i = 0; i < initial_heap_size; i++)
    {
      h->ids_arr[i] = -1; /* non-valid id is -1 */
    }

  h->max_heap_size = initial_heap_size;
  h->heap_increase_step = increase_step;
  h->fcomp = comparator;
  h->fndump = dumper; /* If zero, we do not dump nodes. */

  if (!(h->nodes_mpool = cl_calloc (1, sizeof (mpool))))
    {
      fprintf(stderr, "%s - error: mpool allocation failed\n", __func__);
      goto error_cleanup;       /* fix: previously leaked heap + ids_arr */
    }

  if (mpool_init (h->nodes_mpool, sizeof (hnode), nodes_prealloc) == -1)
    {
      /* NOTE(review): assumes mpool_init releases its own partial state on
         failure -- TODO confirm against mpool implementation */
      fprintf(stderr, "%s - error: mpool_init () - failed\n", __func__);
      goto error_cleanup;
    }

  return 0;

error_cleanup:
  /* release whatever was acquired so far and leave *h in a clean state */
  free (h->heap);
  h->heap = NULL;
  free (h->ids_arr);
  h->ids_arr = NULL;
  free (h->nodes_mpool);        /* cl_calloc wraps calloc, so free() applies */
  h->nodes_mpool = NULL;
  return -1;
}