Example #1
/**
 * @brief
 * Initialize our trust_anchors from ta_PEM.
 */
int
ve_trust_init(void)
{
#ifdef TRUST_ANCHOR_STR
	br_x509_certificate *xcs;
#endif
	static int once = -1;
	size_t num;

	if (once >= 0)
		return (once);

	ve_utc_set(time(NULL));
#ifdef BUILD_UTC
	ve_utc_set(BUILD_UTC);		/* just in case */
#endif
	ve_error_set(NULL);		/* make sure it is empty */
#ifdef VE_PCR_SUPPORT
	ve_pcr_init();
#endif

#ifdef TRUST_ANCHOR_STR
	xcs = parse_certificates(__DECONST(unsigned char *, TRUST_ANCHOR_STR),
	    sizeof(TRUST_ANCHOR_STR), &num);
	if (xcs != NULL)
		num = ve_trust_anchors_add(xcs, num);
#endif
	once = (int) VEC_LEN(trust_anchors);

	return (once);
}
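The once guard makes ve_trust_init idempotent: the first call does the work and caches the anchor count, and every later call returns the cached value. A minimal standalone sketch of the same memoized-init pattern, where load_anchors is a hypothetical stand-in for the parse-and-add steps above:

#include <stddef.h>

/* hypothetical stand-in for the parse/add steps above */
static size_t
load_anchors(void)
{
	return (3);		/* pretend three anchors were installed */
}

int
trust_init_once(void)
{
	static int once = -1;

	if (once >= 0)
		return (once);	/* already initialized: reuse cached count */
	once = (int) load_anchors();
	return (once);
}

int
main(void)
{
	return (trust_init_once() == trust_init_once() ? 0 : 1);
}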
Example #2
int postings_remove_offsets(struct postings *post) {
    unsigned int i,
                 dgap_bytes,
                 f_dt_bytes,
                 bytes,
                 scanned,
                 scanned_bytes;
    struct postings_node *node;
    struct vec src,
               dst;
    unsigned long int dgap,
                      f_dt;

    /* FIXME: note, this should assert !post->update_required, but due to the
     * way that TREC documents are parsed (basically under the assumption that
     * another one is always coming) we end up with an empty document at the 
     * end */
    assert(!post->update);

    for (i = 0; i < post->tblsize; i++) {
        node = post->hash[i];
        while (node) {
            src.pos = dst.pos = node->vecmem;
            src.end = dst.end = node->vec.pos;

            /* remove offsets by decoding and re-encoding the vector.  Note 
             * that we can read and write from the same vector because the
             * writing always lags the reading. */
            while ((dgap_bytes = vec_vbyte_read(&src, &dgap))
              && (f_dt_bytes = vec_vbyte_read(&src, &f_dt))
              && ((scanned = vec_vbyte_scan(&src, f_dt, &scanned_bytes)) 
                == f_dt)) {

                bytes = vec_vbyte_write(&dst, dgap);
                assert(bytes);
                bytes = vec_vbyte_write(&dst, f_dt);
                assert(bytes);
            }

            assert(dgap_bytes == 0 && VEC_LEN(&src) == 0);
            node->vec.pos = src.pos;
            node = node->next;
        }
    }

    /* note that after this operation the postings are so completely broken 
     * that they're only good for dumping or clearing, although this is not
     * currently enforced. */

    return 1;
}
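The in-place rewrite works because every posting shrinks when its offsets are dropped, so the write position can never overtake the read position. A standalone sketch of the same invariant, using hypothetical vb_put/vb_get helpers in place of zettair's vec_vbyte_write/vec_vbyte_read (the real byte layout may differ):

#include <assert.h>
#include <stdio.h>

/* hypothetical vbyte helpers standing in for vec_vbyte_write/vec_vbyte_read */
static unsigned int vb_put(unsigned char **p, unsigned long n) {
    unsigned int bytes = 1;

    while (n >= 0x80) {
        *(*p)++ = (unsigned char) (n | 0x80);  /* low 7 bits + continue bit */
        n >>= 7;
        bytes++;
    }
    *(*p)++ = (unsigned char) n;
    return bytes;
}

static unsigned int vb_get(unsigned char **p, unsigned char *end,
  unsigned long *n) {
    unsigned int shift = 0, bytes = 0;

    *n = 0;
    while (*p < end) {
        unsigned char c = *(*p)++;

        bytes++;
        *n |= (unsigned long) (c & 0x7f) << shift;
        if (!(c & 0x80))
            return bytes;
        shift += 7;
    }
    return 0;                                  /* ran out of input */
}

int main(void) {
    unsigned char buf[64], *w = buf, *r, *dst;
    unsigned long dgap, f_dt, off, i;

    /* two postings, each (d-gap, f_dt, then f_dt offsets) */
    vb_put(&w, 3); vb_put(&w, 2); vb_put(&w, 10); vb_put(&w, 20);
    vb_put(&w, 5); vb_put(&w, 1); vb_put(&w, 7);

    /* strip the offsets in place: every posting shrinks, so the write
     * position always lags the read position */
    r = dst = buf;
    while (vb_get(&r, w, &dgap) && vb_get(&r, w, &f_dt)) {
        for (i = 0; i < f_dt; i++)
            vb_get(&r, w, &off);               /* consume the offsets */
        assert(dst <= r);                      /* writer lags reader */
        vb_put(&dst, dgap);
        vb_put(&dst, f_dt);
    }
    printf("shrunk from %ld to %ld bytes\n",
      (long) (w - buf), (long) (dst - buf));   /* 7 to 4 */
    return 0;
}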
Example #3
static br_x509_pkey *
verify_signer(const char *certs,
    br_name_element *elts, size_t num_elts)
{
	br_x509_certificate *xcs;
	br_x509_pkey *pk;
	size_t num;

	pk = NULL;

	ve_trust_init();
	xcs = read_certificates(certs, &num);
	if (xcs == NULL) {
		ve_error_set("cannot read certificates\n");
		return (NULL);
	}

	/*
	 * Check if either
	 * 1. There is a direct match between cert from forbidden_anchors
	 * and a cert from chain.
	 * 2. CA that signed the chain is found in forbidden_anchors.
	 */
	if (VEC_LEN(forbidden_anchors) > 0)
		pk = verify_signer_xcs(xcs, num, elts, num_elts, &forbidden_anchors);
	if (pk != NULL) {
		ve_error_set("Certificate is on forbidden list\n");
		xfreepkey(pk);
		pk = NULL;
		goto out;
	}

	pk = verify_signer_xcs(xcs, num, elts, num_elts, &trust_anchors);
	if (pk == NULL)
		goto out;

	/*
	 * Check if hash of tbs part of any certificate in chain
	 * is on the forbidden list.
	 */
	if (check_forbidden_digests(xcs, num)) {
		ve_error_set("Certificate hash is on forbidden list\n");
		xfreepkey(pk);
		pk = NULL;
	}
out:
	free_certificates(xcs, num);
	return (pk);
}
Example #4
static enum search_ret thresh_decode(struct index *idx, struct query *query,
  unsigned int qterm, unsigned long int docno, 
  struct search_metric_results *results, 
  struct search_list_src *src, unsigned int postings, 
  int opts, struct index_search_opt *opt) {
    struct search_acc_cons *acc = results->acc,
                           **prevptr = &results->acc,
                           dummy;
    unsigned long int f_dt,           /* number of offsets for this document */
                      docno_d;        /* d-gap */

    /* initial number of accumulators */
    unsigned int initial_accs = results->accs,

                 decoded = 0,         /* number of postings decoded */
                 thresh,              /* current discrete threshold */
                 rethresh,            /* distance to recalculation of the 
                                       * threshold */
                 rethresh_dist,
                 bytes,
                 step,
                 missed = 0,        /* number of list entries that didn't match 
                                     * an accumulator */
                 hit = 0;           /* number of entries in both accs and list*/
 
    struct vec v = {NULL, NULL};
    enum search_ret ret;
    int infinite = 0;                 /* whether threshold is infinite */
    float cooc_rate;
    /* METRIC_DECL */

    const unsigned int N = docmap_entries(idx->map);
    double avg_D_terms;
    float w_t;
    float r_dt;

    float r_qt = (((opt->u.okapi_k3.k3) + 1) * (query->term[qterm].f_qt))
      / ((opt->u.okapi_k3.k3) + (query->term[qterm].f_qt));
    if (docmap_avg_words(idx->map, &avg_D_terms) != DOCMAP_OK) {
        return SEARCH_EINVAL;
    }


    /* METRIC_PER_CALL */
    w_t = (float) logf((N - (query->term[qterm].f_t) + 0.5F) / ((query->term[qterm].f_t) + 0.5F));
    /* fix for okapi bug, w_t shouldn't be 0 or negative. */
    if (w_t <= 0.0F) {
        /* use a very small increment instead */
        w_t = FLT_EPSILON;
    }

    rethresh_dist = rethresh = (postings + results->acc_limit - 1)
      / results->acc_limit;

    if (results->v_t == FLT_MIN) {
        unsigned long int docno_copy = docno;

        /* this should be the first thresholded list, need to estimate 
         * threshold */
        assert(rethresh && rethresh < postings);
        thresh = 0;

        assert(rethresh < postings);
        while (rethresh) {
            while (rethresh && NEXT_DOC(&v, docno, f_dt)) {
                rethresh--;
                SCAN_OFFSETS(src, &v, f_dt);
                if (f_dt > thresh) {
                    thresh = f_dt;
                }
            }

            /* need to read more data, preserving bytes that we already have */
            if (rethresh && (ret = src->readlist(src, VEC_LEN(&v),
                (void **) &v.pos, &bytes)) == SEARCH_OK) {

                v.end = v.pos + bytes;
            } else if (rethresh) {
                assert(ret != SEARCH_FINISH);
                return ret;
            }
        }
        thresh--;

        acc = &dummy;
        acc->acc.docno = UINT_MAX;   /* shouldn't be used */
        acc->acc.weight = 0.0;
        f_dt = thresh;
        /* METRIC_CONTRIB */
        r_dt = (((opt->u.okapi_k3.k1) + 1) * f_dt)
          / ((opt->u.okapi_k3.k1)
            * ((1 - (opt->u.okapi_k3.b))
              + (((opt->u.okapi_k3.b) * ((float) avg_D_terms))
                / (float) avg_D_terms))
            + f_dt);
        (acc->acc.weight) += r_dt * w_t * r_qt;

        results->v_t = acc->acc.weight;

        /* reset source/vector to start */
        v.pos = v.end = NULL;
        if ((ret = src->reset(src)) != SEARCH_OK) {
            return ret;
        }

        acc = *prevptr;
        docno = docno_copy;
        rethresh = rethresh_dist;
    } else {
        /* translate the existing v_t threshold to an f_dt */
        acc = &dummy;
        acc->acc.docno = UINT_MAX;   /* shouldn't be used */
        f_dt = 0;
        do {
            acc->acc.weight = 0.0;
            f_dt++;
            /* METRIC_CONTRIB */
            r_dt = (((opt->u.okapi_k3.k1) + 1) * f_dt)
              / ((opt->u.okapi_k3.k1)
                * ((1 - (opt->u.okapi_k3.b))
                  + (((opt->u.okapi_k3.b) * ((float) avg_D_terms))
                    / (float) avg_D_terms))
                + f_dt);
            (acc->acc.weight) += r_dt * w_t * r_qt;

        } while (acc->acc.weight < results->v_t && f_dt < INF);
        thresh = f_dt; 
        acc = *prevptr;

        if (thresh == INF) {
            /* this is not a sensible term */
            infinite = 1;
            rethresh = postings + 1;
        }
    }

    /* set step to 1/2 of the threshold */
    step = (thresh + 1) / 2;
    step += !step; /* but don't let it become 0 */

    while (1) {
        while (NEXT_DOC(&v, docno, f_dt)) {
            SCAN_OFFSETS(src, &v, f_dt);
            decoded++;

            /* merge into accumulator list */
            while (acc && (docno > acc->acc.docno)) {
                /* perform threshold test */
                if (acc->acc.weight < results->v_t) {
                    /* remove this accumulator */
                    *prevptr = acc->next;
                    objalloc_free(results->alloc, acc);
                    acc = (*prevptr);
                    results->accs--;
                } else {
                    /* retain this accumulator */
                    prevptr = &acc->next;
                    acc = acc->next;
                }
            }

            if (acc && (docno == acc->acc.docno)) {
                /* METRIC_PER_DOC */
                r_dt = (((opt->u.okapi_k3.k1) + 1) * f_dt)
                  / ((opt->u.okapi_k3.k1)
                    * ((1 - (opt->u.okapi_k3.b))
                      + (((opt->u.okapi_k3.b)
                          * DOCMAP_GET_WORDS(idx->map, acc->acc.docno))
                        / (float) avg_D_terms))
                    + f_dt);
                (acc->acc.weight) += r_dt * w_t * r_qt;


                if (acc->acc.weight < results->v_t) {
                    /* remove this accumulator */
                    *prevptr = acc->next;
                    objalloc_free(results->alloc, acc);
                    acc = *prevptr;
                    results->accs--;
                } else {
                    /* go to next accumulator */
                    prevptr = &acc->next;
                    acc = acc->next;
                }
                hit++;
            } else {
                if (f_dt > thresh) {
                    struct search_acc_cons *newacc;
                    assert(!acc || docno < acc->acc.docno); 

                    if ((newacc = objalloc_malloc(results->alloc, 
                      sizeof(*newacc)))) {
                        newacc->acc.docno = docno;
                        newacc->acc.weight = 0.0;
                        newacc->next = acc;
                        acc = newacc;
                        /* note that we have to be careful around here to 
                         * assign newacc to acc before using PER_DOC, 
                         * otherwise we end up with nonsense in some 
                         * accumulators */
                        /* METRIC_PER_DOC */
                        r_dt = (((opt->u.okapi_k3.k1) + 1) * f_dt)
                          / ((opt->u.okapi_k3.k1)
                            * ((1 - (opt->u.okapi_k3.b))
                              + (((opt->u.okapi_k3.b)
                                  * DOCMAP_GET_WORDS(idx->map,
                                    acc->acc.docno))
                                / (float) avg_D_terms))
                            + f_dt);
                        (acc->acc.weight) += r_dt * w_t * r_qt;

                        *prevptr = newacc;
                        results->accs++;
                    } else {
                        return SEARCH_ENOMEM;
                    }

                    /* go to next accumulator */
                    prevptr = &acc->next;
                    acc = acc->next;
                } else {
                    missed++;
                }
            }

            if (!--rethresh) {
                int estimate;
                unsigned int prev_thresh = thresh;

                estimate = (int) (results->accs 
                  + ((postings - decoded) 
                    * ((float) results->accs - initial_accs)) / decoded);

                if (estimate > TOLERANCE * results->acc_limit) {
                    thresh += step;
                } else if ((estimate < results->acc_limit / TOLERANCE) 
                  && thresh) {
                    if (thresh >= step) {
                        thresh -= step;
                    } else {
                        thresh = 0;
                    }
                }

                step = (step + 1) / 2;
                assert(step);

                /* note that we don't want to recalculate the threshold if it
                 * doesn't change because this involves re-discretising it */
                if (prev_thresh != thresh) {
                    /* recalculate contribution that corresponds to the new 
                     * threshold */
                    f_dt = thresh;
                    if (f_dt) {
                        acc = &dummy;
                        acc->acc.docno = UINT_MAX;   /* shouldn't be used */
                        acc->acc.weight = 0.0;
                        /* METRIC_CONTRIB */
                        r_dt = (((opt->u.okapi_k3.k1) + 1) * f_dt)
                          / ((opt->u.okapi_k3.k1)
                            * ((1 - (opt->u.okapi_k3.b))
                              + (((opt->u.okapi_k3.b) * ((float) avg_D_terms))
                                / (float) avg_D_terms))
                            + f_dt);
                        (acc->acc.weight) += r_dt * w_t * r_qt;

                        results->v_t = acc->acc.weight;
                        acc = *prevptr;
                    } else {
                        results->v_t = FLT_MIN;
                    }
                }

                rethresh_dist *= 2;
                rethresh = rethresh_dist;
            }
        }

        /* need to read more data, preserving bytes that we already have */
        if ((ret = src->readlist(src, VEC_LEN(&v),
            (void **) &v.pos, &bytes)) == SEARCH_OK) {

            v.end = v.pos + bytes;
        } else if (ret == SEARCH_FINISH) {
            /* finished, estimate total results count */
            assert(postings == decoded);

            results->total_results += (int) (results->accs - initial_accs);

            /* list entries now divide up into three portions:
             *   - matching an entry in the acc list (hit)
             *   - missed
             *   - added
             *
             * co-occurrence rate is the percentage of list items hit */
            cooc_rate = hit / (float) decoded;

            /* now have sampled co-occurrence rate, use this to estimate
             * population co-occurrence rate (assuming unbiased sampling)
             * and then number of results from unrestricted evaluation */
            assert(results->total_results >= results->accs);
            cooc_rate 
              *= (float) results->total_results / (float) results->accs; 
            assert(cooc_rate >= 0.0);
            if (cooc_rate > 1.0) {
                cooc_rate = 1.0;
            }

            /* add number of things we think would have been added from the
             * things that were missed */
            results->total_results += (1 - cooc_rate) * missed;

            /* note that the total results are not an estimate if either there
             * were no accumulators in the list when we started (in which case
             * missed records exactly the number, uh, missing from the
             * accumulators) or there were none missed, in which case the
             * accumulators have fully accounted for everything in this list.
             * In either case, the (1 - cooc_rate) * missed maths above handles
             * it exactly (modulo floating point errors of course). */
            if (initial_accs && missed) {
                results->estimated |= 1;
            }

            if (!VEC_LEN(&v)) {
                if (!infinite) {
                    /* continue threshold evaluation */
                    return SEARCH_OK;
                } else {
                    /* switch to AND processing */
                    return SEARCH_FINISH;
                }
            } else {
                return SEARCH_EINVAL;
            }
        } else {
            return ret;
        }
    }
}
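The re-threshold step extrapolates the final accumulator count linearly from the fraction of postings decoded so far, then nudges the discrete threshold and halves the step, a binary-search style adjustment. A worked sketch of that arithmetic with invented numbers (TOLERANCE stands in for the constant used above):

#include <stdio.h>

#define TOLERANCE 2   /* assumed slack factor around the accumulator target */

int main(void) {
    unsigned int initial_accs = 1000,  /* accumulators when the list began */
                 accs = 2000,          /* accumulators now */
                 postings = 100000, decoded = 20000,
                 acc_limit = 2000, thresh = 8, step = 4;
    int estimate;

    /* linear extrapolation over the undecoded remainder of the list */
    estimate = (int) (accs
      + ((postings - decoded) * ((float) accs - initial_accs)) / decoded);
    printf("estimate = %d\n", estimate);   /* 2000 + 80000*1000/20000 = 6000 */

    if (estimate > TOLERANCE * (int) acc_limit)
        thresh += step;                    /* too many predicted: tighten */
    else if (estimate < (int) acc_limit / TOLERANCE && thresh)
        thresh = (thresh >= step) ? thresh - step : 0;
    step = (step + 1) / 2;                 /* halve the step, keep it nonzero */
    printf("thresh = %u, step = %u\n", thresh, step);  /* 12, 2 */
    return 0;
}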
Example #5
static enum search_ret and_decode(struct index *idx, struct query *query, 
  unsigned int qterm, unsigned long int docno, 
  struct search_metric_results *results, struct search_list_src *src,
  int opts, struct index_search_opt *opt) {
    struct search_acc_cons *acc = results->acc;
    unsigned long int f_dt,        /* number of offsets for this document */
                      docno_d;     /* d-gap */
    struct vec v = {NULL, NULL};
    unsigned int bytes,
                 missed = 0,       /* number of list entries that didn't match 
                                    * an accumulator */
                 hit = 0,          /* number of entries in both accs and list*/
                 decoded = 0;      /* number of list entries seen */
    enum search_ret ret;
    float cooc_rate;               /* co-occurrence rate for list entries and
                                    * accumulators */
    /* METRIC_DECL */

    const unsigned int N = docmap_entries(idx->map);
    double avg_D_terms;
    float w_t;
    float r_dt;

    float r_qt = (((opt->u.okapi_k3.k3) + 1) * (query->term[qterm].f_qt))
      / ((opt->u.okapi_k3.k3) + (query->term[qterm].f_qt));
    if (docmap_avg_words(idx->map, &avg_D_terms) != DOCMAP_OK) {
        return SEARCH_EINVAL;
    }


    /* METRIC_PER_CALL */
    w_t = (float) logf((N - (query->term[qterm].f_t) + 0.5F) / ((query->term[qterm].f_t) + 0.5F));
    /* fix for okapi bug, w_t shouldn't be 0 or negative. */
    if (w_t <= 0.0F) {
        /* use a very small increment instead */
        w_t = FLT_EPSILON;
    }

    while (1) {
        while (NEXT_DOC(&v, docno, f_dt)) {
            SCAN_OFFSETS(src, &v, f_dt);
            decoded++;

            /* merge into accumulator list */
            while (acc && (docno > acc->acc.docno)) {
                acc = acc->next;
            }

            if (acc && (docno == acc->acc.docno)) {
                /* METRIC_PER_DOC */
                r_dt = (((opt->u.okapi_k3.k1) + 1) * f_dt)
                  / ((opt->u.okapi_k3.k1)
                    * ((1 - (opt->u.okapi_k3.b))
                      + (((opt->u.okapi_k3.b)
                          * DOCMAP_GET_WORDS(idx->map, acc->acc.docno))
                        / (float) avg_D_terms))
                    + f_dt);
                (acc->acc.weight) += r_dt * w_t * r_qt;


                /* go to next accumulator */
                acc = acc->next;
                hit++;
            } else {
                missed++;
            }
        }

        /* need to read more data, preserving bytes that we already have */
        if ((ret = src->readlist(src, VEC_LEN(&v),
            (void **) &v.pos, &bytes)) == SEARCH_OK) {

            v.end = v.pos + bytes;
        } else if (ret == SEARCH_FINISH) {
            /* finished, estimate number of results */

            /* list entries now divide up into two portions:
             *   - matching an entry in the acc list (hit)
             *   - missed
             *
             * co-occurrence rate is the percentage of list items hit */
            assert(missed + hit == decoded);
            cooc_rate = hit / (float) decoded;

            /* now have sampled co-occurrence rate, use this to estimate
             * population co-occurrence rate (assuming unbiased sampling)
             * and then number of results from unrestricted evaluation */
            assert(results->total_results >= results->accs);
            cooc_rate 
              *= (float) results->total_results / (float) results->accs; 
            assert(cooc_rate >= 0.0);
            if (cooc_rate > 1.0) {
                cooc_rate = 1.0;
            }

            /* add number of things we think would have been added from the
             * things that were missed */
            results->total_results += (1 - cooc_rate) * missed;

            if (missed) {
                results->estimated |= 1;
            }

            if (!VEC_LEN(&v)) {
                return SEARCH_OK;
            } else {
                return SEARCH_EINVAL;
            }
        } else {
            return ret;
        }
    }
}
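The result estimate scales the sampled co-occurrence rate up to the whole accumulator population, then credits the missed entries with the leftover probability. A worked sketch of the estimate with invented counts:

#include <assert.h>
#include <stdio.h>

int main(void) {
    unsigned int hit = 500,            /* list entries that hit an acc */
                 missed = 500,         /* list entries that missed */
                 decoded = 1000,       /* total list entries seen */
                 total_results = 3000, /* results credited so far */
                 accs = 2000;          /* accumulators held */
    float cooc_rate;

    assert(missed + hit == decoded);
    cooc_rate = hit / (float) decoded;            /* sampled rate: 0.5 */
    cooc_rate *= (float) total_results / accs;    /* population rate: 0.75 */
    if (cooc_rate > 1.0f)
        cooc_rate = 1.0f;

    /* of the misses, (1 - cooc_rate) would still have been results in an
     * unrestricted evaluation */
    total_results += (unsigned int) ((1 - cooc_rate) * missed);
    printf("estimated total results: %u\n", total_results);  /* 3125 */
    return 0;
}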
Example #6
static enum search_ret or_decode(struct index *idx, struct query *query, 
  unsigned int qterm, unsigned long int docno, 
  struct search_metric_results *results, struct search_list_src *src, 
  int opts, struct index_search_opt *opt) {
    struct search_acc_cons *acc = results->acc,
                           **prevptr = &results->acc;
    unsigned int accs_added = 0;   /* number of accumulators added */
    unsigned long int f_dt,        /* number of offsets for this document */
                      docno_d;     /* d-gap */
    unsigned int bytes;
    struct vec v = {NULL, NULL};
    enum search_ret ret;
    /* METRIC_DECL */

    const unsigned int N = docmap_entries(idx->map);
    double avg_D_terms;
    float w_t;
    float r_dt;

    float r_qt = (((opt->u.okapi_k3.k3) + 1) * (query->term[qterm].f_qt))
      / ((opt->u.okapi_k3.k3) + (query->term[qterm].f_qt));
    if (docmap_avg_words(idx->map, &avg_D_terms) != DOCMAP_OK) {
        return SEARCH_EINVAL;
    }


    /* METRIC_PER_CALL */
    w_t = (float) logf((N - (query->term[qterm].f_t) + 0.5F) / ((query->term[qterm].f_t) + 0.5F));
    /* fix for okapi bug, w_t shouldn't be 0 or negative. */
    if (w_t <= 0.0F) {
        /* use a very small increment instead */
        w_t = FLT_EPSILON;
    }

    while (1) {
        while (NEXT_DOC(&v, docno, f_dt)) {
            SCAN_OFFSETS(src, &v, f_dt);

            /* merge into accumulator list */
            while (acc && (docno > acc->acc.docno)) {
                prevptr = &acc->next;
                acc = acc->next;
            }

            if (acc && (docno == acc->acc.docno)) {
                /* METRIC_PER_DOC */
                r_dt = (((opt->u.okapi_k3.k1) + 1) * f_dt)
                  / ((opt->u.okapi_k3.k1)
                    * ((1 - (opt->u.okapi_k3.b))
                      + (((opt->u.okapi_k3.b)
                          * DOCMAP_GET_WORDS(idx->map, acc->acc.docno))
                        / (float) avg_D_terms))
                    + f_dt);
                (acc->acc.weight) += r_dt * w_t * r_qt;

            } else {
                struct search_acc_cons *newacc;
                assert(!acc || docno < acc->acc.docno); 

                /* allocate a new accumulator (we have reserved allocators
                 * earlier, so this should never fail) */
                newacc = objalloc_malloc(results->alloc, sizeof(*newacc));
                assert(newacc);
                newacc->next = acc;
                acc = newacc;
                acc->acc.docno = docno;
                acc->acc.weight = 0.0;
                /* METRIC_PER_DOC */
                r_dt = (((opt->u.okapi_k3.k1) + 1) * f_dt)
                  / ((opt->u.okapi_k3.k1)
                    * ((1 - (opt->u.okapi_k3.b))
                      + (((opt->u.okapi_k3.b)
                          * DOCMAP_GET_WORDS(idx->map, acc->acc.docno))
                        / (float) avg_D_terms))
                    + f_dt);
                (acc->acc.weight) += r_dt * w_t * r_qt;

                *prevptr = newacc;
                accs_added++;
            }
            assert(acc);

            /* go to next accumulator */
            prevptr = &acc->next;
            acc = acc->next;
        }

        /* need to read more data, preserving bytes that we already have */
        if ((ret = src->readlist(src, VEC_LEN(&v),
            (void **) &v.pos, &bytes)) == SEARCH_OK) {

            v.end = v.pos + bytes;
        } else if (ret == SEARCH_FINISH) {
            /* finished, update number of accumulators */
            results->accs += accs_added;
            results->total_results += accs_added;

            if (!VEC_LEN(&v)) {
                return SEARCH_OK;
            } else {
                return SEARCH_EINVAL;
            }
        } else {
            return ret;
        }
    }
}
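New accumulators are threaded into the docno-sorted list through prevptr, a pointer to the link being rewritten, so insertion needs no special case for the list head or a trailing "previous node". A minimal sketch of the same pointer-to-pointer idiom:

#include <stdio.h>
#include <stdlib.h>

struct acc {
    unsigned long docno;
    struct acc *next;
};

/* splice docno into the sorted list; prevptr always points at the link
 * that may need rewriting, as in or_decode above */
static void insert_sorted(struct acc **prevptr, unsigned long docno) {
    struct acc *cur = *prevptr, *node;

    while (cur && docno > cur->docno) {
        prevptr = &cur->next;
        cur = cur->next;
    }
    if (cur && cur->docno == docno)
        return;                  /* existing accumulator: would add weight */
    if (!(node = malloc(sizeof(*node))))
        return;                  /* out of memory: drop this entry */
    node->docno = docno;
    node->next = cur;
    *prevptr = node;             /* no special case for the head */
}

int main(void) {
    struct acc *head = NULL, *p;
    unsigned long docs[] = { 9, 3, 7, 3, 1 };
    unsigned int i;

    for (i = 0; i < sizeof(docs) / sizeof(docs[0]); i++)
        insert_sorted(&head, docs[i]);
    for (p = head; p; p = p->next)
        printf("%lu ", p->docno);      /* prints: 1 3 7 9 */
    printf("\n");
    return 0;
}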
Example #7
static enum search_ret and_decode(struct index *idx, struct query *query, 
  unsigned int qterm, unsigned long int docno, 
  struct search_metric_results *results, struct search_list_src *src,
  int opts, struct index_search_opt *opt) {
    struct search_acc_cons *acc = results->acc;
    unsigned long int f_dt,        /* number of offsets for this document */
                      docno_d;     /* d-gap */
    struct vec v = {NULL, NULL};
    unsigned int bytes,
                 missed = 0,       /* number of list entries that didn't match 
                                    * an accumulator */
                 hit = 0,          /* number of entries in both accs and list*/
                 decoded = 0;      /* number of list entries seen */
    enum search_ret ret;
    float cooc_rate;               /* co-occurrence rate for list entries and
                                    * accumulators */
    /* METRIC_DECL */

    /* METRIC_PER_CALL */

    while (1) {
        while (NEXT_DOC(&v, docno, f_dt)) {
            SCAN_OFFSETS(src, &v, f_dt);
            decoded++;

            /* merge into accumulator list */
            while (acc && (docno > acc->acc.docno)) {
                acc = acc->next;
            }

            if (acc && (docno == acc->acc.docno)) {
                /* METRIC_PER_DOC */
                (acc->acc.weight) += (1 + (float) logf((query->term[qterm].f_qt))) * (1 + (float) logf(f_dt));


                /* go to next accumulator */
                acc = acc->next;
                hit++;
            } else {
                missed++;
            }
        }

        /* need to read more data, preserving bytes that we already have */
        if ((ret = src->readlist(src, VEC_LEN(&v),
            (void **) &v.pos, &bytes)) == SEARCH_OK) {

            v.end = v.pos + bytes;
        } else if (ret == SEARCH_FINISH) {
            /* finished, estimate number of results */

            /* list entries now divide up into two portions:
             *   - matching an entry in the acc list (hit)
             *   - missed
             *
             * co-occurrence rate is the percentage of list items hit */
            assert(missed + hit == decoded);
            cooc_rate = hit / (float) decoded;

            /* now have sampled co-occurrence rate, use this to estimate
             * population co-occurrence rate (assuming unbiased sampling)
             * and then number of results from unrestricted evaluation */
            assert(results->total_results >= results->accs);
            cooc_rate 
              *= (float) results->total_results / (float) results->accs; 
            assert(cooc_rate >= 0.0);
            if (cooc_rate > 1.0) {
                cooc_rate = 1.0;
            }

            /* add number of things we think would have been added from the
             * things that were missed */
            results->total_results += (1 - cooc_rate) * missed;

            if (missed) {
                results->estimated |= 1;
            }

            if (!VEC_LEN(&v)) {
                return SEARCH_OK;
            } else {
                return SEARCH_EINVAL;
            }
        } else {
            return ret;
        }
    }
}
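The per-document contribution here is the classic log-TF product (1 + ln f_qt)(1 + ln f_dt). A tiny sketch of that contribution with invented frequencies:

#include <math.h>
#include <stdio.h>

int main(void) {
    unsigned long f_qt = 2,   /* occurrences of the term in the query */
                  f_dt = 5;   /* occurrences of the term in the document */
    float w = (1 + logf((float) f_qt)) * (1 + logf((float) f_dt));

    printf("contribution = %f\n", w);   /* (1 + ln 2)(1 + ln 5) ~= 4.42 */
    return 0;
}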
Example #8
static enum search_ret or_decode(struct index *idx, struct query *query, 
  unsigned int qterm, unsigned long int docno, 
  struct search_metric_results *results, struct search_list_src *src, 
  int opts, struct index_search_opt *opt) {
    struct search_acc_cons *acc = results->acc,
                           **prevptr = &results->acc;
    unsigned int accs_added = 0;   /* number of accumulators added */
    unsigned long int f_dt,        /* number of offsets for this document */
                      docno_d;     /* d-gap */
    unsigned int bytes;
    struct vec v = {NULL, NULL};
    enum search_ret ret;
    /* METRIC_DECL */

    /* METRIC_PER_CALL */

    while (1) {
        while (NEXT_DOC(&v, docno, f_dt)) {
            SCAN_OFFSETS(src, &v, f_dt);

            /* merge into accumulator list */
            while (acc && (docno > acc->acc.docno)) {
                prevptr = &acc->next;
                acc = acc->next;
            }

            if (acc && (docno == acc->acc.docno)) {
                /* METRIC_PER_DOC */
                (acc->acc.weight) += (1 + (float) logf((query->term[qterm].f_qt))) * (1 + (float) logf(f_dt));

            } else {
                struct search_acc_cons *newacc;
                assert(!acc || docno < acc->acc.docno); 

                /* allocate a new accumulator (we have reserved allocators
                 * earlier, so this should never fail) */
                newacc = objalloc_malloc(results->alloc, sizeof(*newacc));
                assert(newacc);
                newacc->next = acc;
                acc = newacc;
                acc->acc.docno = docno;
                acc->acc.weight = 0.0;
                /* METRIC_PER_DOC */
                (acc->acc.weight) += (1 + (float) logf((query->term[qterm].f_qt))) * (1 + (float) logf(f_dt));

                *prevptr = newacc;
                accs_added++;
            }
            assert(acc);

            /* go to next accumulator */
            prevptr = &acc->next;
            acc = acc->next;
        }

        /* need to read more data, preserving bytes that we already have */
        if ((ret = src->readlist(src, VEC_LEN(&v),
            (void **) &v.pos, &bytes)) == SEARCH_OK) {

            v.end = v.pos + bytes;
        } else if (ret == SEARCH_FINISH) {
            /* finished, update number of accumulators */
            results->accs += accs_added;
            results->total_results += accs_added;

            if (!VEC_LEN(&v)) {
                return SEARCH_OK;
            } else {
                return SEARCH_EINVAL;
            }
        } else {
            return ret;
        }
    }
}
Example #9
enum vocab_ret vocab_decode(struct vocab_vector *vocab, struct vec *v) {
    unsigned long int tmp;
    unsigned int bytes = 0,
                 ret;
    unsigned char byte;

    VALGRIND_CHECK_WRITABLE(vocab, sizeof(*vocab));

    /* if debugging, clear the vocab vector first */
    assert((memset(vocab, 0, sizeof(*vocab)), 1));

    /* check that memory can be accessed, then mark vocab entry as 
     * uninitialised */
    VALGRIND_MAKE_WRITABLE(vocab, sizeof(*vocab));
    VALGRIND_CHECK_READABLE(v->pos, VEC_LEN(v));

    /* first, get first byte which contains attribute, location and type
     * indications */
    if (v->pos < v->end) {
        vec_byte_read(v, (char *) &byte, 1);
        bytes++;
        vocab->attr = byte & BIT_LMASK(2);
        byte >>= 2;
        vocab->location = byte & BIT_LMASK(2);
        byte >>= 2;
        vocab->type = byte;

        if (vocab->attr & VOCAB_ATTRIBUTES_PERLIST) {
            if ((ret = vec_vbyte_read(v, &tmp))) {
                vocab->attribute = (unsigned int) tmp;
                bytes += ret;
            } else {
                if (((unsigned int) VEC_LEN(v)) <= vec_vbyte_len(UINT_MAX)) {
                    v->pos -= bytes;
                    return VOCAB_ENOSPC;
                } else {
                    v->pos -= bytes;
                    return VOCAB_EOVERFLOW;
                }
            }
        }

        /* get common header entries */
        if ((ret = vec_vbyte_read(v, &vocab->size))
          && (bytes += ret)
          && (ret = vec_vbyte_read(v, &vocab->header.doc.docs))
          && (bytes += ret)
          && (ret = vec_vbyte_read(v, &vocab->header.doc.occurs))
          && (bytes += ret)
          && (ret = vec_vbyte_read(v, &vocab->header.doc.last))
          && (bytes += ret)) {
            /* succeeded, do nothing */
        } else {
            if (((unsigned int) VEC_LEN(v)) <= vec_vbyte_len(UINT_MAX)) {
                v->pos -= bytes;
                return VOCAB_ENOSPC;
            } else {
                v->pos -= bytes;
                return VOCAB_EOVERFLOW;
            }
        }

        /* get specific header entries */
        switch (vocab->type) {
        case VOCAB_VTYPE_DOC:
        case VOCAB_VTYPE_DOCWP:
            /* ok, so i cheated a little and just read the common, uh, not
             * common ones above (they're not common because future vector 
             * types might not have them)... */
            break;

        case VOCAB_VTYPE_IMPACT:
            break;

        default: 
            v->pos -= bytes; 
            return VOCAB_EINVAL;
        }

        /* get location */
        switch (vocab->location) {
        case VOCAB_LOCATION_VOCAB:
            if (((unsigned int) VEC_LEN(v)) >= vocab->size) {
                /* note that we increment vector over in-vocab vector so that
                 * successive _decode calls will work as planned */
                vocab->loc.vocab.vec = v->pos;
                v->pos += vocab->size;
                bytes += vocab->size;
            } else {
                v->pos -= bytes; 
                return VOCAB_ENOSPC;
            }
            break;
       
        case VOCAB_LOCATION_FILE:
            if ((ret = vec_vbyte_read(v, &tmp))
              && ((vocab->loc.file.fileno = tmp), (bytes += ret))
              && (ret = vec_vbyte_read(v, &vocab->loc.file.offset))
              && (bytes += ret)
              && (ret = vec_vbyte_read(v, &tmp))
              && ((vocab->loc.file.capacity = tmp), (bytes += ret))) {
                /* succeeded, do nothing */
            } else {
                if (((unsigned int) VEC_LEN(v)) <= vec_vbyte_len(UINT_MAX)) {
                    v->pos -= bytes;
                    return VOCAB_ENOSPC;
                } else {
                    v->pos -= bytes;
                    return VOCAB_EOVERFLOW;
                }
            }
            break;

        default: 
            v->pos -= bytes;
            return VOCAB_EINVAL;
        }

        return VOCAB_OK;
    } else {
        /* no bytes left to decode; assumed to signal end-of-vector here
         * (VOCAB_END from the vocab_ret enum) */
        return VOCAB_END;
    }
}
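The first byte of each vocab entry packs three fields: the attribute flags in the low two bits, the location code in the next two, and the vector type in the top four. A standalone sketch of the packing and the exact unpacking sequence used above (BIT_LMASK is assumed to be the usual low-bit mask macro):

#include <assert.h>
#include <stdio.h>

#define BIT_LMASK(n) ((1U << (n)) - 1)   /* assumed: mask of the n low bits */

int main(void) {
    unsigned char byte;
    unsigned int attr = 2, location = 1, type = 3,
                 d_attr, d_location, d_type;

    /* pack: attr in bits 0-1, location in bits 2-3, type in bits 4-7 */
    byte = (unsigned char) (attr | (location << 2) | (type << 4));

    /* unpack, exactly as vocab_decode does */
    d_attr = byte & BIT_LMASK(2); byte >>= 2;
    d_location = byte & BIT_LMASK(2); byte >>= 2;
    d_type = byte;

    assert(d_attr == 2 && d_location == 1 && d_type == 3);
    printf("attr=%u location=%u type=%u\n", d_attr, d_location, d_type);
    return 0;
}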
Example #10
/********************************************************
 * Search for an intersection between a nappe and a ray *
 *******************************************************/
BOOL hit_geo_nappe (GEO *Generic, RAY *Ray, HIT *Hit, PAIR *Bound, void *Info)
{
	GEO_NAPPE	*Geo;
	FCT		*Fct;
	PNT	        *Pnt, *PntA, *PntB;
	VECTOR		Normal;
	VECTOR		Point;
	REAL		a, b, c, u, v, uA, vA, uB, vB, Distance, Real;

  Geo = (GEO_NAPPE *) Generic;

  Fct = (FCT *) Info;
  Distance = VEC_DOT (Ray->Vector, Fct->Normal);
  if (ABS(Distance) < EPSILON)
    return (FALSE);
  Pnt = Geo->TabPnt + Fct->i;
  VEC_SUB (Point, Pnt->Point, Ray->Point);
  Distance = VEC_DOT (Point, Fct->Normal) / Distance; /* -Epsilon removed */
  if (Distance < Bound->u || Distance > Bound->v)
    return (FALSE);
  VEC_LIN (Point, Ray->Point, Distance, Ray->Vector);

  if ((ABS(Fct->Normal.z) > ABS(Fct->Normal.x)) && (ABS(Fct->Normal.z) > ABS(Fct->Normal.y))) {
    u = Point.x - Pnt->Point.x; v = Point.y - Pnt->Point.y;
    PntA = Geo->TabPnt + Fct->j; PntB = Geo->TabPnt + Fct->k;
    uA = PntA->Point.x - Pnt->Point.x; vA = PntA->Point.y - Pnt->Point.y;
    uB = PntB->Point.x - Pnt->Point.x; vB = PntB->Point.y - Pnt->Point.y;
    a = uA*vB - vA*uB; b = (u*vB - v*uB)/a; c = (v*uA-u*vA) / a; a = 1.0-b-c;

    if (a > -EPSILON && b > -EPSILON && c > -EPSILON) {
      VEC_INTER (Normal, a, Pnt->Normal, b, PntA->Normal, c, PntB->Normal);
      Real = VEC_DOT (Ray->Vector, Normal);
      if (Real > 0.0)
	return (FALSE);
      if (Hit) {
        Real = VEC_LEN (Normal); VEC_UNIT (Normal, Real);
        Ray->Distance = Distance; Hit->Point = Point; Hit->Normal = Normal;
	xyz2uv_geo_nappe (Geo, Hit, Fct);
      }
      return (TRUE);
    }

    if (Fct->l == Fct->i) 
      return (FALSE);
    
    PntA = Geo->TabPnt + Fct->l;
    uA = PntA->Point.x - Pnt->Point.x; vA = PntA->Point.y - Pnt->Point.y;
    a = uA*vB - vA*uB; b = (u*vB - v*uB)/a; c = (v*uA - u*vA)/a; a = 1.0-b-c;

    if (a > -EPSILON && b > -EPSILON && c > -EPSILON) {
      VEC_INTER (Normal, a, Pnt->Normal, b, PntA->Normal, c, PntB->Normal);
      Real = VEC_DOT (Ray->Vector, Normal);
      if (Real > 0.0)
	return (FALSE);
      if (Hit) {
        Real = VEC_LEN (Normal); VEC_UNIT (Normal, Real);
        Ray->Distance = Distance; Hit->Point = Point; Hit->Normal = Normal;
	xyz2uv_geo_nappe (Geo, Hit, Fct);
      }
      return (TRUE);
    }
    return (FALSE);
  }

  else if (ABS(Fct->Normal.y) > ABS(Fct->Normal.x)) {
    u = Point.z - Pnt->Point.z; v = Point.x - Pnt->Point.x;
    PntA = Geo->TabPnt + Fct->j; PntB = Geo->TabPnt + Fct->k;
    uA = PntA->Point.z - Pnt->Point.z; vA = PntA->Point.x - Pnt->Point.x;
    uB = PntB->Point.z - Pnt->Point.z; vB = PntB->Point.x - Pnt->Point.x;
    a = uA*vB - vA*uB; b = (u*vB - v*uB)/a; c = (v*uA - u*vA)/a; a = 1.0-b-c;

    if (a > -EPSILON && b > -EPSILON && c > -EPSILON) {
      VEC_INTER (Normal, a, Pnt->Normal, b, PntA->Normal, c, PntB->Normal);
      Real = VEC_DOT (Ray->Vector, Normal);
      if (Real > 0.0)
	return (FALSE);
      if (Hit) {
        Real = VEC_LEN (Normal); VEC_UNIT (Normal, Real);
        Ray->Distance = Distance; Hit->Point = Point; Hit->Normal = Normal;
	xyz2uv_geo_nappe (Geo, Hit, Fct);
      }
      return (TRUE);
    }
    
    if (Fct->l == Fct->i) return (FALSE);

    PntA = Geo->TabPnt + Fct->l;
    uA = PntA->Point.z - Pnt->Point.z; vA = PntA->Point.x - Pnt->Point.x;
    a = uA*vB - vA*uB; b = (u*vB - v*uB)/a; c = (v*uA - u*vA)/a; a = 1.0-b-c;

    if (a > -EPSILON && b > -EPSILON && c > -EPSILON) {
      VEC_INTER (Normal, a, Pnt->Normal, b, PntA->Normal, c, PntB->Normal);
      Real = VEC_DOT (Ray->Vector, Normal);
      if (Real > 0.0)
	return (FALSE);
      if (Hit) {
        Real = VEC_LEN (Normal); VEC_UNIT (Normal, Real);
        Ray->Distance = Distance; Hit->Point = Point; Hit->Normal = Normal;
	xyz2uv_geo_nappe (Geo, Hit, Fct);
      }
      return (TRUE);
    }
    return (FALSE);
  }

  else if (ABS(Fct->Normal.x) > EPSILON) {
    u = Point.y - Pnt->Point.y; v = Point.z - Pnt->Point.z;
    PntA = Geo->TabPnt + Fct->j; PntB = Geo->TabPnt + Fct->k;
    uA = PntA->Point.y - Pnt->Point.y; vA = PntA->Point.z - Pnt->Point.z;
    uB = PntB->Point.y - Pnt->Point.y; vB = PntB->Point.z - Pnt->Point.z;
    a = uA*vB - vA*uB; b = (u*vB - v*uB)/a; c = (v*uA - u*vA)/a; a = 1.0-b-c;

    if (a > -EPSILON && b > -EPSILON && c > -EPSILON) {
      VEC_INTER (Normal, a, Pnt->Normal, b, PntA->Normal, c, PntB->Normal);
      Real = VEC_DOT (Ray->Vector, Normal);
      if (Real > 0.0)
	return (FALSE);
      if (Hit) {
        Real = VEC_LEN (Normal); VEC_UNIT (Normal, Real);
        Ray->Distance = Distance; Hit->Point = Point; Hit->Normal = Normal;
	xyz2uv_geo_nappe (Geo, Hit, Fct);
      }
      return (TRUE);
    }
    
    if (Fct->l == Fct->i) return (FALSE);

    PntA = Geo->TabPnt + Fct->l;
    uA = PntA->Point.y - Pnt->Point.y; vA = PntA->Point.z - Pnt->Point.z;
    a = uA*vB - vA*uB; b = (u*vB - v*uB)/a; c = (v*uA - u*vA)/a; a = 1.0-b-c;

    if (a > -EPSILON && b > -EPSILON && c > -EPSILON) {
      VEC_INTER (Normal, a, Pnt->Normal, b, PntA->Normal, c, PntB->Normal);
      Real = VEC_DOT (Ray->Vector, Normal);
      if (Real > 0.0)
	return (FALSE);
      if (Hit) {
        Real = VEC_LEN (Normal); VEC_UNIT (Normal, Real);
        Ray->Distance = Distance; Hit->Point = Point; Hit->Normal = Normal;
	xyz2uv_geo_nappe (Geo, Hit, Fct);
      }
      return (TRUE);
    }
    return (FALSE);
  }
  return (FALSE);
}
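Each branch projects the triangle onto the coordinate plane where the face normal is largest and solves the 2-D barycentric system; the hit is accepted when all three coordinates exceed -EPSILON. A compact sketch of that inside-test for one projection, with plain doubles instead of the GEO/FCT structures:

#include <math.h>
#include <stdio.h>

#define EPSILON 1e-6

/* 2-D barycentric inside-test: P against triangle (O, A, B), already
 * projected onto the dominant coordinate plane */
static int inside2d(double u, double v,        /* P - O */
                    double uA, double vA,      /* A - O */
                    double uB, double vB) {    /* B - O */
    double a = uA * vB - vA * uB;              /* twice the signed area */
    double b, c;

    if (fabs(a) < EPSILON)
        return 0;                              /* degenerate triangle */
    b = (u * vB - v * uB) / a;
    c = (v * uA - u * vA) / a;
    a = 1.0 - b - c;
    return a > -EPSILON && b > -EPSILON && c > -EPSILON;
}

int main(void) {
    /* unit right triangle at the origin; (0.25, 0.25) is inside */
    printf("%d\n", inside2d(0.25, 0.25, 1.0, 0.0, 0.0, 1.0));  /* 1 */
    printf("%d\n", inside2d(0.75, 0.75, 1.0, 0.0, 0.0, 1.0));  /* 0 */
    return 0;
}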
Example #11
enum search_ret impact_ord_eval(struct index *idx, struct query *query, 
  struct chash *accumulators, unsigned int acc_limit, struct alloc *alloc, 
  unsigned int mem) {
    double norm_B;
    unsigned int i,
                 terms = 0,
                 blockfine,
                 blocks_read,
                 postings_read = 0,
                 postings = 0,
                 bytes = 0,
                 bytes_read = 0;
    struct term_data *term,
                     *largest;
    struct disksrc *dsrc;

    if (query->terms == 0) {
        /* no terms to process */
        return SEARCH_OK;
    /* allocate space for array */
    } else if (!(term = malloc(sizeof(*term) * query->terms))) {
        return SEARCH_ENOMEM;
    }

    /* sort by selectivity (by inverse t_f) */
    qsort(query->term, query->terms, sizeof(*query->term), f_t_cmp);

    norm_B = pow(idx->impact_stats.w_qt_max / idx->impact_stats.w_qt_min,
        idx->impact_stats.w_qt_min 
          / (idx->impact_stats.w_qt_max - idx->impact_stats.w_qt_min));

    /* initialise data for each query term */
    for (i = 0; i < query->terms; i++) {
        unsigned int termfine;
        double w_qt;

        /* initialise src/vec for term */
        term[i].v.pos = term[i].v.end = NULL;
        term[i].src = NULL;

        w_qt = (1 + log(query->term[i].f_qt)) *
          log(1 + (idx->impact_stats.avg_f_t / query->term[i].f_t));
        w_qt = impact_normalise(w_qt, norm_B, 
            idx->impact_stats.slope, idx->impact_stats.w_qt_max, 
            idx->impact_stats.w_qt_min);
        term[i].w_qt = impact_quantise(w_qt, 
            idx->impact_stats.quant_bits, idx->impact_stats.w_qt_max, 
            idx->impact_stats.w_qt_min);

        /* apply term fine to term impact */
        termfine = (i < 2) ? 0 : i - 2;
        if (termfine < term[i].w_qt) {
            term[i].w_qt -= termfine;
            /* initialise to highest impact, so we'll select and initialise this
             * term before real processing */
            term[i].impact = INT_MAX;
            terms++;
        } else {
            /* we won't use this term */
            term[i].w_qt = 0;
            term[i].impact = 0;
        }
        term[i].blocksize = 0;

        /* XXX */
        postings += query->term[i].f_t;
        bytes += query->term[i].term.vocab.size;
    }

    /* get sources for each term (do this in a separate loop so we've already
     * excluded lists that we won't use) */
    for (i = 0; i < terms; i++) {
        unsigned int memsize = mem / (terms - i);

        if (memsize > query->term[i].term.vocab.size) {
            memsize = query->term[i].term.vocab.size;
        }

        if (!(term[i].src 
          = search_term_src(idx, &query->term[i].term, alloc, memsize))) {
            source_delete(term, terms);
            free(term);
            return SEARCH_EINVAL;
        }

        mem -= memsize;
    }

    blockfine = blocks_read = 0;
    heap_heapify(term, terms, sizeof(*term), term_data_cmp);

    do {
        largest = heap_pop(term, &terms, sizeof(*term), term_data_cmp);

        if (largest && (largest->impact > blockfine)) {
            postings_read += largest->blocksize;
            if (chash_size(accumulators) < acc_limit) {
                /* reserve enough memory for accumulators and decode */
                if (chash_reserve(accumulators, largest->blocksize) 
                  >= largest->blocksize) {
                    impact_decode_block(accumulators, largest, blockfine);
                } else {
                    assert(!CRASH); ERROR("impact_ord_eval()");
                    source_delete(term, terms);
                    free(term);
                    return SEARCH_EINVAL;
                }
            } else {
                impact_decode_block_and(accumulators, largest, blockfine);
            }

            if (VEC_LEN(&largest->v) < 2 * VEC_VBYTE_MAX) {
                /* need to read more data */
                unsigned int bytes;
                enum search_ret sret;

                if ((sret 
                  = largest->src->readlist(largest->src, VEC_LEN(&largest->v), 
                    (void **) &largest->v.pos, &bytes)) == SEARCH_OK) {

                    /* read succeeded */
                    largest->v.end = largest->v.pos + bytes;
                } else if (sret == SEARCH_FINISH) {
                    if (VEC_LEN(&largest->v) || largest->blocksize) {
                        /* didn't finish properly */
                        assert(!CRASH); ERROR("impact_ord_eval()");
                        source_delete(term, terms);
                        free(term);
                        return SEARCH_EINVAL;
                    }
                    /* otherwise it will be finished below */
                } else {
                    assert(!CRASH); ERROR("impact_ord_eval()");
                    source_delete(term, terms);
                    free(term);
                    return sret;
                }
            }

            if (!largest->blocksize) {
                /* need to read the start of the next block */
                unsigned long int tmp_bsize,
                                  tmp_impact;

                if (vec_vbyte_read(&largest->v, &tmp_bsize)
                  && (vec_vbyte_read(&largest->v, &tmp_impact) 
                    /* second read failed, rewind past first vbyte */
                    || ((largest->v.pos -= vec_vbyte_len(tmp_bsize)), 0))) {

                    blocks_read++;
                    if (blocks_read > terms) {
                        blockfine++;
                    }

                    largest->blocksize = tmp_bsize;
                    largest->impact = (tmp_impact + 1) * largest->w_qt;
                    largest->docno = -1;
                    heap_insert(term, &terms, sizeof(*term), term_data_cmp, 
                      largest);
                } else if (!VEC_LEN(&largest->v)) {
                    /* finished, don't put back on the heap */
                    dsrc = (void *) largest->src; bytes_read += dsrc->pos;
                    largest->src->delet(largest->src);
                    largest->src = NULL;
                } else if (largest->impact != INT_MAX) {
                    /* ensure that this vector is chosen next, as we need the
                     * next impact score */
                    largest->impact = INT_MAX;
                    assert(largest->blocksize == 0);
                    heap_insert(term, &terms, sizeof(*term), term_data_cmp, 
                      largest);
                } else {
                    /* huh? */
                    assert(!CRASH); ERROR("impact_ord_eval()");
                    source_delete(term, terms);
                    free(term);
                    return SEARCH_EINVAL;
                }
            } else {
                heap_insert(term, &terms, sizeof(*term), term_data_cmp, 
                  largest);
            }
        }
    } while (largest && (largest->impact > blockfine));

    for (i = 0; i < terms; i++) {
        dsrc = (void *) term[i].src; bytes_read += dsrc->pos;
    }

    if (largest) {
        largest->src->delet(largest->src);
        largest->src = NULL;
    }

    /* end of ranking */
    source_delete(term, terms);
    free(term);
    return SEARCH_OK;
}
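impact_quantise maps each normalised query-term weight onto a small integer impact. Its exact formula is not shown in this listing; a common uniform quantiser looks like the following sketch (an assumption, not zettair's definition):

#include <stdio.h>

/* uniform quantiser: map w in [min, max] onto 1 .. 2^bits (a sketch; the
 * real impact_quantise may use a different mapping) */
static unsigned int quantise(double w, unsigned int bits,
                             double max, double min) {
    double unit = (max - min) / (double) (1u << bits);
    unsigned int q = (unsigned int) ((w - min) / unit) + 1;

    if (q > (1u << bits))
        q = 1u << bits;
    return q;
}

int main(void) {
    printf("%u\n", quantise(0.4, 5, 1.0, 0.0));   /* 13 of 32 buckets */
    return 0;
}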
Example #12
int postings_dump(struct postings* post, void *buf, unsigned int bufsize, 
  int fd) {
    unsigned int i,
                 j,
                 stopped = 0,
                 pos,                     /* position in current vector */
                 len,                     /* length of current term */
                 dbufsz;                  /* size of dbuf */
    ssize_t wlen;                         /* result of last write(); signed
                                           * so errors can be detected */
    struct postings_node* node,           /* current node */
                        ** arr;           /* array of postings nodes */
    char *dbuf,                           /* dumping buffer */
         *dbufpos;                        /* position in dumping buffer */
    struct vec v;

    /* FIXME: note, this should assert !post->update_required, but due to the
     * way that TREC documents are parsed (basically under the assumption that
     * another one is always coming) we end up with an empty document at the 
     * end */
    assert(!post->update);

    /* XXX: hack, allocate a big array of postings and then sort them by term.
     * This is so that postings go out sorted by term instead of hash value. */
    if (!(arr = malloc(sizeof(*arr) * post->dterms))) {
        return 0;
    }

    /* the provided buffer is used to dump the postings */
    dbuf = buf;
    dbufsz = bufsize;

    /* copy nodes into array */
    j = 0;
    for (i = 0; i < post->tblsize; i++) {
        node = post->hash[i];
        while (node) {
            /* perform stopping.  Ideally we'd like to stop terms before
             * stemming them, and before they make their way into the postings.
             * However, this means that we have to call the stoplist
             * once-per-term, which makes it a big bottleneck.  We stop here to
             * minimise the performance impact on the most common case, no
             * stopping.  Note that if we really wanted to make stopping 
             * (when actually used) go faster, it would be better to have a
             * sorted stoplist as well, and merge against that rather than 
             * doing one hash lookup per term. */
            if (!post->stop || stop_stop(post->stop, node->term) == STOP_OK) {
                arr[j++] = node;
            } else {
                assert(++stopped);  /* count stopped terms while debugging */
            }
            node = node->next;
        }

        /* reset hash node (memory free'd below) */
        post->hash[i] = NULL;
    }

    assert(j + stopped == post->dterms);
    stopped = 0;

    qsort(arr, post->dterms, sizeof(*arr), post_cmp);

    v.pos = dbuf;
    v.end = dbuf + dbufsz;
    for (i = 0; i < j;) {
        while ((i < post->dterms) 
          && ((len = str_len(arr[i]->term)), 1)
          && (((unsigned int) VEC_LEN(&v)) >= vec_vbyte_len(len) + len 
            + vec_vbyte_len(arr[i]->docs) + vec_vbyte_len(arr[i]->occurs) 
            + vec_vbyte_len(arr[i]->last_docno) 
            + vec_vbyte_len(arr[i]->vec.pos - arr[i]->vecmem))) {

            unsigned int bytes;

            assert(len);
            assert(dbufsz > vec_vbyte_len(len) + len 
              + vec_vbyte_len(arr[i]->docs) + vec_vbyte_len(arr[i]->occurs) 
              + vec_vbyte_len(arr[i]->last_docno)
              + vec_vbyte_len(arr[i]->vec.pos - arr[i]->vecmem));

            /* have enough space, copy stuff into buffer */
            bytes = vec_vbyte_write(&v, len);
            assert(bytes);
            bytes = vec_byte_write(&v, arr[i]->term, len);
            assert(bytes == len);
            bytes = vec_vbyte_write(&v, arr[i]->docs);
            assert(bytes);
            bytes = vec_vbyte_write(&v, arr[i]->occurs);
            assert(bytes);
            bytes = vec_vbyte_write(&v, arr[i]->last_docno);
            assert(bytes);
            bytes = vec_vbyte_write(&v, arr[i]->vec.pos - arr[i]->vecmem);
            assert(bytes);

            /* copy the inverted list in */
            pos = 0;
            while (((unsigned int) VEC_LEN(&v)) 
              < (arr[i]->vec.pos - arr[i]->vecmem) - pos) {

                /* copy last bit we can in */
                pos += vec_byte_write(&v, arr[i]->vecmem + pos, VEC_LEN(&v));

                /* write the buffer out */
                len = v.pos - dbuf;
                dbufpos = dbuf;

                while (len && ((wlen = write(fd, dbufpos, len)) >= 0)) {
                    len -= wlen;
                    dbufpos += wlen;
                }

                if (len) {
                    free(arr);
                    return 0;
                }

                v.pos = dbuf;
                v.end = dbuf + dbufsz;
            }

            /* copy last bit of inverted list in */
            pos += vec_byte_write(&v, arr[i]->vecmem + pos, 
              (arr[i]->vec.pos - arr[i]->vecmem) - pos);
            assert(arr[i]->vecmem + pos == arr[i]->vec.pos);

            free(arr[i]->vecmem);

            i++;
        }

        /* write the buffer out */
        len = v.pos - dbuf;
        dbufpos = dbuf;

        while (len && ((wlen = write(fd, dbufpos, len)) >= 0)) {
            len -= wlen;
            dbufpos += wlen;
        }

        if (len) {
            free(arr);
            return 0;
        }

        v.pos = dbuf;
        v.end = dbuf + dbufsz;
    }

    /* reinitialise hash table */
    post->size = 0;
    post->dterms = 0;
    post->terms = 0;
    post->docs = 0;
    poolalloc_clear(post->string_mem);
    objalloc_clear(post->node_mem);
 
    free(arr);

    return 1;
}
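The buffer flushes above use the classic partial-write loop: write(2) may accept fewer bytes than requested, so the remainder is retried until the buffer is drained or an error comes back. A standalone sketch of that pattern:

#include <unistd.h>

/* write exactly len bytes from buf to fd; returns 0 on error, 1 on success */
static int write_all(int fd, const char *buf, size_t len) {
    ssize_t wlen;

    while (len) {
        if ((wlen = write(fd, buf, len)) < 0)
            return 0;          /* I/O error (a fuller version checks EINTR) */
        len -= (size_t) wlen;
        buf += wlen;
    }
    return 1;
}

int main(void) {
    return write_all(1, "postings\n", 9) ? 0 : 1;
}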
Example #13
int postings_update(struct postings* post, struct postings_docstats* stats) {
    struct postings_node* node = post->update;
    unsigned int terms = 0,
                 dterms = 0;
    float weight = 0,
          fdt_log;

    while (node) {
        /* calculate document weight */
        fdt_log = (float) logf((float) node->offsets);
        weight += (1 + fdt_log) * (1 + fdt_log);

        assert(node->offsets);
        assert(node->last_count != node->vecmem);
        assert(node->last_count > node->vecmem);
        assert((post->docno > node->last_docno) || (node->last_docno == -1));

        /* check if we have to correct the count written earlier (a
         * placeholder of 1) */
        if (node->offsets > 1) {
            unsigned int len = vec_vbyte_len(node->offsets);
            struct vec offsetvec;
 
            /* check if the count will expand in size */
            if (len > 1) {
                /* check if we need to resize the vector */
                if ((VEC_LEN(&node->vec) < len - 1) 
                  /* resize it */
                  && !postings_node_expand(node)) {
                    return 0;
                }

                /* move offsets up to make room for expanded count */
                assert(node->vec.pos > node->last_count);
                assert(node->vecmem < node->last_count);
                memmove(node->last_count + len, node->last_count + 1, 
                  node->vec.pos - (node->last_count + 1));
                node->vec.pos += len - 1;
                post->size += len - 1;
            }

            /* point offsetvec to the correct location to write the correct
             * count */
            offsetvec.pos = node->last_count;
            offsetvec.end = node->vec.end;

            len = vec_vbyte_write(&offsetvec, node->offsets);
            assert(len);
        }

        /* watch for overflow */
        assert(node->occurs + node->offsets > node->occurs);  
        node->occurs += node->offsets;
        terms += node->offsets;
        dterms++;
        node->offsets = 0;

        node->last_docno = post->docno;
        node->last_offset = -1;
        node->last_count = node->vecmem;
        node->docs++;

        node = node->update;
    }

    post->update = NULL;                /* reset update list */
    post->update_required = 0;
    stats->weight = (float) sqrtf(weight);
    stats->terms = terms;
    stats->distinct = dterms;
    return 1;
}
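The count fix-up depends on vbyte widening: the placeholder count occupied one byte, and once the real value needs more bytes the vector tail is shifted up with memmove before the rewrite. A standalone sketch of that widening step, with vb_len/vb_put as hypothetical stand-ins for vec_vbyte_len/vec_vbyte_write:

#include <assert.h>
#include <stdio.h>
#include <string.h>

/* hypothetical vbyte length/write helpers */
static unsigned int vb_len(unsigned long n) {
    unsigned int bytes = 1;

    while (n >= 0x80) {
        n >>= 7;
        bytes++;
    }
    return bytes;
}
static void vb_put(unsigned char *p, unsigned long n) {
    while (n >= 0x80) {
        *p++ = (unsigned char) (n | 0x80);
        n >>= 7;
    }
    *p = (unsigned char) n;
}

int main(void) {
    /* vector: [count placeholder = 1 byte] [three tail bytes] */
    unsigned char vec[16] = { 1, 0xAA, 0xBB, 0xCC };
    unsigned int used = 4, len;
    unsigned long count = 300;          /* real count, needs 2 vbyte bytes */

    len = vb_len(count);
    if (len > 1) {                      /* count slot must widen */
        memmove(vec + len, vec + 1, used - 1);
        used += len - 1;
    }
    vb_put(vec, count);                 /* rewrite the count in place */
    assert(vec[len] == 0xAA && used == 5);
    printf("widened to %u bytes\n", used);
    return 0;
}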
Example #14
/*
 * Check if the digest of one of the certificates from the verified chain
 * is present in the forbidden database.
 * Since UEFI allows three types of digests to be stored,
 * each of them has to be checked separately.
 */
static int
check_forbidden_digests(br_x509_certificate *xcs, size_t num)
{
	unsigned char sha256_digest[br_sha256_SIZE];
	unsigned char sha384_digest[br_sha384_SIZE];
	unsigned char sha512_digest[br_sha512_SIZE];
	void *tbs;
	hash_data *digest;
	br_hash_compat_context ctx;
	const br_hash_class *md;
	size_t tbs_len, i;
	int have_sha256, have_sha384, have_sha512;

	if (VEC_LEN(forbidden_digests) == 0)
		return (0);

	/*
	 * Iterate through certificates, extract their To-Be-Signed section,
	 * and compare its digest against the ones in the forbidden database.
	 */
	while (num--) {
		tbs = X509_to_tbs(xcs[num].data, &tbs_len);
		if (tbs == NULL) {
			printf("Failed to obtain TBS part of certificate\n");
			return (1);
		}
		have_sha256 = have_sha384 = have_sha512 = 0;

		for (i = 0; i < VEC_LEN(forbidden_digests); i++) {
			digest = &VEC_ELT(forbidden_digests, i);
			switch (digest->hash_size) {
			case br_sha256_SIZE:
				if (!have_sha256) {
					have_sha256 = 1;
					md = &br_sha256_vtable;
					md->init(&ctx.vtable);
					md->update(&ctx.vtable, tbs, tbs_len);
					md->out(&ctx.vtable, sha256_digest);
				}
				if (!memcmp(sha256_digest,
					digest->data,
					br_sha256_SIZE))
					return (1);

				break;
			case br_sha384_SIZE:
				if (!have_sha384) {
					have_sha384 = 1;
					md = &br_sha384_vtable;
					md->init(&ctx.vtable);
					md->update(&ctx.vtable, tbs, tbs_len);
					md->out(&ctx.vtable, sha384_digest);
				}
				if (!memcmp(sha384_digest,
					digest->data,
					br_sha384_SIZE))
					return (1);

				break;
			case br_sha512_SIZE:
				if (!have_sha512) {
					have_sha512 = 1;
					md = &br_sha512_vtable;
					md->init(&ctx.vtable);
					md->update(&ctx.vtable, tbs, tbs_len);
					md->out(&ctx.vtable, sha512_digest);
				}
				if (!memcmp(sha512_digest,
					digest->data,
					br_sha512_SIZE))
					return (1);

				break;
			}
		}
	}

	return (0);
}
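All three digest sizes are driven through BearSSL's generic hash vtable, so the hashing steps are identical apart from the vtable pointer and output length. A minimal standalone sketch of that interface, assuming only the BearSSL API already used above:

#include <stdio.h>
#include <bearssl.h>

int main(void) {
    static const char msg[] = "to-be-signed bytes";
    unsigned char digest[br_sha256_SIZE];
    br_hash_compat_context ctx;
    const br_hash_class *md = &br_sha256_vtable;
    size_t i;

    /* init/update/out through the vtable, as check_forbidden_digests does */
    md->init(&ctx.vtable);
    md->update(&ctx.vtable, msg, sizeof(msg) - 1);
    md->out(&ctx.vtable, digest);

    for (i = 0; i < br_sha256_SIZE; i++)
        printf("%02x", digest[i]);
    printf("\n");
    return 0;
}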
Example #15
/**
 * If we can verify the certificate chain in "xcs",
 * return the public key of the signer.
 */
static br_x509_pkey *
verify_signer_xcs(br_x509_certificate *xcs,
    size_t num,
    br_name_element *elts, size_t num_elts,
    anchor_list *anchors)
{
	br_x509_minimal_context mc;
	br_x509_certificate *xc;
	size_t u;
	cert_list chain = VEC_INIT;
	const br_x509_pkey *tpk;
	br_x509_pkey *pk;
	unsigned int usages;
	int err;

	DEBUG_PRINTF(5, ("verify_signer: %zu certs in chain\n", num));
	VEC_ADDMANY(chain, xcs, num);
	if (VEC_LEN(chain) == 0) {
		ve_error_set("ERROR: no/invalid certificate chain\n");
		return (NULL);
	}

	DEBUG_PRINTF(5, ("verify_signer: %zu trust anchors\n",
		VEC_LEN(*anchors)));

	br_x509_minimal_init(&mc, &br_sha256_vtable,
	    &VEC_ELT(*anchors, 0),
	    VEC_LEN(*anchors));
#ifdef VE_ECDSA_SUPPORT
	br_x509_minimal_set_ecdsa(&mc,
	    &br_ec_prime_i31, &br_ecdsa_i31_vrfy_asn1);
#endif
#ifdef VE_RSA_SUPPORT
	br_x509_minimal_set_rsa(&mc, &br_rsa_i31_pkcs1_vrfy);
#endif
#if defined(UNIT_TEST) && defined(VE_DEPRECATED_RSA_SHA1_SUPPORT)
	/* This is deprecated! Do not enable unless you absolutely have to. */
	br_x509_minimal_set_hash(&mc, br_sha1_ID, &br_sha1_vtable);
#endif
	br_x509_minimal_set_hash(&mc, br_sha256_ID, &br_sha256_vtable);
#ifdef VE_SHA384_SUPPORT
	br_x509_minimal_set_hash(&mc, br_sha384_ID, &br_sha384_vtable);
#endif
#ifdef VE_SHA512_SUPPORT
	br_x509_minimal_set_hash(&mc, br_sha512_ID, &br_sha512_vtable);
#endif
	br_x509_minimal_set_name_elements(&mc, elts, num_elts);

#ifdef _STANDALONE
	/*
	 * Clock is probably bogus so we use ve_utc.
	 */
	mc.days = (ve_utc / SECONDS_PER_DAY) + X509_DAYS_TO_UTC0;
	mc.seconds = (ve_utc % SECONDS_PER_DAY);
#endif

	mc.vtable->start_chain(&mc.vtable, NULL);
	for (u = 0; u < VEC_LEN(chain); u++) {
		xc = &VEC_ELT(chain, u);
		mc.vtable->start_cert(&mc.vtable, xc->data_len);
		mc.vtable->append(&mc.vtable, xc->data, xc->data_len);
		mc.vtable->end_cert(&mc.vtable);
		switch (mc.err) {
		case 0:
		case BR_ERR_X509_OK:
		case BR_ERR_X509_EXPIRED:
			break;
		default:
			printf("u=%zu mc.err=%d\n", u, mc.err);
			break;
		}
	}
	err = mc.vtable->end_chain(&mc.vtable);
	pk = NULL;
	if (err) {
		ve_error_set("Validation failed, err = %d", err);
	} else {
		tpk = mc.vtable->get_pkey(&mc.vtable, &usages);
		if (tpk != NULL) {
			pk = xpkeydup(tpk);
		}
	}
	VEC_CLEAR(chain);
	return (pk);
}
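In a standalone loader the wall clock is unreliable, so validation time is derived from ve_utc as BearSSL's days/seconds pair, with days counted from year 0. A small sketch of that conversion; X509_DAYS_TO_UTC0 is assumed to be the day number of 1970-01-01 (719528 in the FreeBSD sources):

#include <stdio.h>

#define SECONDS_PER_DAY   86400
#define X509_DAYS_TO_UTC0 719528UL /* assumed: day number of 1970-01-01 */

int main(void) {
    unsigned long ve_utc = 1700000000UL;  /* sample UNIX time */
    unsigned long days = ve_utc / SECONDS_PER_DAY + X509_DAYS_TO_UTC0;
    unsigned long seconds = ve_utc % SECONDS_PER_DAY;

    printf("days=%lu seconds=%lu\n", days, seconds);  /* 739203, 80000 */
    return 0;
}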