Example #1
0
/* Returns the number of integers currently held in the Judy1-backed set. */
static Py_ssize_t PyJudyIntSet_len(PyObject* set)
{
	PyJudyIntSet* self = (PyJudyIntSet*)set;
	Word_t population;

	/* Count the set bits over the whole index range, 0 .. (Word_t)-1. */
	J1C(population, self->s, 0, -1);

	/* Limit noted by the original author: 2**31-1 on 32-bit systems,
	 * 2**63-1 on 64-bit systems (presumably the Py_ssize_t maximum).
	 * The cast below performs no range check. */
	return (Py_ssize_t)population;
}
Example #2
0
/*
 * Fills `set` from the ids collected in `builder`: records how many ids there
 * are, makes room for them, copies them into set->ids in the order the Judy1
 * iteration yields them, and finally copies the builder's hash.  The builder's
 * working set is only read here (counted and iterated), never changed.
 */
void
csp_id_set_build_and_keep(struct csp_id_set *set,
                          struct csp_id_set_builder *builder)
{
    Word_t total;
    size_t slot = 0;
    csp_id current = 0;
    int have_id;

    /* The count has to be stored before csp_id_set_ensure_size() runs, so
     * that the `ids` array is large enough for everything we copy below. */
    J1C(total, builder->working_set, 0, -1);
    set->count = total;
    csp_id_set_ensure_size(set);

    /* Start at the first member at or above 0, then step through the rest,
     * appending each id to the array. */
    J1F(have_id, builder->working_set, current);
    while (have_id) {
        set->ids[slot] = current;
        slot++;
        J1N(have_id, builder->working_set, current);
    }

    set->hash = builder->hash;
}
Example #3
0
/**
 * Computes per-ngram statistics for the index and optionally writes them out.
 *
 * For every ngram in [0, lastNgram) one ngramStats record is produced:
 *   - count:  number of indexes set in that ngram's Judy1 array,
 *   - IDF:    log2(articleCount / count), or 0 when the ngram has no entries,
 *   - offset: running total of the counts of all preceding ngrams.
 * When writeFiles is set, the records are written in order to
 * "ngramStatsFile.bin"; any failure to open, write or close that file is
 * reported on stderr and terminates the program with exit status 1.
 * The ngrams with the highest and lowest IDF and the possible/actual IDF
 * ranges are reported through optionalPrint.
 *
 * @param wikiIndex    array of Judy1 arrays, indexed by ngram number
 * @param articleCount total number of articles (numerator of the IDF ratio)
 */
void writeNgramStats(Pvoid_t *wikiIndex, int articleCount) {
	Word_t totalIndexes = 0;
	Word_t indexes      = 0;

	float highestIDF = 0.0;
	float lowestIDF  = log2f ((float) articleCount);

	int highestIDFNgram = 0;
	int lowestIDFNgram = 0;

	ngramStats currentStats = {0,0,0.0};

	FILE *ngramStatsFile = NULL;
	if (writeFiles) {
		ngramStatsFile = fopen("ngramStatsFile.bin", "wb");
		if (NULL == ngramStatsFile) {
			// perror() emits "<prefix>: <error text>\n", i.e. the same message
			// the glibc-only "%m" conversion produced, but portably.
			perror("Error opening NGram Stats File");
			exit(1);
		}
	}

	// one ngram per iteration: compute its IDF, remember the extremes, and
	// emit (IDF, count, cumulative count of everything before it)
	for (Word_t currentNgram = 0; currentNgram < lastNgram; currentNgram++) {
		J1C (indexes, wikiIndex[currentNgram], 0, -1); //count indices
		if (indexes) { //avoid a divide by zero
			currentStats.IDF = log2f ((float) articleCount / (float) indexes);
			// The two extremes are tracked independently: a value that sets a
			// new maximum may also be the smallest seen so far (e.g. the very
			// first non-empty ngram), so this must not be an else-if.
			if (currentStats.IDF > highestIDF) {
				highestIDF = currentStats.IDF;
				highestIDFNgram = (int) currentNgram;
			}
			if (currentStats.IDF < lowestIDF) {
				lowestIDF = currentStats.IDF;
				lowestIDFNgram = (int) currentNgram;
			}
		}
		else {
			currentStats.IDF = 0.0;
		}
		currentStats.count  = (int) indexes;
		currentStats.offset = (int) totalIndexes;
		if (writeFiles) {
			if (fwrite(&currentStats, sizeof(ngramStats), 1, ngramStatsFile) != 1) {
				perror("Error writing NGram Stats File");
				exit(1);
			}
		}
		totalIndexes = totalIndexes + indexes;
	}

	if (writeFiles) {
		// fclose flushes buffered records, so a failure here means data loss
		if (fclose (ngramStatsFile) != 0) {
			perror("Error closing NGram Stats File");
			exit(1);
		}
	}
	optionalPrint ("%d %s", highestIDFNgram, "is the ngram with the highest IDF\n");
	optionalPrint ("%d %s", lowestIDFNgram, "is the ngram with the lowest IDF\n");
	optionalPrint ("%.4f %.3f %s", log2f (1.0), log2f ((float) articleCount), "possible range of IDF\n");
	optionalPrint ("%.4f %.3f %s", lowestIDF, highestIDF, "actual range of IDF\n");
}
Example #4
0
/**
 * Tears down the index: for each ngram's Judy1 array it tallies the number of
 * set indexes and the memory the array occupies, then frees the array.
 * Afterwards the totals are reported through optionalPrint.
 */
void freeIndex (Pvoid_t *wikiIndex) {
	Word_t bytesInUse    = 0;	// memory used, summed over all arrays
	Word_t indexTotal    = 0;	// set indexes, summed over all arrays
	Word_t arrayBytes    = 0;
	Word_t arrayIndexes  = 0;
	Word_t freeResult    = 0;	// value handed back by J1FA; not used further

	// one Judy array per iteration: measure first, free last
	for (int ngram = 0; ngram < lastNgram; ++ngram) {
		J1C (arrayIndexes, wikiIndex[ngram], 0, -1);
		indexTotal += arrayIndexes;

		J1MU (arrayBytes, wikiIndex[ngram]);
		bytesInUse += arrayBytes;

		J1FA (freeResult, wikiIndex[ngram]);
	}

	// the "not compressed" figure charges 4 bytes per stored index
	optionalPrint("%d %s", (int)(bytesInUse/1048576), "MB of memory used\n");
	optionalPrint("%d", (int)((indexTotal*4)/1048576));
	optionalPrint(" MB if not compressed\n");	
	optionalPrint("Index freed.\n");
}
Example #5
0
/*
 * Collects into a Judy1 set the xids of all INVA entries whose length lies in
 * (min_f, max_f], ignoring xids that fall in the XID_META_FREQUENT or
 * XID_TOKEN_FREQUENT ranges.  The smallest and largest collected xid are
 * stored in fw_layers[p], a summary is logged via dub_msg, and the set is
 * returned to the caller.
 */
static Pvoid_t ixemes_freq_range(int p, int min_f, int max_f)
{
        Pvoid_t layer = NULL;
        Word_t xid;
        int entry, rc;

        for (entry = 0; entry < dex_section[INVA].nof_entries; entry++){
                const inva_e *item;
                int meta_frequent, token_frequent;

                xid = dex_section[INVA].toc[entry].val;

                meta_frequent = (xid >= XID_META_FREQUENT_F &&
                                 xid <= XID_META_FREQUENT_L);
                token_frequent = (xid >= XID_TOKEN_FREQUENT_F &&
                                  xid <= XID_TOKEN_FREQUENT_L);
                if (meta_frequent || token_frequent)
                        continue;

                /* only fetch the entry once we know the xid is eligible */
                item = (const inva_e*)fetch_item(INVA, entry);
                if (item->len > min_f && item->len <= max_f){
                        J1S(rc, layer, xid);
                }
        }

        /* Lowest member: search upwards from 0.  NOTE(review): when the set
         * is empty the stored value is whatever the lookup leaves in xid. */
        xid = 0;
        J1F(rc, layer, xid);
        fw_layers[p].min_xid = xid;

        /* Highest member: search downwards from the largest possible index. */
        xid = -1;
        J1L(rc, layer, xid);
        fw_layers[p].max_xid = xid;

        /* rc is reused to hold the population count for the log line */
        J1C(rc, layer, 0, -1);
        dub_msg("Layer %u: number of xids %u min %u max %u", p, rc,
                fw_layers[p].min_xid, fw_layers[p].max_xid);

        return layer;
}
Example #6
0
/*
 * UUID round-trip test.
 *
 * Generates NUM_TRAILS random UUIDs, remembers the leading sizeof(Word_t)
 * bytes of each in a Judy1 set, and adds NUM_EVENTS events per UUID to a new
 * TrailDB written to the path given in argv[1].  The database is then
 * reopened and checked: trail/event counts must match, UUIDs must come back
 * in strictly increasing order, and each one must match (and remove) exactly
 * one key in the set, leaving the set empty at the end.
 *
 * Returns 0 on success, 1 when the path argument is missing, -1 when two
 * generated UUIDs collide on their leading bytes; other failures abort
 * through assert().
 */
int main(int argc, char **argv)
{
    static const char **fields;
    static uint64_t *lengths;
    dsfmt_t state;
    Pvoid_t uuids = NULL;
    tdb_cons* c;
    uint64_t i, j;
    __uint128_t prev_uuid = 0;
    Word_t key;
    int tst;
    int ret;

    /* argv[1] is dereferenced below; refuse to run without it */
    if (argc < 2){
        fprintf(stderr, "missing TrailDB output path argument\n");
        return 1;
    }

    c = tdb_cons_init();
    test_cons_settings(c);

    /* Calls with side effects are kept outside assert() so that they still
     * run when the program is built with NDEBUG. */
    ret = tdb_cons_open(c, argv[1], fields, 0);
    assert(ret == 0);
    dsfmt_init_gen_rand(&state, 2489);

    for (i = 0; i < NUM_TRAILS; i++){
        uint8_t uuid[16];
        gen_random_uuid(uuid, &state);
        /* copy only as many bytes as the key can hold; a fixed 8 would
         * overrun `key` wherever Word_t is narrower than 64 bits */
        memcpy(&key, uuid, sizeof(key));

        J1S(tst, uuids, key);
        if (!tst){
            printf("half-word collision! change random seed!\n");
            return -1;
        }

        for (j = 0; j < NUM_EVENTS; j++)
            tdb_cons_add(c, uuid, i * 100 + j, fields, lengths);
    }
    J1C(key, uuids, 0, -1);
    assert(key == NUM_TRAILS);
    ret = tdb_cons_finalize(c);
    assert(ret == 0);
    tdb_cons_close(c);

    tdb* t = tdb_init();
    ret = tdb_open(t, argv[1]);
    assert(ret == 0);
    (void)ret; /* only inspected by assert() */

    assert(tdb_num_trails(t) == NUM_TRAILS);
    assert(tdb_num_events(t) == NUM_TRAILS * NUM_EVENTS);

    for (i = 0; i < NUM_TRAILS; i++){
        __uint128_t this_uuid;

        /* uuids must be monotonically increasing */
        memcpy(&this_uuid, tdb_get_uuid(t, i), 16);
        assert(this_uuid > prev_uuid);
        prev_uuid = this_uuid;

        /* remove this uuid from the uuid set and make sure it exists;
         * the key is derived exactly as it was when inserting */
        memcpy(&key, &this_uuid, sizeof(key));
        J1U(tst, uuids, key);
        assert(tst == 1);
    }

    /* make sure we retrieved all uuids */
    J1C(key, uuids, 0, -1);
    assert(key == 0);

    return 0;
}
Example #7
0
/*
 * Returns how many indexes are set in the table's Judy1 array.
 * The count is stored straight into an int, so it is narrowed to int range.
 */
int jtableP_count(jtableP *table) {
	int population;

	/* count across the entire index range, 0 .. (Word_t)-1 */
	J1C(population, table->t, 0, -1);

	return population;
}
Example #8
0
File: udi.c Project: jfmc/yap-6.3
/* index, called from absmi.c
 *
 * Looks up the UDI control block registered for predicate p and runs the
 * user-defined index search of every indexed argument.  With a single indexed
 * argument the matches are collected straight into a clause list; with
 * several, each argument's matches are gathered in a Judy1 set and the sets
 * are intersected before being converted into a clause list.
 *
 * Returns:
 *    NULL (yap fallback)        No usable indexing available
 *
 *    Yap_FAILCODE() (fail)      No result found
 *    Yap_ClauseListToClause(cl) 1 solution found
 *    Yap_ClauseListCode(cl)     2+ solutions found
 */
yamop *
Yap_udi_search(PredEntry *p)
{
	int r;
	struct ClauseList clauselist;
	UdiPArg parg;
	UdiInfo info;

	/* find our structure*/
	HASH_FIND_UdiInfo(UdiControlBlocks,p,info);
	if (!info || utarray_len(info->args) == 0)
		return NULL;

	if (utarray_len(info->args) == 1){ //simple case no intersection needed
		struct si_callback_h c;

		c.cl = Yap_ClauseListInit(&clauselist);
		c.clauselist = info->clauselist;
		c.pred = info->p;
		if (!c.cl)
			return NULL;

		parg = (UdiPArg) utarray_eltptr(info->args,0);
		r = parg->control->search(parg->idxstr, parg->arg, si_callback, (void *) &c);
		Yap_ClauseListClose(c.cl);

		if (r == -1) {
			Yap_ClauseListDestroy(c.cl);
			return NULL;
		}

		if (Yap_ClauseListCount(c.cl) == 0) {
			Yap_ClauseListDestroy(c.cl);
			return Yap_FAILCODE();
		}
	} else {//intersection needed using Judy1
#ifdef USE_JUDY
		unsigned int i;
		Pvoid_t tmp = (Pvoid_t) NULL;    /* matches of the current argument */
		Pvoid_t result = (Pvoid_t) NULL; /* running intersection */
		Word_t count = 0L;
		Word_t idx_r = 0L;
		int found = 0;
		int present = 0;
		int rc = 0;                      /* number of arguments that pruned */
		yamop **x;

		/*
		 * Simplest approach: for each index build a set and intersect it
		 * with the running result.
		 *
		 * In the future it could pay to sort according to index type
		 * to improve the intersection part.
		 */
		for (i = 0; i < utarray_len(info->args) ; i++) {
			parg = (UdiPArg) utarray_eltptr(info->args,i);
			r = parg->control->search(parg->idxstr, parg->arg, j1_callback, &tmp);
			if (r == -1) { /*this arg does not prune search*/
				/* drop anything the callback may have collected so it
				 * cannot leak into the next argument's set */
				J1FA(count, tmp);
				continue;
			}
			rc ++;

			if (r == 0) /* this arg gave 0 results -> FAIL*/
			{
				/* freeing an empty (NULL) Judy1 array is a no-op */
				J1FA(count, tmp);
				J1FA(count, result);
				return Yap_FAILCODE();
			}

			/* The first pruning argument seeds the result.  This is keyed
			 * on rc rather than on the result being empty: an intersection
			 * that became empty must stay empty, not be re-seeded. */
			if (rc == 1)
			{
				result = tmp;
				tmp = (Pvoid_t) NULL;
				continue;
			}

			/* intersection: walk result and drop every member that is not
			 * also in tmp.  Judy1 iteration is driven purely by the index
			 * value, so unsetting the current member before stepping to the
			 * next one is safe. */
			idx_r = 0L;
			J1F(found, result, idx_r);
			while (found)
			{
				J1T(present, tmp, idx_r);
				if (!present) {
					J1U(present, result, idx_r); //does not belong
				}
				J1N(found, result, idx_r); //next
			}
			J1FA(count, tmp); //free tmp

			/* an empty intersection can never grow back -> FAIL */
			J1C(count, result, 0, -1);
			if (count == 0) {
				J1FA(count, result);
				return Yap_FAILCODE();
			}
		}
		if (rc == 0) /*no search performed*/
			return NULL;

		J1C(count, result, 0, -1);
		if (count == 0) { /*result set empty -> FAIL */
			J1FA(count, result);
			return Yap_FAILCODE();
		}

		/*convert Judy1 to clauselist*/
		if (!Yap_ClauseListInit(&clauselist)) {
			/* same fallback as the single-argument case above */
			J1FA(count, result);
			return NULL;
		}
		idx_r = 0L;
		J1F(found, result, idx_r);
		while (found)
		{
			/* set members are 1-based positions in info->clauselist */
			x = (yamop **) utarray_eltptr(info->clauselist, idx_r - 1);
			Yap_ClauseListExtend(
					&clauselist,
					*x,
					info->p);
			J1N(found, result, idx_r);
		}
		J1FA(count,result); /* count now holds what J1FA reports as freed */
		fprintf(stderr,"J1 used space %ld bytes for %d clausules\n",
				(long) count, Yap_ClauseListCount(&clauselist));
		Yap_ClauseListClose(&clauselist);
#else
		fprintf(stderr,"Without libJudy only one argument indexed is allowed."
				"Falling back to Yap Indexing\n");
		return NULL; //NO Judy Available
#endif
	}

	if (Yap_ClauseListCount(&clauselist) == 1)
		return Yap_ClauseListToClause(&clauselist);
	return Yap_ClauseListCode(&clauselist);
}