static DocRepresentation * get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen) { ITEM *item = GETQUERY(query); WordEntry *entry; WordEntryPos *post; int4 dimt, j, i; int len = query->size * 4, cur = 0; DocRepresentation *doc; char *operand; *(uint16 *) POSNULL = lengthof(POSNULL) - 1; doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len); operand = GETOPERAND(query); reset_istrue_flag(query); for (i = 0; i < query->size; i++) { if (item[i].type != VAL || item[i].istrue) continue; entry = find_wordentry(txt, query, &(item[i])); if (!entry) continue; if (entry->haspos) { dimt = POSDATALEN(txt, entry); post = POSDATAPTR(txt, entry); } else { dimt = *(uint16 *) POSNULL; post = POSNULL + 1; } while (cur + dimt >= len) { len *= 2; doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); } for (j = 0; j < dimt; j++) { if (j == 0) { ITEM *kptr, *iptr = item + i; int k; doc[cur].needfree = false; doc[cur].nitem = 0; doc[cur].item = (ITEM **) palloc(sizeof(ITEM *) * query->size); for (k = 0; k < query->size; k++) { kptr = item + k; if (k == i || (item[k].type == VAL && compareITEM(&kptr, &iptr, operand) == 0)) { doc[cur].item[doc[cur].nitem] = item + k; doc[cur].nitem++; kptr->istrue = 1; } } } else { doc[cur].needfree = false; doc[cur].nitem = doc[cur - 1].nitem; doc[cur].item = doc[cur - 1].item; } doc[cur].pos = WEP_GETPOS(post[j]); doc[cur].wclass = WEP_GETWEIGHT(post[j]); cur++; } } *doclen = cur; if (cur > 0) { if (cur > 1) qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR); return doc; } pfree(doc); return NULL; }
static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q) { WordEntry *entry; WordEntryPos *post; int4 dimt, j, i; float res = 0.0; ITEM **item; int size = q->size; *(uint16 *) POSNULL = lengthof(POSNULL) - 1; item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size); for (i = 0; i < size; i++) { float resj, wjm; int4 jm; entry = find_wordentry(t, q, item[i]); if (!entry) continue; if (entry->haspos) { dimt = POSDATALEN(t, entry); post = POSDATAPTR(t, entry); } else { dimt = *(uint16 *) POSNULL; post = POSNULL + 1; } resj = 0.0; wjm = -1.0; jm = 0; for (j = 0; j < dimt; j++) { resj = resj + wpos(post[j]) / ((j + 1) * (j + 1)); if (wpos(post[j]) > wjm) { wjm = wpos(post[j]); jm = j; } } /* limit (sum(i/i^2),i->inf) = pi^2/6 resj = sum(wi/i^2),i=1,noccurence, wi - should be sorted desc, don't sort for now, just choose maximum weight. This should be corrected Oleg Bartunov */ res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685; } if (size > 0) res = res / size; pfree(item); return res; }
static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q) { uint16 **pos; int i, k, l, p; WordEntry *entry; WordEntryPos *post, *ct; int4 dimt, lenct, dist; float res = -1.0; ITEM **item; int size = q->size; item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size); if (size < 2) { pfree(item); return calc_rank_or(w, t, q); } pos = (uint16 **) palloc(sizeof(uint16 *) * q->size); memset(pos, 0, sizeof(uint16 *) * q->size); *(uint16 *) POSNULL = lengthof(POSNULL) - 1; WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1); for (i = 0; i < size; i++) { entry = find_wordentry(t, q, item[i]); if (!entry) continue; if (entry->haspos) pos[i] = (uint16 *) _POSDATAPTR(t, entry); else pos[i] = (uint16 *) POSNULL; dimt = *(uint16 *) (pos[i]); post = (WordEntryPos *) (pos[i] + 1); for (k = 0; k < i; k++) { if (!pos[k]) continue; lenct = *(uint16 *) (pos[k]); ct = (WordEntryPos *) (pos[k] + 1); for (l = 0; l < dimt; l++) { for (p = 0; p < lenct; p++) { dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL))) { float curw; if (!dist) dist = MAXENTRYPOS; curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist)); res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw); } } } } } pfree(pos); pfree(item); return res; }
static DocRepresentation * get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen) { QueryItem *item = GETQUERY(qr->query); WordEntry *entry, *firstentry; WordEntryPos *post; int32 dimt, j, i, nitem; int len = qr->query->size * 4, cur = 0; DocRepresentation *doc; char *operand; doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len); operand = GETOPERAND(qr->query); for (i = 0; i < qr->query->size; i++) { QueryOperand *curoperand; if (item[i].type != QI_VAL) continue; curoperand = &item[i].qoperand; if (QR_GET_OPERAND_EXISTS(qr, &item[i])) continue; firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem); if (!entry) continue; while (entry - firstentry < nitem) { if (entry->haspos) { dimt = POSDATALEN(txt, entry); post = POSDATAPTR(txt, entry); } else { dimt = POSNULL.npos; post = POSNULL.pos; } while (cur + dimt >= len) { len *= 2; doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); } for (j = 0; j < dimt; j++) { if (j == 0) { int k; doc[cur].nitem = 0; doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size); for (k = 0; k < qr->query->size; k++) { QueryOperand *kptr = &item[k].qoperand; QueryOperand *iptr = &item[i].qoperand; if (k == i || (item[k].type == QI_VAL && compareQueryOperand(&kptr, &iptr, operand) == 0)) { /* * if k == i, we've already checked above that * it's type == Q_VAL */ doc[cur].item[doc[cur].nitem] = item + k; doc[cur].nitem++; QR_SET_OPERAND_EXISTS(qr, item + k); } } } else { doc[cur].nitem = doc[cur - 1].nitem; doc[cur].item = doc[cur - 1].item; } doc[cur].pos = WEP_GETPOS(post[j]); doc[cur].wclass = WEP_GETWEIGHT(post[j]); cur++; } entry++; } } *doclen = cur; if (cur > 0) { qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR); return doc; } pfree(doc); return NULL; }
static float calc_rank_or(float *w, TSVector t, TSQuery q) { WordEntry *entry, *firstentry; WordEntryPos *post; int32 dimt, j, i, nitem; float res = 0.0; QueryOperand **item; int size = q->size; item = SortAndUniqItems(q, &size); for (i = 0; i < size; i++) { float resj, wjm; int32 jm; firstentry = entry = find_wordentry(t, q, item[i], &nitem); if (!entry) continue; while (entry - firstentry < nitem) { if (entry->haspos) { dimt = POSDATALEN(t, entry); post = POSDATAPTR(t, entry); } else { dimt = POSNULL.npos; post = POSNULL.pos; } resj = 0.0; wjm = -1.0; jm = 0; for (j = 0; j < dimt; j++) { resj = resj + wpos(post[j]) / ((j + 1) * (j + 1)); if (wpos(post[j]) > wjm) { wjm = wpos(post[j]); jm = j; } } /* limit (sum(i/i^2),i->inf) = pi^2/6 resj = sum(wi/i^2),i=1,noccurence, wi - should be sorted desc, don't sort for now, just choose maximum weight. This should be corrected Oleg Bartunov */ res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685; entry++; } } if (size > 0) res = res / size; pfree(item); return res; }
static float calc_rank_and(float *w, TSVector t, TSQuery q) { WordEntryPosVector **pos; int i, k, l, p; WordEntry *entry, *firstentry; WordEntryPos *post, *ct; int32 dimt, lenct, dist, nitem; float res = -1.0; QueryOperand **item; int size = q->size; item = SortAndUniqItems(q, &size); if (size < 2) { pfree(item); return calc_rank_or(w, t, q); } pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size); WEP_SETPOS(POSNULL.pos[0], MAXENTRYPOS - 1); for (i = 0; i < size; i++) { firstentry = entry = find_wordentry(t, q, item[i], &nitem); if (!entry) continue; while (entry - firstentry < nitem) { if (entry->haspos) pos[i] = _POSVECPTR(t, entry); else pos[i] = &POSNULL; dimt = pos[i]->npos; post = pos[i]->pos; for (k = 0; k < i; k++) { if (!pos[k]) continue; lenct = pos[k]->npos; ct = pos[k]->pos; for (l = 0; l < dimt; l++) { for (p = 0; p < lenct; p++) { dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL))) { float curw; if (!dist) dist = MAXENTRYPOS; curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist)); res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw); } } } } entry++; } } pfree(pos); pfree(item); return res; }