/*
 * Check weight info.
 *
 * Walks the position list stored right after the (short-aligned) lexeme of
 * 'val' inside chkval->values: a uint16 count followed by that many
 * WordEntryPos items.  Returns true as soon as one position has a weight
 * whose bit is set in item->weight, false if none does.
 */
static bool
checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item)
{
	/* the count sits at the aligned end of the lexeme ... */
	uint16		npos = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));

	/* ... and the positions follow immediately after it */
	WordEntryPos *positions = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
	int			idx;

	for (idx = 0; idx < npos; idx++)
	{
		if (item->weight & (1 << positions[idx].weight))
			return true;
	}

	return false;
}
/*
 * Append a human-readable summary of a leaf recompression record to 'buf'.
 *
 * The action list starts right after the fixed-size
 * ginxlogRecompressDataLeaf header.  Each action is a segment number byte
 * and an action byte, followed by an action-specific payload that is
 * skipped here: a posting list for INSERT/REPLACE, or a uint16 item count
 * plus that many ItemPointerData for ADDITEMS.  Decoding stops at the first
 * unrecognized action, because its payload length is unknown.
 */
static void
desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
{
	char	   *cursor = ((char *) insertData) + sizeof(ginxlogRecompressDataLeaf);
	int			actno;

	appendStringInfo(buf, " %d segments:", (int) insertData->nactions);

	for (actno = 0; actno < insertData->nactions; actno++)
	{
		uint8		a_segno = *((uint8 *) (cursor++));
		uint8		a_action = *((uint8 *) (cursor++));
		uint16		nitems = 0;
		int			newsegsize = 0;

		switch (a_action)
		{
			case GIN_SEGMENT_ADDITEMS:
				/* the count may be unaligned in the record, hence memcpy */
				memcpy(&nitems, cursor, sizeof(uint16));
				cursor += sizeof(uint16);
				cursor += nitems * sizeof(ItemPointerData);
				appendStringInfo(buf, " %d (add %d items)", a_segno, nitems);
				break;

			case GIN_SEGMENT_DELETE:
				/* no payload */
				appendStringInfo(buf, " %d (delete)", a_segno);
				break;

			case GIN_SEGMENT_INSERT:
				newsegsize = SizeOfGinPostingList((GinPostingList *) cursor);
				cursor += SHORTALIGN(newsegsize);
				appendStringInfo(buf, " %d (insert)", a_segno);
				break;

			case GIN_SEGMENT_REPLACE:
				newsegsize = SizeOfGinPostingList((GinPostingList *) cursor);
				cursor += SHORTALIGN(newsegsize);
				appendStringInfo(buf, " %d (replace)", a_segno);
				break;

			default:
				appendStringInfo(buf, " %d unknown action %d ???", a_segno, a_action);
				/* cannot decode unrecognized actions further */
				return;
		}
	}
}
/*
 * Shrink the posting list of a tuple previously built by GinFormTuple.
 *
 * 'nipd' is the new number of posting-list items and must not exceed the
 * current one.  The total size stored in t_info is recomputed from the
 * original (posting-list-free) tuple size plus 'nipd' item pointers, and
 * the stored item count is updated to match.  The tuple's memory is not
 * reallocated; only its size fields change.
 */
void
GinShortenTuple(IndexTuple itup, uint32 nipd)
{
	uint32		shrunk;

	Assert(nipd <= GinGetNPosting(itup));

	shrunk = MAXALIGN(SHORTALIGN(GinGetOrigSizePosting(itup)) + sizeof(ItemPointerData) * nipd);

	/* the tuple may only get smaller */
	Assert(shrunk <= (itup->t_info & INDEX_SIZE_MASK));

	/* replace the size bits of t_info, keeping the other flag bits */
	itup->t_info = (itup->t_info & ~INDEX_SIZE_MASK) | shrunk;

	GinSetNPosting(itup, nipd);
}
/*
 * Form a tuple for entry tree.
 *
 * On leaf pages, Index tuple has non-traditional layout. Tuple may contain
 * posting list or root blocknumber of posting tree.
 * Macros: GinIsPostingTree(itup) / GinSetPostingTree(itup, blkno)
 * 1) Posting list
 *		- itup->t_info & INDEX_SIZE_MASK contains total size of tuple as usual
 *		- ItemPointerGetBlockNumber(&itup->t_tid) contains original
 *		  size of tuple (without posting list).
 *		  Macros: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
 *		- ItemPointerGetOffsetNumber(&itup->t_tid) contains number
 *		  of elements in posting list (number of heap itempointers)
 *		  Macros: GinGetNPosting(itup) / GinSetNPosting(itup,n)
 *		- After standard part of tuple there is a posting list, ie, array
 *		  of heap itempointers
 *		  Macros: GinGetPosting(itup)
 * 2) Posting tree
 *		- itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
 *		- ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
 *		  root of posting tree
 *		- ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number
 *		  GIN_TREE_POSTING, which distinguishes this from posting-list case
 *
 * Attributes of an index tuple are different for single and multicolumn index.
 * For single-column case, index tuple stores only value to be indexed.
 * For multicolumn case, it stores two attributes: column number of value
 * and value.
 *
 * Arguments:
 *		ginstate - index state; supplies the tuple descriptor to use
 *		attnum	 - 1-based column number of the key (used in the
 *				   multicolumn case only)
 *		key		 - the key datum
 *		ipd		 - heap item pointers to copy into the posting list, or NULL
 *				   to reserve the space without filling it in here
 *		nipd	 - number of posting-list entries to reserve
 *
 * Returns the new tuple, or NULL if the tuple including its posting list
 * would not fit: either its size cannot be represented in the t_info size
 * bits, or it exceeds TOAST_INDEX_TARGET while holding more than one item.
 * In the NULL case the partially built tuple is released before returning.
 */
IndexTuple
GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData *ipd, uint32 nipd)
{
	bool		isnull[2] = {FALSE, FALSE};
	IndexTuple	itup;

	if (ginstate->oneCol)
		itup = index_form_tuple(ginstate->origTupdesc, &key, isnull);
	else
	{
		Datum		datums[2];

		datums[0] = UInt16GetDatum(attnum);
		datums[1] = key;
		itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull);
	}

	/* remember the size of the key part, before any posting list is added */
	GinSetOrigSizePosting(itup, IndexTupleSize(itup));

	if (nipd > 0)
	{
		uint32		newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData) * nipd);

		if (newsize >= INDEX_SIZE_MASK ||
			(newsize > TOAST_INDEX_TARGET && nipd > 1))
		{
			/* don't leak the tuple we are not going to hand back */
			pfree(itup);
			return NULL;
		}

		itup = repalloc(itup, newsize);

		/* set new size */
		itup->t_info &= ~INDEX_SIZE_MASK;
		itup->t_info |= newsize;

		if (ipd)
			memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd);
		GinSetNPosting(itup, nipd);
	}
	else
	{
		GinSetNPosting(itup, 0);
	}

	return itup;
}
/*
 * Check weight info.
 *
 * Locates the position vector stored at the short-aligned end of the lexeme
 * described by 'val' inside chkval->values, and returns true if at least
 * one of its positions has a weight whose bit is set in item->weight.
 * Returns false otherwise, including when the vector holds no positions.
 */
static bool
checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item)
{
	WordEntryPosVector *vec = (WordEntryPosVector *) (chkval->values + SHORTALIGN(val->pos + val->len));
	WordEntryPos *cur = vec->pos;
	WordEntryPos *stop = cur + vec->npos;

	for (; cur < stop; cur++)
	{
		if (item->weight & (1 << WEP_GETWEIGHT(*cur)))
			return true;
	}

	return false;
}
/*
 * Redo recompression of posting list.  Doing all the changes in-place is not
 * always possible, because it might require more space than we've on the page.
 * Instead, once modification is required we copy unprocessed tail of the page
 * into separately allocated chunk of memory for further reading original
 * versions of segments.  Thanks to that we don't bother about moving page data
 * in-place.
 *
 * 'page' is the leaf data page to modify; 'data' is the WAL payload: a
 * ginxlogRecompressDataLeaf header followed by data->nactions actions.  Each
 * action is a segment number byte and an action byte, followed by a posting
 * list (INSERT/REPLACE) or a uint16 count plus item pointers (ADDITEMS).
 *
 * Two cursors are maintained while replaying:
 *	- oldseg/segptr: read position in the original segments (on the page
 *	  itself until the first modification, in tailCopy afterwards);
 *	- writePtr: write position on the page for the new segment list.
 * On exit the page's data size is set to the number of bytes written.
 */
static void
ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
{
	int			actionno;
	int			segno;			/* index of the segment oldseg points at */
	GinPostingList *oldseg;		/* read cursor over the original segments */
	Pointer		segmentend;		/* end of the area oldseg is read from */
	char	   *walbuf;			/* read cursor over the WAL action list */
	int			totalsize;
	Pointer		tailCopy = NULL;	/* palloc'd copy of the unprocessed tail,
									 * made at the first modification */
	Pointer		writePtr;		/* write cursor on the page */
	Pointer		segptr;

	/*
	 * If the page is in pre-9.4 format, convert to new format first.
	 */
	if (!GinPageIsCompressed(page))
	{
		ItemPointer uncompressed = (ItemPointer) GinDataPageGetData(page);
		int			nuncompressed = GinPageGetOpaque(page)->maxoff;
		int			npacked;

		/*
		 * Empty leaf pages are deleted as part of vacuum, but leftmost and
		 * rightmost pages are never deleted.  So, pg_upgrade'd from pre-9.4
		 * instances might contain empty leaf pages, and we need to handle
		 * them correctly.
		 */
		if (nuncompressed > 0)
		{
			GinPostingList *plist;

			/* pack all items into a single segment of at most BLCKSZ bytes */
			plist = ginCompressPostingList(uncompressed, nuncompressed, BLCKSZ, &npacked);
			totalsize = SizeOfGinPostingList(plist);

			Assert(npacked == nuncompressed);

			memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize);
		}
		else
		{
			totalsize = 0;
		}

		GinDataPageSetDataSize(page, totalsize);
		GinPageSetCompressed(page);
		GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
	}

	/* Both cursors start at the first segment on the page */
	oldseg = GinDataLeafPageGetPostingList(page);
	writePtr = (Pointer) oldseg;
	segmentend = (Pointer) oldseg + GinDataLeafPageGetPostingListSize(page);
	segno = 0;

	/* The action list follows the fixed-size record header */
	walbuf = ((char *) data) + sizeof(ginxlogRecompressDataLeaf);
	for (actionno = 0; actionno < data->nactions; actionno++)
	{
		uint8		a_segno = *((uint8 *) (walbuf++));
		uint8		a_action = *((uint8 *) (walbuf++));
		GinPostingList *newseg = NULL;
		int			newsegsize = 0;
		ItemPointerData *items = NULL;
		uint16		nitems = 0;
		ItemPointerData *olditems;
		int			nolditems;
		ItemPointerData *newitems;
		int			nnewitems;
		int			segsize;

		/* Extract all the information we need from the WAL record */
		if (a_action == GIN_SEGMENT_INSERT || a_action == GIN_SEGMENT_REPLACE)
		{
			/* the new segment is used directly from the WAL buffer */
			newseg = (GinPostingList *) walbuf;
			newsegsize = SizeOfGinPostingList(newseg);
			walbuf += SHORTALIGN(newsegsize);
		}

		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			/* read the count with memcpy rather than a direct dereference */
			memcpy(&nitems, walbuf, sizeof(uint16));
			walbuf += sizeof(uint16);
			items = (ItemPointerData *) walbuf;
			walbuf += nitems * sizeof(ItemPointerData);
		}

		/* Skip to the segment that this action concerns */
		Assert(segno <= a_segno);
		while (segno < a_segno)
		{
			/*
			 * Once modification is started and page tail is copied, we've
			 * to copy unmodified segments.
			 *
			 * Before that point oldseg and writePtr refer to the same page
			 * bytes, so advancing writePtr alone is enough.
			 *
			 * NOTE(review): this assertion uses a strict '<' whereas the
			 * other bounds assertions in this function use '<='.
			 */
			segsize = SizeOfGinPostingList(oldseg);
			if (tailCopy)
			{
				Assert(writePtr + segsize < PageGetSpecialPointer(page));
				memcpy(writePtr, (Pointer) oldseg, segsize);
			}
			writePtr += segsize;
			oldseg = GinNextPostingListSegment(oldseg);
			segno++;
		}

		/*
		 * ADDITEMS action is handled like REPLACE, but the new segment to
		 * replace the old one is reconstructed using the old segment from
		 * disk and the new items from the WAL record.
		 */
		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			int			npacked;

			olditems = ginPostingListDecode(oldseg, &nolditems);

			newitems = ginMergeItemPointers(items, nitems, olditems, nolditems, &nnewitems);
			/* the WAL items are expected to be all new (no duplicates) */
			Assert(nnewitems == nolditems + nitems);

			newseg = ginCompressPostingList(newitems, nnewitems, BLCKSZ, &npacked);
			Assert(npacked == nnewitems);

			newsegsize = SizeOfGinPostingList(newseg);
			a_action = GIN_SEGMENT_REPLACE;
		}

		/* Size of the old segment this action applies to, if there is one */
		segptr = (Pointer) oldseg;
		if (segptr != segmentend)
			segsize = SizeOfGinPostingList(oldseg);
		else
		{
			/*
			 * Positioned after the last existing segment. Only INSERTs
			 * expected here.
			 */
			Assert(a_action == GIN_SEGMENT_INSERT);
			segsize = 0;
		}

		/*
		 * We're about to start modification of the page.  So, copy tail of the
		 * page if it's not done already.
		 *
		 * From here on the read cursor (segptr/oldseg/segmentend) refers to
		 * the copy, while writePtr keeps pointing into the page.
		 */
		if (!tailCopy && segptr != segmentend)
		{
			int			tailSize = segmentend - segptr;

			tailCopy = (Pointer) palloc(tailSize);
			memcpy(tailCopy, segptr, tailSize);
			segptr = tailCopy;
			oldseg = (GinPostingList *) segptr;
			segmentend = segptr + tailSize;
		}

		switch (a_action)
		{
			case GIN_SEGMENT_DELETE:
				/* consume the old segment without writing anything */
				segptr += segsize;
				segno++;
				break;

			case GIN_SEGMENT_INSERT:
				/* copy the new segment in place */
				/* the old segment is not consumed, so segno stays put */
				Assert(writePtr + newsegsize <= PageGetSpecialPointer(page));
				memcpy(writePtr, newseg, newsegsize);
				writePtr += newsegsize;
				break;

			case GIN_SEGMENT_REPLACE:
				/* copy the new version of segment in place */
				Assert(writePtr + newsegsize <= PageGetSpecialPointer(page));
				memcpy(writePtr, newseg, newsegsize);
				writePtr += newsegsize;
				segptr += segsize;
				segno++;
				break;

			default:
				elog(ERROR, "unexpected GIN leaf action: %u", a_action);
		}
		oldseg = (GinPostingList *) segptr;
	}

	/* Copy the rest of unmodified segments if any. */
	/* (if no tail copy was made, the remaining segments are already in place) */
	segptr = (Pointer) oldseg;
	if (segptr != segmentend && tailCopy)
	{
		int			restSize = segmentend - segptr;

		Assert(writePtr + restSize <= PageGetSpecialPointer(page));
		memcpy(writePtr, segptr, restSize);
		writePtr += restSize;
	}

	/* The new data size is everything written from the start of the list */
	totalsize = writePtr - (Pointer) GinDataLeafPageGetPostingList(page);
	GinDataPageSetDataSize(page, totalsize);
}
/*
 * Decode an XLOG_HEAP2_MULTI_INSERT record into one INSERT change per tuple
 * and queue each of them in the reorder buffer.
 *
 * Records for other databases, and records whose origin is filtered out by
 * the output plugin, are ignored.
 *
 * Currently MULTI_INSERT will always contain the full tuples.
 */
static void
DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
	XLogReaderState *r = buf->record;
	xl_heap_multi_insert *xlrec = (xl_heap_multi_insert *) XLogRecGetData(r);
	RelFileNode rnode;
	char	   *tupledata;
	char	   *data;
	Size		tuplelen;
	int			tupno;

	/* only interested in our database */
	XLogRecGetBlockTag(r, 0, &rnode, NULL, NULL);
	if (rnode.dbNode != ctx->slot->data.database)
		return;

	/* output plugin doesn't look for this origin, no need to queue */
	if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
		return;

	/* all tuples of the record are packed into the data of block 0 */
	tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
	data = tupledata;

	for (tupno = 0; tupno < xlrec->ntuples; tupno++)
	{
		ReorderBufferChange *change = ReorderBufferGetChange(ctx->reorder);

		change->action = REORDER_BUFFER_CHANGE_INSERT;
		change->origin_id = XLogRecGetOrigin(r);
		memcpy(&change->data.tp.relnode, &rnode, sizeof(RelFileNode));

		/*
		 * CONTAINS_NEW_TUPLE will always be set currently as multi_insert
		 * isn't used for catalogs, but better be future proof.
		 *
		 * We decode the tuple in pretty much the same way as DecodeXLogTuple,
		 * but since the layout is slightly different, we can't use it here.
		 */
		if (xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE)
		{
			ReorderBufferTupleBuf *tuple;
			xl_multi_insert_tuple *xlhdr;
			int			datalen;

			change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
			tuple = change->data.tp.newtuple;

			/* not a disk based tuple */
			ItemPointerSetInvalid(&tuple->tuple.t_self);

			/* each per-tuple header starts on a short-aligned address */
			xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(data);
			data = ((char *) xlhdr) + SizeOfMultiInsertTuple;
			datalen = xlhdr->datalen;

			/*
			 * We can only figure this out after reassembling the
			 * transactions.
			 */
			tuple->tuple.t_tableOid = InvalidOid;

			tuple->tuple.t_data = &tuple->t_data.header;
			tuple->tuple.t_len = datalen + SizeofHeapTupleHeader;

			/* start from a zeroed header, then append the logged data */
			memset(&tuple->t_data.header, 0, SizeofHeapTupleHeader);
			memcpy((char *) &tuple->t_data.header + SizeofHeapTupleHeader,
				   (char *) data,
				   datalen);
			data += datalen;

			/* restore the header fields that were logged */
			tuple->t_data.header.t_infomask = xlhdr->t_infomask;
			tuple->t_data.header.t_infomask2 = xlhdr->t_infomask2;
			tuple->t_data.header.t_hoff = xlhdr->t_hoff;
		}

		/*
		 * Reset toast reassembly state only after the last row in the last
		 * xl_multi_insert_tuple record emitted by one heap_multi_insert()
		 * call.
		 */
		change->data.tp.clear_toast_afterwards =
			((xlrec->flags & XLH_INSERT_LAST_IN_MULTI) &&
			 (tupno + 1) == xlrec->ntuples) ? true : false;

		ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),
								 buf->origptr, change);
	}

	/* every byte of the block data must have been consumed */
	Assert(data == tupledata + tuplelen);
}
/* * Encode a posting list. * * The encoded list is returned in a palloc'd struct, which will be at most * 'maxsize' bytes in size. The number items in the returned segment is * returned in *nwritten. If it's not equal to nipd, not all the items fit * in 'maxsize', and only the first *nwritten were encoded. * * The allocated size of the returned struct is short-aligned, and the padding * byte at the end, if any, is zero. */ GinPostingList * ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize, int *nwritten) { uint64 prev; int totalpacked = 0; int maxbytes; GinPostingList *result; unsigned char *ptr; unsigned char *endptr; maxsize = SHORTALIGN_DOWN(maxsize); result = palloc(maxsize); maxbytes = maxsize - offsetof(GinPostingList, bytes); Assert(maxbytes > 0); /* Store the first special item */ result->first = ipd[0]; prev = itemptr_to_uint64(&result->first); ptr = result->bytes; endptr = result->bytes + maxbytes; for (totalpacked = 1; totalpacked < nipd; totalpacked++) { uint64 val = itemptr_to_uint64(&ipd[totalpacked]); uint64 delta = val - prev; Assert(val > prev); if (endptr - ptr >= 6) encode_varbyte(delta, &ptr); else { /* * There are less than 6 bytes left. Have to check if the next * item fits in that space before writing it out. */ unsigned char buf[6]; unsigned char *p = buf; encode_varbyte(delta, &p); if (p - buf > (endptr - ptr)) break; /* output is full */ memcpy(ptr, buf, p - buf); ptr += (p - buf); } prev = val; } result->nbytes = ptr - result->bytes; /* * If we wrote an odd number of bytes, zero out the padding byte at the * end. */ if (result->nbytes != SHORTALIGN(result->nbytes)) result->bytes[result->nbytes] = 0; if (nwritten) *nwritten = totalpacked; Assert(SizeOfGinPostingList(result) <= maxsize); /* * Check that the encoded segment decodes back to the original items. 
*/ #if defined (CHECK_ENCODING_ROUNDTRIP) { int ndecoded; ItemPointer tmp = ginPostingListDecode(result, &ndecoded); int i; Assert(ndecoded == totalpacked); for (i = 0; i < ndecoded; i++) Assert(memcmp(&tmp[i], &ipd[i], sizeof(ItemPointerData)) == 0); pfree(tmp); } #endif return result; }
/*
 * Build a tsvector from parsed text.
 *
 * The words in 'prs' are first de-duplicated with uniqueWORD().  The result
 * holds one WordEntry per remaining word, followed by the string area: each
 * lexeme, and for words that carry positions a short-aligned uint16 count
 * followed by the positions (all with weight 0).
 *
 * The word strings, the position arrays and prs->words itself are pfree'd
 * along the way.  Raises an error if the string area would exceed MAXSTRPOS
 * bytes, or if a word has more than 0xFFFF positions.
 */
TSVector
make_tsvector(ParsedText *prs)
{
	int			w,
				p,
				datalen = 0,
				nbytes;
	TSVector	out;
	WordEntry  *entry;
	char	   *strarea;
	int			off;

	prs->curwords = uniqueWORD(prs->words, prs->curwords);

	/* First pass: compute the size of the string area */
	for (w = 0; w < prs->curwords; w++)
	{
		datalen += prs->words[w].len;
		if (!prs->words[w].alen)
			continue;

		/* apos[0] holds the number of positions that follow it */
		datalen = SHORTALIGN(datalen);
		datalen += sizeof(uint16) + prs->words[w].pos.apos[0] * sizeof(WordEntryPos);
	}

	if (datalen > MAXSTRPOS)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", datalen, MAXSTRPOS)));

	nbytes = CALCDATASIZE(prs->curwords, datalen);
	out = (TSVector) palloc0(nbytes);
	SET_VARSIZE(out, nbytes);
	out->size = prs->curwords;

	/* Second pass: fill in the entries and the string area */
	entry = ARRPTR(out);
	strarea = STRPTR(out);
	off = 0;
	for (w = 0; w < prs->curwords; w++, entry++)
	{
		entry->len = prs->words[w].len;
		entry->pos = off;
		memcpy(strarea + off, prs->words[w].word, prs->words[w].len);
		off += prs->words[w].len;
		pfree(prs->words[w].word);

		if (!prs->words[w].alen)
			entry->haspos = 0;
		else
		{
			int			npos = prs->words[w].pos.apos[0];
			WordEntryPos *dst;

			if (npos > 0xFFFF)
				elog(ERROR, "positions array too long");

			entry->haspos = 1;

			/* the position count goes at the next short-aligned offset */
			off = SHORTALIGN(off);
			*(uint16 *) (strarea + off) = (uint16) npos;

			dst = POSDATAPTR(out, entry);
			for (p = 0; p < npos; p++)
			{
				WEP_SETWEIGHT(dst[p], 0);
				WEP_SETPOS(dst[p], prs->words[w].pos.apos[p + 1]);
			}

			off += sizeof(uint16) + npos * sizeof(WordEntryPos);
			pfree(prs->words[w].pos.apos);
		}
	}

	pfree(prs->words);
	return out;
}
/*
 * Concatenate two tsvectors (SQL-callable; both arguments are tsvectors).
 *
 * The entry arrays of both inputs are merged in compareEntry() order.  A
 * lexeme present in only one input is copied with its positions; a lexeme
 * present in both is emitted once with the positions of both.  Positions
 * coming from the second input go through add_pos() together with the
 * largest position found in the first input (add_pos is defined elsewhere;
 * presumably it shifts the positions by that amount and returns the number
 * of positions it appended -- confirm against its definition).
 *
 * Raises an error if the resulting string area exceeds MAXSTRPOS bytes.
 */
Datum
tsvector_concat(PG_FUNCTION_ARGS)
{
	TSVector	in1 = PG_GETARG_TSVECTOR(0);
	TSVector	in2 = PG_GETARG_TSVECTOR(1);
	TSVector	out;
	WordEntry  *ptr;			/* next output entry */
	WordEntry  *ptr1,			/* next unprocessed entry of in1 */
			   *ptr2;			/* next unprocessed entry of in2 */
	WordEntryPos *p;
	int			maxpos = 0,
				i,
				j,
				i1,				/* entries of in1 still to process */
				i2,				/* entries of in2 still to process */
				dataoff,		/* bytes used so far in the output string area */
				output_bytes,
				output_size;
	char	   *data,
			   *data1,
			   *data2;

	/* Get max position in in1; we'll need this to offset in2's positions */
	ptr = ARRPTR(in1);
	i = in1->size;
	while (i--)
	{
		if ((j = POSDATALEN(in1, ptr)) != 0)
		{
			p = POSDATAPTR(in1, ptr);
			while (j--)
			{
				if (WEP_GETPOS(*p) > maxpos)
					maxpos = WEP_GETPOS(*p);
				p++;
			}
		}
		ptr++;
	}

	ptr1 = ARRPTR(in1);
	ptr2 = ARRPTR(in2);
	data1 = STRPTR(in1);
	data2 = STRPTR(in2);
	i1 = in1->size;
	i2 = in2->size;

	/*
	 * Conservative estimate of space needed.  We might need all the data in
	 * both inputs, and conceivably add a pad byte before position data for
	 * each item where there was none before.
	 */
	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;

	out = (TSVector) palloc0(output_bytes);
	SET_VARSIZE(out, output_bytes);

	/*
	 * We must make out->size valid so that STRPTR(out) is sensible.  We'll
	 * collapse out any unused space at the end.
	 */
	out->size = in1->size + in2->size;

	ptr = ARRPTR(out);
	data = STRPTR(out);
	dataoff = 0;

	/* Merge while both inputs still have entries */
	while (i1 && i2)
	{
		int			cmp = compareEntry(data1, ptr1, data2, ptr2);

		if (cmp < 0)
		{						/* in1 first */
			ptr->haspos = ptr1->haspos;
			ptr->len = ptr1->len;
			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
			ptr->pos = dataoff;
			dataoff += ptr1->len;
			if (ptr->haspos)
			{
				/* copy in1's position vector (count + positions) verbatim */
				dataoff = SHORTALIGN(dataoff);
				memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
				dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
			}

			ptr++;
			ptr1++;
			i1--;
		}
		else if (cmp > 0)
		{						/* in2 first */
			ptr->haspos = ptr2->haspos;
			ptr->len = ptr2->len;
			memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
			ptr->pos = dataoff;
			dataoff += ptr2->len;
			if (ptr->haspos)
			{
				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

				/* nothing was added: the entry ends up without positions */
				if (addlen == 0)
					ptr->haspos = 0;
				else
				{
					dataoff = SHORTALIGN(dataoff);
					dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
				}
			}

			ptr++;
			ptr2++;
			i2--;
		}
		else
		{
			/* same lexeme in both inputs: emit it once, merging positions */
			ptr->haspos = ptr1->haspos | ptr2->haspos;
			ptr->len = ptr1->len;
			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
			ptr->pos = dataoff;
			dataoff += ptr1->len;
			if (ptr->haspos)
			{
				if (ptr1->haspos)
				{
					dataoff = SHORTALIGN(dataoff);
					memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
					dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
					/* the count slot is already accounted for above */
					if (ptr2->haspos)
						dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
				}
				else	/* must have ptr2->haspos */
				{
					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

					if (addlen == 0)
						ptr->haspos = 0;
					else
					{
						dataoff = SHORTALIGN(dataoff);
						dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
					}
				}
			}

			ptr++;
			ptr1++;
			ptr2++;
			i1--;
			i2--;
		}
	}

	/* Copy whatever is left of in1 */
	while (i1)
	{
		ptr->haspos = ptr1->haspos;
		ptr->len = ptr1->len;
		memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
		ptr->pos = dataoff;
		dataoff += ptr1->len;
		if (ptr->haspos)
		{
			dataoff = SHORTALIGN(dataoff);
			memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
			dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
		}

		ptr++;
		ptr1++;
		i1--;
	}

	/* Copy whatever is left of in2 */
	while (i2)
	{
		ptr->haspos = ptr2->haspos;
		ptr->len = ptr2->len;
		memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
		ptr->pos = dataoff;
		dataoff += ptr2->len;
		if (ptr->haspos)
		{
			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

			if (addlen == 0)
				ptr->haspos = 0;
			else
			{
				dataoff = SHORTALIGN(dataoff);
				dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
			}
		}

		ptr++;
		ptr2++;
		i2--;
	}

	/*
	 * Instead of checking each offset individually, we check for overflow of
	 * pos fields once at the end.
	 */
	if (dataoff > MAXSTRPOS)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));

	/*
	 * Adjust sizes (asserting that we didn't overrun the original estimates)
	 * and collapse out any unused array entries.
	 *
	 * Shrinking out->size changes what STRPTR(out) evaluates to, so the
	 * string data written at 'data' has to be moved down to the new start.
	 */
	output_size = ptr - ARRPTR(out);
	Assert(output_size <= out->size);
	out->size = output_size;
	if (data != STRPTR(out))
		memmove(STRPTR(out), data, dataoff);
	output_bytes = CALCDATASIZE(out->size, dataoff);
	Assert(output_bytes <= VARSIZE(out));
	SET_VARSIZE(out, output_bytes);

	PG_FREE_IF_COPY(in1, 0);
	PG_FREE_IF_COPY(in2, 1);
	PG_RETURN_POINTER(out);
}
Datum tsvector_concat(PG_FUNCTION_ARGS) { TSVector in1 = PG_GETARG_TSVECTOR(0); TSVector in2 = PG_GETARG_TSVECTOR(1); TSVector out; WordEntry *ptr; WordEntry *ptr1, *ptr2; WordEntryPos *p; int maxpos = 0, i, j, i1, i2, dataoff; char *data, *data1, *data2; ptr = ARRPTR(in1); i = in1->size; while (i--) { if ((j = POSDATALEN(in1, ptr)) != 0) { p = POSDATAPTR(in1, ptr); while (j--) { if (WEP_GETPOS(*p) > maxpos) maxpos = WEP_GETPOS(*p); p++; } } ptr++; } ptr1 = ARRPTR(in1); ptr2 = ARRPTR(in2); data1 = STRPTR(in1); data2 = STRPTR(in2); i1 = in1->size; i2 = in2->size; /* conservative estimate of space needed */ out = (TSVector) palloc0(VARSIZE(in1) + VARSIZE(in2)); SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2)); out->size = in1->size + in2->size; ptr = ARRPTR(out); data = STRPTR(out); dataoff = 0; while (i1 && i2) { int cmp = compareEntry(data1, ptr1, data2, ptr2); if (cmp < 0) { /* in1 first */ ptr->haspos = ptr1->haspos; ptr->len = ptr1->len; memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); ptr->pos = dataoff; dataoff += ptr1->len; if (ptr->haspos) { dataoff = SHORTALIGN(dataoff); memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } ptr++; ptr1++; i1--; } else if (cmp > 0) { /* in2 first */ ptr->haspos = ptr2->haspos; ptr->len = ptr2->len; memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len); ptr->pos = dataoff; dataoff += ptr2->len; if (ptr->haspos) { int addlen = add_pos(in2, ptr2, out, ptr, maxpos); if (addlen == 0) ptr->haspos = 0; else { dataoff = SHORTALIGN(dataoff); dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); } } ptr++; ptr2++; i2--; } else { ptr->haspos = ptr1->haspos | ptr2->haspos; ptr->len = ptr1->len; memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); ptr->pos = dataoff; dataoff += ptr1->len; if (ptr->haspos) { if (ptr1->haspos) { dataoff = SHORTALIGN(dataoff); memcpy(data + dataoff, _POSVECPTR(in1, 
ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); if (ptr2->haspos) dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos); } else /* must have ptr2->haspos */ { int addlen = add_pos(in2, ptr2, out, ptr, maxpos); if (addlen == 0) ptr->haspos = 0; else { dataoff = SHORTALIGN(dataoff); dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); } } } ptr++; ptr1++; ptr2++; i1--; i2--; } } while (i1) { ptr->haspos = ptr1->haspos; ptr->len = ptr1->len; memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); ptr->pos = dataoff; dataoff += ptr1->len; if (ptr->haspos) { dataoff = SHORTALIGN(dataoff); memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } ptr++; ptr1++; i1--; } while (i2) { ptr->haspos = ptr2->haspos; ptr->len = ptr2->len; memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len); ptr->pos = dataoff; dataoff += ptr2->len; if (ptr->haspos) { int addlen = add_pos(in2, ptr2, out, ptr, maxpos); if (addlen == 0) ptr->haspos = 0; else { dataoff = SHORTALIGN(dataoff); dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); } } ptr++; ptr2++; i2--; } /* * Instead of checking each offset individually, we check for overflow of * pos fields once at the end. */ if (dataoff > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS))); out->size = ptr - ARRPTR(out); SET_VARSIZE(out, CALCDATASIZE(out->size, dataoff)); if (data != STRPTR(out)) memmove(STRPTR(out), data, dataoff); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); PG_RETURN_POINTER(out); }
Datum tsvectorin(PG_FUNCTION_ARGS) { char *buf = PG_GETARG_CSTRING(0); TSVectorParseState state; WordEntryIN *arr; int totallen; int arrlen; /* allocated size of arr */ WordEntry *inarr; int len = 0; TSVector in; int i; char *token; int toklen; WordEntryPos *pos; int poslen; char *strbuf; int stroff; /* * Tokens are appended to tmpbuf, cur is a pointer to the end of used * space in tmpbuf. */ char *tmpbuf; char *cur; int buflen = 256; /* allocated size of tmpbuf */ state = init_tsvector_parser(buf, false, false); arrlen = 64; arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen); cur = tmpbuf = (char *) palloc(buflen); while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL)) { if (toklen >= MAXSTRLEN) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("word is too long (%ld bytes, max %ld bytes)", (long) toklen, (long) (MAXSTRLEN - 1)))); if (cur - tmpbuf > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)", (long) (cur - tmpbuf), (long) MAXSTRPOS))); /* * Enlarge buffers if needed */ if (len >= arrlen) { arrlen *= 2; arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * arrlen); } while ((cur - tmpbuf) + toklen >= buflen) { int dist = cur - tmpbuf; buflen *= 2; tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); cur = tmpbuf + dist; } arr[len].entry.len = toklen; arr[len].entry.pos = cur - tmpbuf; memcpy((void *) cur, (void *) token, toklen); cur += toklen; if (poslen != 0) { arr[len].entry.haspos = 1; arr[len].pos = pos; arr[len].poslen = poslen; } else { arr[len].entry.haspos = 0; arr[len].pos = NULL; arr[len].poslen = 0; } len++; } close_tsvector_parser(state); if (len > 0) len = uniqueentry(arr, len, tmpbuf, &buflen); else buflen = 0; if (buflen > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS))); totallen = CALCDATASIZE(len, 
buflen); in = (TSVector) palloc0(totallen); SET_VARSIZE(in, totallen); in->size = len; inarr = ARRPTR(in); strbuf = STRPTR(in); stroff = 0; for (i = 0; i < len; i++) { memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len); arr[i].entry.pos = stroff; stroff += arr[i].entry.len; if (arr[i].entry.haspos) { if (arr[i].poslen > 0xFFFF) elog(ERROR, "positions array too long"); /* Copy number of positions */ stroff = SHORTALIGN(stroff); *(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen; stroff += sizeof(uint16); /* Copy positions */ memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos)); stroff += arr[i].poslen * sizeof(WordEntryPos); pfree(arr[i].pos); } inarr[i] = arr[i].entry; } Assert((strbuf + stroff - (char *) in) == totallen); PG_RETURN_TSVECTOR(in); }
/*
 * Make value of tsvector from parsed text.
 *
 * The words in 'prs' are first de-duplicated with uniqueWORD().  In the
 * result every lexeme occupies a short-aligned slot of the string area; a
 * word with positions is followed by a uint16 count and its positions (all
 * with weight 0).
 *
 * The word strings, the position arrays and prs->words itself are pfree'd
 * along the way.  Raises an error if a lexeme would start beyond MAXSTRPOS.
 */
static tsvector *
makevalue(PRSTEXT * prs)
{
	int4		w,
				k,
				datalen = 0,
				nbytes;
	tsvector   *out;
	WordEntry  *entry;
	char	   *base,
			   *wr;

	prs->curwords = uniqueWORD(prs->words, prs->curwords);

	/* First pass: compute the size of the string area */
	for (w = 0; w < prs->curwords; w++)
	{
		datalen += SHORTALIGN(prs->words[w].len);

		/* apos[0] holds the number of positions that follow it */
		if (prs->words[w].alen)
			datalen += sizeof(uint16) + prs->words[w].pos.apos[0] * sizeof(WordEntryPos);
	}

	nbytes = CALCDATASIZE(prs->curwords, datalen);
	out = (tsvector *) palloc(nbytes);
	memset(out, 0, nbytes);
	out->len = nbytes;
	out->size = prs->curwords;

	/* Second pass: fill in the entries and the string area */
	entry = ARRPTR(out);
	base = STRPTR(out);
	wr = base;
	for (w = 0; w < prs->curwords; w++, entry++)
	{
		entry->len = prs->words[w].len;

		if (wr - base > MAXSTRPOS)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("value is too big")));
		entry->pos = wr - base;

		memcpy((void *) wr, (void *) prs->words[w].word, prs->words[w].len);
		pfree(prs->words[w].word);
		wr += SHORTALIGN(prs->words[w].len);

		if (!prs->words[w].alen)
			entry->haspos = 0;
		else
		{
			WordEntryPos *dst;

			entry->haspos = 1;

			/* the count goes first, the positions right after it */
			*(uint16 *) wr = prs->words[w].pos.apos[0];
			dst = POSDATAPTR(out, entry);
			for (k = 0; k < *(uint16 *) wr; k++)
			{
				dst[k].weight = 0;
				dst[k].pos = prs->words[w].pos.apos[k + 1];
			}

			wr += sizeof(uint16) + prs->words[w].pos.apos[0] * sizeof(WordEntryPos);
			pfree(prs->words[w].pos.apos);
		}
	}

	pfree(prs->words);
	return out;
}
Datum tsvector_in(PG_FUNCTION_ARGS) { char *buf = PG_GETARG_CSTRING(0); TI_IN_STATE state; WordEntryIN *arr; WordEntry *inarr; int4 len = 0, totallen = 64; tsvector *in; char *tmpbuf, *cur; int4 i, buflen = 256; state.prsbuf = buf; state.len = 32; state.word = (char *) palloc(state.len); state.oprisdelim = false; arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen); cur = tmpbuf = (char *) palloc(buflen); while (gettoken_tsvector(&state)) { if (len >= totallen) { totallen *= 2; arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen); } while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen) { int4 dist = cur - tmpbuf; buflen *= 2; tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); cur = tmpbuf + dist; } if (state.curpos - state.word >= MAXSTRLEN) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("word is too long"))); arr[len].entry.len = state.curpos - state.word; if (cur - tmpbuf > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("too long value"))); arr[len].entry.pos = cur - tmpbuf; memcpy((void *) cur, (void *) state.word, arr[len].entry.len); cur += arr[len].entry.len; if (state.alen) { arr[len].entry.haspos = 1; arr[len].pos = state.pos; } else arr[len].entry.haspos = 0; len++; } pfree(state.word); if (len > 0) len = uniqueentry(arr, len, tmpbuf, &buflen); else buflen=0; totallen = CALCDATASIZE(len, buflen); in = (tsvector *) palloc(totallen); memset(in, 0, totallen); in->len = totallen; in->size = len; cur = STRPTR(in); inarr = ARRPTR(in); for (i = 0; i < len; i++) { memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len); arr[i].entry.pos = cur - STRPTR(in); cur += SHORTALIGN(arr[i].entry.len); if (arr[i].entry.haspos) { memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos)); cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos); pfree(arr[i].pos); } memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry)); } pfree(tmpbuf); pfree(arr); 
PG_RETURN_POINTER(in); }
/*
 * Sort the 'l' entries of 'a' and merge entries for the same lexeme.
 *
 * 'buf' is the buffer the entries' pos/len fields refer to.  Entries for
 * equal lexemes are collapsed into one; their position arrays are
 * concatenated and then reduced with uniquePos().  In a position array,
 * slot 0 holds the number of positions (as a uint16) and the positions
 * follow it.
 *
 * Returns the number of entries left at the front of 'a'.
 *
 * *outbuflen is a pure output: it is set to the number of bytes the caller
 * needs for the string area, i.e. for every remaining entry
 * SHORTALIGN(len) bytes for the lexeme plus, if it has positions,
 * (npos + 1) * sizeof(WordEntryPos) bytes for the count slot and the
 * positions.  That is exactly what tsvector_in() copies per entry.  Any
 * value in *outbuflen on entry is ignored, so the result neither
 * under-counts the count slot nor includes unrelated slack.
 */
static int
uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
{
	WordEntryIN *ptr,
			   *res;
	int4		buflen = 0;

	/* nothing to do for an empty array */
	if (l <= 0)
	{
		*outbuflen = 0;
		return 0;
	}

	if (l > 1)
	{
		/* compareentry reads the lexemes through BufferStr */
		BufferStr = buf;
		qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
	}

	res = a;
	ptr = a + 1;
	while (ptr - a < l)
	{
		if (!(ptr->entry.len == res->entry.len &&
			  strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
		{
			/* 'res' is complete: finalize its positions and count its space */
			if (res->entry.haspos)
			{
				*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
				buflen += (*(uint16 *) (res->pos) + 1) * sizeof(WordEntryPos);
			}
			buflen += SHORTALIGN(res->entry.len);

			/* start a new result entry (avoid copying a slot onto itself) */
			res++;
			if (res != ptr)
				memcpy(res, ptr, sizeof(WordEntryIN));
		}
		else if (ptr->entry.haspos)
		{
			/* duplicate lexeme carrying positions: fold them into 'res' */
			if (res->entry.haspos)
			{
				int4		len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);

				res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
				memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
					   &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
				*(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
				pfree(ptr->pos);
			}
			else
			{
				/* 'res' had none yet: just take over the array */
				res->entry.haspos = 1;
				res->pos = ptr->pos;
			}
		}
		ptr++;
	}

	/* count space needed for the last entry */
	if (res->entry.haspos)
	{
		*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
		buflen += (*(uint16 *) (res->pos) + 1) * sizeof(WordEntryPos);
	}
	buflen += SHORTALIGN(res->entry.len);

	*outbuflen = buflen;
	return res + 1 - a;
}
/*
 * Replay a leaf-page recompression record.
 *
 * 'data' is followed in memory by data->nactions action entries.  Each entry
 * starts with a segment number byte and an action byte; INSERT and REPLACE
 * entries are followed by a posting list segment, ADDITEMS entries by a
 * uint16 item count and that many item pointers.  The actions are applied in
 * order to the posting list segments stored on 'page', editing the page in
 * place, and finally the page's data size is updated to the resulting total.
 *
 * NOTE(review): memory returned by ginCompressPostingList,
 * ginPostingListDecode and ginMergeItemPointers is not released here;
 * presumably the caller's memory context takes care of it -- confirm.
 */
static void
ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
{
	int			actionno;
	int			segno;
	GinPostingList *oldseg;
	Pointer		segmentend;
	char	   *walbuf;
	int			totalsize;

	/*
	 * If the page is in pre-9.4 format, convert to new format first.
	 */
	if (!GinPageIsCompressed(page))
	{
		ItemPointer uncompressed = (ItemPointer) GinDataPageGetData(page);
		int			nuncompressed = GinPageGetOpaque(page)->maxoff;
		int			npacked;
		GinPostingList *plist;

		plist = ginCompressPostingList(uncompressed, nuncompressed,
									   BLCKSZ, &npacked);
		Assert(npacked == nuncompressed);

		totalsize = SizeOfGinPostingList(plist);

		memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize);
		GinDataPageSetDataSize(page, totalsize);
		GinPageSetCompressed(page);
		GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
	}

	/* oldseg/segno track the current segment; segmentend is the end of data */
	oldseg = GinDataLeafPageGetPostingList(page);
	segmentend = (Pointer) oldseg + GinDataLeafPageGetPostingListSize(page);
	segno = 0;

	/* the action entries start right after the fixed-size record header */
	walbuf = ((char *) data) + sizeof(ginxlogRecompressDataLeaf);
	for (actionno = 0; actionno < data->nactions; actionno++)
	{
		uint8		a_segno = *((uint8 *) (walbuf++));
		uint8		a_action = *((uint8 *) (walbuf++));
		GinPostingList *newseg = NULL;
		int			newsegsize = 0;
		ItemPointerData *items = NULL;
		uint16		nitems = 0;
		ItemPointerData *olditems;
		int			nolditems;
		ItemPointerData *newitems;
		int			nnewitems;
		int			segsize;
		Pointer		segptr;
		int			szleft;

		/* Extract all the information we need from the WAL record */
		if (a_action == GIN_SEGMENT_INSERT ||
			a_action == GIN_SEGMENT_REPLACE)
		{
			newseg = (GinPostingList *) walbuf;
			newsegsize = SizeOfGinPostingList(newseg);
			walbuf += SHORTALIGN(newsegsize);
		}

		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			/* the count is copied out with memcpy rather than dereferenced */
			memcpy(&nitems, walbuf, sizeof(uint16));
			walbuf += sizeof(uint16);
			items = (ItemPointerData *) walbuf;
			walbuf += nitems * sizeof(ItemPointerData);
		}

		/* Skip to the segment that this action concerns */
		Assert(segno <= a_segno);
		while (segno < a_segno)
		{
			oldseg = GinNextPostingListSegment(oldseg);
			segno++;
		}

		/*
		 * ADDITEMS action is handled like REPLACE, but the new segment to
		 * replace the old one is reconstructed using the old segment from
		 * disk and the new items from the WAL record.
		 */
		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			int			npacked;

			olditems = ginPostingListDecode(oldseg, &nolditems);

			newitems = ginMergeItemPointers(items, nitems,
											olditems, nolditems,
											&nnewitems);
			Assert(nnewitems == nolditems + nitems);

			newseg = ginCompressPostingList(newitems, nnewitems,
											BLCKSZ, &npacked);
			Assert(npacked == nnewitems);

			newsegsize = SizeOfGinPostingList(newseg);
			a_action = GIN_SEGMENT_REPLACE;
		}

		segptr = (Pointer) oldseg;
		if (segptr != segmentend)
			segsize = SizeOfGinPostingList(oldseg);
		else
		{
			/*
			 * Positioned after the last existing segment. Only INSERTs
			 * expected here.
			 */
			Assert(a_action == GIN_SEGMENT_INSERT);
			segsize = 0;
		}
		/* bytes from the current segment's start to the end of the data */
		szleft = segmentend - segptr;

		switch (a_action)
		{
			case GIN_SEGMENT_DELETE:
				/* close the gap; segptr now points at the following segment */
				memmove(segptr, segptr + segsize, szleft - segsize);
				segmentend -= segsize;

				segno++;
				break;

			case GIN_SEGMENT_INSERT:
				/* make room for the new segment */
				memmove(segptr + newsegsize, segptr, szleft);
				/* copy the new segment in place */
				memcpy(segptr, newseg, newsegsize);
				segmentend += newsegsize;
				segptr += newsegsize;
				/* segno is not advanced: no old segment was consumed */
				break;

			case GIN_SEGMENT_REPLACE:
				/* shift the segments that follow */
				memmove(segptr + newsegsize,
						segptr + segsize,
						szleft - segsize);
				/* copy the replacement segment in place */
				memcpy(segptr, newseg, newsegsize);
				segmentend -= segsize;
				segmentend += newsegsize;
				segptr += newsegsize;
				segno++;
				break;

			default:
				elog(ERROR, "unexpected GIN leaf action: %u", a_action);
		}
		oldseg = (GinPostingList *) segptr;
	}

	totalsize = segmentend - (Pointer) GinDataLeafPageGetPostingList(page);
	GinDataPageSetDataSize(page, totalsize);
}
Datum tsvectorrecv(PG_FUNCTION_ARGS) { StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); TSVector vec; int i; int32 nentries; int datalen; /* number of bytes used in the variable size * area after fixed size TSVector header and * WordEntries */ Size hdrlen; Size len; /* allocated size of vec */ bool needSort = false; nentries = pq_getmsgint(buf, sizeof(int32)); if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry))) elog(ERROR, "invalid size of tsvector"); hdrlen = DATAHDRSIZE + sizeof(WordEntry) * nentries; len = hdrlen * 2; /* times two to make room for lexemes */ vec = (TSVector) palloc0(len); vec->size = nentries; datalen = 0; for (i = 0; i < nentries; i++) { const char *lexeme; uint16 npos; size_t lex_len; lexeme = pq_getmsgstring(buf); npos = (uint16) pq_getmsgint(buf, sizeof(uint16)); /* sanity checks */ lex_len = strlen(lexeme); if (lex_len > MAXSTRLEN) elog(ERROR, "invalid tsvector: lexeme too long"); if (datalen > MAXSTRPOS) elog(ERROR, "invalid tsvector: maximum total lexeme length exceeded"); if (npos > MAXNUMPOS) elog(ERROR, "unexpected number of tsvector positions"); /* * Looks valid. Fill the WordEntry struct, and copy lexeme. * * But make sure the buffer is large enough first. */ while (hdrlen + SHORTALIGN(datalen + lex_len) + (npos + 1) * sizeof(WordEntryPos) >= len) { len *= 2; vec = (TSVector) repalloc(vec, len); } vec->entries[i].haspos = (npos > 0) ? 1 : 0; vec->entries[i].len = lex_len; vec->entries[i].pos = datalen; memcpy(STRPTR(vec) + datalen, lexeme, lex_len); datalen += lex_len; if (i > 0 && WordEntryCMP(&vec->entries[i], &vec->entries[i - 1], STRPTR(vec)) <= 0) needSort = true; /* Receive positions */ if (npos > 0) { uint16 j; WordEntryPos *wepptr; /* * Pad to 2-byte alignment if necessary. Though we used palloc0 * for the initial allocation, subsequent repalloc'd memory areas * are not initialized to zero. 
*/ if (datalen != SHORTALIGN(datalen)) { *(STRPTR(vec) + datalen) = '\0'; datalen = SHORTALIGN(datalen); } memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16)); wepptr = POSDATAPTR(vec, &vec->entries[i]); for (j = 0; j < npos; j++) { wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos)); if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1])) elog(ERROR, "position information is misordered"); } datalen += (npos + 1) * sizeof(WordEntry); } } SET_VARSIZE(vec, hdrlen + datalen); if (needSort) qsort_arg((void *) ARRPTR(vec), vec->size, sizeof(WordEntry), compareentry, (void *) STRPTR(vec)); PG_RETURN_TSVECTOR(vec); }
/* * Form a tuple for entry tree. * * If the tuple would be too big to be stored, function throws a suitable * error if errorTooBig is TRUE, or returns NULL if errorTooBig is FALSE. * * See src/backend/access/gin/README for a description of the index tuple * format that is being built here. We build on the assumption that we * are making a leaf-level key entry containing a posting list of nipd items. * If the caller is actually trying to make a posting-tree entry, non-leaf * entry, or pending-list entry, it should pass dataSize = 0 and then overwrite * the t_tid fields as necessary. In any case, 'data' can be NULL to skip * filling in the posting list; the caller is responsible for filling it * afterwards if data = NULL and nipd > 0. */ IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, Pointer data, Size dataSize, int nipd, bool errorTooBig) { Datum datums[2]; bool isnull[2]; IndexTuple itup; uint32 newsize; /* Build the basic tuple: optional column number, plus key datum */ if (ginstate->oneCol) { datums[0] = key; isnull[0] = (category != GIN_CAT_NORM_KEY); } else { datums[0] = UInt16GetDatum(attnum); isnull[0] = false; datums[1] = key; isnull[1] = (category != GIN_CAT_NORM_KEY); } itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull); /* * Determine and store offset to the posting list, making sure there is * room for the category byte if needed. * * Note: because index_form_tuple MAXALIGNs the tuple size, there may well * be some wasted pad space. Is it worth recomputing the data length to * prevent that? That would also allow us to Assert that the real data * doesn't overlap the GinNullCategory byte, which this code currently * takes on faith. 
*/ newsize = IndexTupleSize(itup); if (IndexTupleHasNulls(itup)) { uint32 minsize; Assert(category != GIN_CAT_NORM_KEY); minsize = GinCategoryOffset(itup, ginstate) + sizeof(GinNullCategory); newsize = Max(newsize, minsize); } newsize = SHORTALIGN(newsize); GinSetPostingOffset(itup, newsize); GinSetNPosting(itup, nipd); /* * Add space needed for posting list, if any. Then check that the tuple * won't be too big to store. */ newsize += dataSize; newsize = MAXALIGN(newsize); if (newsize > GinMaxItemSize) { if (errorTooBig) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("index row size %zu exceeds maximum %zu for index \"%s\"", (Size) newsize, (Size) GinMaxItemSize, RelationGetRelationName(ginstate->index)))); pfree(itup); return NULL; } /* * Resize tuple if needed */ if (newsize != IndexTupleSize(itup)) { itup = repalloc(itup, newsize); /* * PostgreSQL 9.3 and earlier did not clear this new space, so we * might find uninitialized padding when reading tuples from disk. */ memset((char *) itup + IndexTupleSize(itup), 0, newsize - IndexTupleSize(itup)); /* set new size in tuple header */ itup->t_info &= ~INDEX_SIZE_MASK; itup->t_info |= newsize; } /* * Copy in the posting list, if provided */ if (data) { char *ptr = GinGetPosting(itup); memcpy(ptr, data, dataSize); } /* * Insert category byte, if needed */ if (category != GIN_CAT_NORM_KEY) { Assert(IndexTupleHasNulls(itup)); GinSetNullCategory(itup, ginstate, category); } return itup; }
/*
 * uniqueentry - sort a WordEntryIN array and fold duplicates together.
 *
 * Entries whose lexemes compare equal are merged into one, with their
 * position arrays concatenated and then passed through uniquePos().  The
 * array is compacted in place and the number of remaining entries is
 * returned.
 *
 * On return *outbuflen holds the space needed for the strings and the
 * position data of the remaining entries.
 */
static int
uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
{
	WordEntryIN *keep = a;		/* entry currently being accumulated into */
	WordEntryIN *scan;
	int			space = 0;

	Assert(l >= 1);

	if (l > 1)
		qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry,
				  (void *) buf);

	for (scan = a + 1; scan - a < l; scan++)
	{
		if (scan->entry.len != keep->entry.len ||
			strncmp(&buf[scan->entry.pos], &buf[keep->entry.pos],
					keep->entry.len) != 0)
		{
			/* a different lexeme: *keep is complete, count its space */
			space += keep->entry.len;
			if (keep->entry.haspos)
			{
				keep->poslen = uniquePos(keep->pos, keep->poslen);
				space = SHORTALIGN(space);
				space += keep->poslen * sizeof(WordEntryPos) + sizeof(uint16);
			}

			/* start accumulating into the next output slot */
			keep++;
			if (keep != scan)
				memcpy(keep, scan, sizeof(WordEntryIN));
			continue;
		}

		/* same lexeme as *keep; only its positions matter */
		if (!scan->entry.haspos)
			continue;

		if (!keep->entry.haspos)
		{
			/* *keep had none so far: hand scan's array over to it */
			keep->entry.haspos = 1;
			keep->pos = scan->pos;
			keep->poslen = scan->poslen;
		}
		else
		{
			/* both have positions: append scan's after keep's */
			int			merged = scan->poslen + keep->poslen;

			keep->pos = (WordEntryPos *)
				repalloc(keep->pos, merged * sizeof(WordEntryPos));
			memcpy(&keep->pos[keep->poslen], scan->pos,
				   scan->poslen * sizeof(WordEntryPos));
			keep->poslen = merged;
			pfree(scan->pos);
		}
	}

	/* the final entry has not been counted yet */
	space += keep->entry.len;
	if (keep->entry.haspos)
	{
		keep->poslen = uniquePos(keep->pos, keep->poslen);
		space = SHORTALIGN(space);
		space += keep->poslen * sizeof(WordEntryPos) + sizeof(uint16);
	}

	*outbuflen = space;
	return keep + 1 - a;
}
/*
 * DecodeMultiInsert - turn one XLOG_HEAP2_MULTI_INSERT record into a
 * separate INSERT change, with its own tuple buffer, for every tuple in it.
 *
 * At present a multi-insert record always carries the complete tuples.
 * Records that belong to a database other than the slot's are ignored.
 */
static void
DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
	XLogRecord *record = &buf->record;
	xl_heap_multi_insert *xlrec = (xl_heap_multi_insert *) buf->record_data;
	bool		pageInit = (record->xl_info & XLOG_HEAP_INIT_PAGE) != 0;
	char	   *cursor;
	int			tupno;

	/* only interested in our database */
	if (xlrec->node.dbNode != ctx->slot->data.database)
		return;

	cursor = buf->record_data + SizeOfHeapMultiInsert;

	/*
	 * Unless XLOG_HEAP_INIT_PAGE is set, an array of OffsetNumbers comes
	 * first.  We have no use for it, so step over it.
	 */
	if (!pageInit)
		cursor += sizeof(OffsetNumber) * xlrec->ntuples;

	for (tupno = 0; tupno < xlrec->ntuples; tupno++)
	{
		ReorderBufferChange *change = ReorderBufferGetChange(ctx->reorder);

		change->action = REORDER_BUFFER_CHANGE_INSERT;
		memcpy(&change->data.tp.relnode, &xlrec->node, sizeof(RelFileNode));

		/*
		 * multi_insert is not used for catalogs, so CONTAINS_NEW_TUPLE is
		 * always set today; test it anyway to stay future proof.
		 *
		 * The tuple is decoded much like DecodeXLogTuple does it, but the
		 * record layout differs slightly, so that function can't be reused.
		 */
		if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
		{
			xl_multi_insert_tuple *tuphdr;
			ReorderBufferTupleBuf *tupbuf;
			HeapTupleHeader header;
			int			datalen;

			/* each per-tuple header starts on a SHORTALIGN boundary */
			tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(cursor);
			cursor = ((char *) tuphdr) + SizeOfMultiInsertTuple;
			datalen = tuphdr->datalen;

			tupbuf = ReorderBufferGetTupleBuf(ctx->reorder, datalen);
			change->data.tp.newtuple = tupbuf;
			header = tupbuf->tuple.t_data;

			/* not a disk based tuple */
			ItemPointerSetInvalid(&tupbuf->tuple.t_self);

			/*
			 * The table OID can only be determined once the transaction
			 * has been reassembled.
			 */
#if 0
			tupbuf->tuple.t_tableOid = InvalidOid;
#endif

			tupbuf->tuple.t_len = datalen + offsetof(HeapTupleHeaderData, t_bits);

			/* zero the fixed header, then place the payload right after it */
			memset(header, 0, offsetof(HeapTupleHeaderData, t_bits));
			memcpy((char *) tupbuf->tuple.t_data + offsetof(HeapTupleHeaderData, t_bits),
				   (char *) cursor,
				   datalen);
			cursor += datalen;

			header->t_infomask = tuphdr->t_infomask;
			header->t_infomask2 = tuphdr->t_infomask2;
			header->t_hoff = tuphdr->t_hoff;
		}

		/*
		 * Toast reassembly state may only be reset after the last row of
		 * the last xl_multi_insert_tuple record that a single
		 * heap_multi_insert() call emitted.
		 */
		change->data.tp.clear_toast_afterwards =
			((xlrec->flags & XLOG_HEAP_LAST_MULTI_INSERT) &&
			 (tupno + 1) == xlrec->ntuples);

		ReorderBufferQueueChange(ctx->reorder, record->xl_xid,
								 buf->origptr, change);
	}
}
/* * Form a tuple for entry tree. * * If the tuple would be too big to be stored, function throws a suitable * error if errorTooBig is TRUE, or returns NULL if errorTooBig is FALSE. * * On leaf pages, Index tuple has non-traditional layout. Tuple may contain * posting list or root blocknumber of posting tree. * Macros: GinIsPostingTree(itup) / GinSetPostingTree(itup, blkno) * 1) Posting list * - itup->t_info & INDEX_SIZE_MASK contains total size of tuple as usual * - ItemPointerGetBlockNumber(&itup->t_tid) contains original * size of tuple (without posting list). * Macros: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n) * - ItemPointerGetOffsetNumber(&itup->t_tid) contains number * of elements in posting list (number of heap itempointers) * Macros: GinGetNPosting(itup) / GinSetNPosting(itup,n) * - After standard part of tuple there is a posting list, ie, array * of heap itempointers * Macros: GinGetPosting(itup) * 2) Posting tree * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual * - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of * root of posting tree * - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number * GIN_TREE_POSTING, which distinguishes this from posting-list case * * Attributes of an index tuple are different for single and multicolumn index. * For single-column case, index tuple stores only value to be indexed. * For multicolumn case, it stores two attributes: column number of value * and value. 
*/ IndexTuple GinFormTuple(Relation index, GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData *ipd, uint32 nipd, bool errorTooBig) { bool isnull[2] = {FALSE, FALSE}; IndexTuple itup; uint32 newsize; if (ginstate->oneCol) itup = index_form_tuple(ginstate->origTupdesc, &key, isnull); else { Datum datums[2]; datums[0] = UInt16GetDatum(attnum); datums[1] = key; itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull); } GinSetOrigSizePosting(itup, IndexTupleSize(itup)); if (nipd > 0) { newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData) * nipd); if (newsize > Min(INDEX_SIZE_MASK, GinMaxItemSize)) { if (errorTooBig) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("index row size %lu exceeds maximum %lu for index \"%s\"", (unsigned long) newsize, (unsigned long) Min(INDEX_SIZE_MASK, GinMaxItemSize), RelationGetRelationName(index)))); return NULL; } itup = repalloc(itup, newsize); /* set new size */ itup->t_info &= ~INDEX_SIZE_MASK; itup->t_info |= newsize; if (ipd) memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd); GinSetNPosting(itup, nipd); } else { /* * Gin tuple without any ItemPointers should be large enough to keep * one ItemPointer, to prevent inconsistency between * ginHeapTupleFastCollect and ginEntryInsert called by * ginHeapTupleInsert. ginHeapTupleFastCollect forms tuple without * extra pointer to heap, but ginEntryInsert (called for pending list * cleanup during vacuum) will form the same tuple with one * ItemPointer. */ newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData)); if (newsize > Min(INDEX_SIZE_MASK, GinMaxItemSize)) { if (errorTooBig) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("index row size %lu exceeds maximum %lu for index \"%s\"", (unsigned long) newsize, (unsigned long) Min(INDEX_SIZE_MASK, GinMaxItemSize), RelationGetRelationName(index)))); return NULL; } GinSetNPosting(itup, 0); } return itup; }