/* * SQL function json_populate_record * * set fields in a record from the argument json * * Code adapted shamelessly from hstore's populate_record * which is in turn partly adapted from record_out. * * The json is decomposed into a hash table, in which each * field in the record is then looked up by name. */ Datum json_populate_record(PG_FUNCTION_ARGS) { Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, 0); text *json; bool use_json_as_text; HTAB *json_hash; HeapTupleHeader rec; Oid tupType; int32 tupTypmod; TupleDesc tupdesc; HeapTupleData tuple; HeapTuple rettuple; RecordIOData *my_extra; int ncolumns; int i; Datum *values; bool *nulls; char fname[NAMEDATALEN]; JsonHashEntry hashentry; use_json_as_text = PG_ARGISNULL(2) ? false : PG_GETARG_BOOL(2); if (!type_is_rowtype(argtype)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("first argument must be a rowtype"))); if (PG_ARGISNULL(0)) { if (PG_ARGISNULL(1)) PG_RETURN_NULL(); rec = NULL; /* * have no tuple to look at, so the only source of type info is the * argtype. The lookup_rowtype_tupdesc call below will error out if we * don't have a known composite type oid here. */ tupType = argtype; tupTypmod = -1; } else { rec = PG_GETARG_HEAPTUPLEHEADER(0); if (PG_ARGISNULL(1)) PG_RETURN_POINTER(rec); /* Extract type info from the tuple itself */ tupType = HeapTupleHeaderGetTypeId(rec); tupTypmod = HeapTupleHeaderGetTypMod(rec); } json = PG_GETARG_TEXT_P(1); json_hash = get_json_object_as_hash(json, "json_populate_record", use_json_as_text); /* * if the input json is empty, we can only skip the rest if we were passed * in a non-null record, since otherwise there may be issues with domain * nulls. */ if (hash_get_num_entries(json_hash) == 0 && rec) PG_RETURN_POINTER(rec); tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); ncolumns = tupdesc->natts; if (rec) { /* Build a temporary HeapTuple control structure */ tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; } /* * We arrange to look up the needed I/O info just once per series of * calls, assuming the record type doesn't change underneath us. */ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; if (my_extra == NULL || my_extra->ncolumns != ncolumns) { fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, sizeof(RecordIOData) - sizeof(ColumnIOData) + ncolumns * sizeof(ColumnIOData)); my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; my_extra->record_type = InvalidOid; my_extra->record_typmod = 0; } if (my_extra->record_type != tupType || my_extra->record_typmod != tupTypmod) { MemSet(my_extra, 0, sizeof(RecordIOData) - sizeof(ColumnIOData) + ncolumns * sizeof(ColumnIOData)); my_extra->record_type = tupType; my_extra->record_typmod = tupTypmod; my_extra->ncolumns = ncolumns; } values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); if (rec) { /* Break down the tuple into fields */ heap_deform_tuple(&tuple, tupdesc, values, nulls); } else { for (i = 0; i < ncolumns; ++i) { values[i] = (Datum) 0; nulls[i] = true; } } for (i = 0; i < ncolumns; ++i) { ColumnIOData *column_info = &my_extra->columns[i]; Oid column_type = tupdesc->attrs[i]->atttypid; char *value; /* Ignore dropped columns in datatype */ if (tupdesc->attrs[i]->attisdropped) { nulls[i] = true; continue; } memset(fname, 0, NAMEDATALEN); strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN); hashentry = hash_search(json_hash, fname, HASH_FIND, NULL); /* * we can't just skip here if the key wasn't found since we might have * a domain to deal with. If we were passed in a non-null record * datum, we assume that the existing values are valid (if they're * not, then it's not our fault), but if we were passed in a null, * then every field which we don't populate needs to be run through * the input function just in case it's a domain type. */ if (hashentry == NULL && rec) continue; /* * Prepare to convert the column value from text */ if (column_info->column_type != column_type) { getTypeInputInfo(column_type, &column_info->typiofunc, &column_info->typioparam); fmgr_info_cxt(column_info->typiofunc, &column_info->proc, fcinfo->flinfo->fn_mcxt); column_info->column_type = column_type; } if (hashentry == NULL || hashentry->isnull) { /* * need InputFunctionCall to happen even for nulls, so that domain * checks are done */ values[i] = InputFunctionCall(&column_info->proc, NULL, column_info->typioparam, tupdesc->attrs[i]->atttypmod); nulls[i] = true; } else { value = hashentry->val; values[i] = InputFunctionCall(&column_info->proc, value, column_info->typioparam, tupdesc->attrs[i]->atttypmod); nulls[i] = false; } } rettuple = heap_form_tuple(tupdesc, values, nulls); ReleaseTupleDesc(tupdesc); PG_RETURN_DATUM(HeapTupleGetDatum(rettuple)); }
/* * _hash_first() -- Find the first item in a scan. * * Find the first item in the index that * satisfies the qualification associated with the scan descriptor. On * success, the page containing the current index tuple is read locked * and pinned, and the scan's opaque data entry is updated to * include the buffer. */ bool _hash_first(IndexScanDesc scan, ScanDirection dir) { Relation rel = scan->indexRelation; HashScanOpaque so = (HashScanOpaque) scan->opaque; ScanKey cur; uint32 hashkey; Bucket bucket; BlockNumber blkno; BlockNumber oldblkno = InvalidBuffer; bool retry = false; Buffer buf; Buffer metabuf; Page page; HashPageOpaque opaque; HashMetaPage metap; IndexTuple itup; ItemPointer current; OffsetNumber offnum; pgstat_count_index_scan(rel); current = &(so->hashso_curpos); ItemPointerSetInvalid(current); /* * We do not support hash scans with no index qualification, because we * would have to read the whole index rather than just one bucket. That * creates a whole raft of problems, since we haven't got a practical way * to lock all the buckets against splits or compactions. */ if (scan->numberOfKeys < 1) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("hash indexes do not support whole-index scans"))); /* There may be more than one index qual, but we hash only the first */ cur = &scan->keyData[0]; /* We support only single-column hash indexes */ Assert(cur->sk_attno == 1); /* And there's only one operator strategy, too */ Assert(cur->sk_strategy == HTEqualStrategyNumber); /* * If the constant in the index qual is NULL, assume it cannot match any * items in the index. */ if (cur->sk_flags & SK_ISNULL) return false; /* * Okay to compute the hash key. We want to do this before acquiring any * locks, in case a user-defined hash function happens to be slow. * * If scankey operator is not a cross-type comparison, we can use the * cached hash function; otherwise gotta look it up in the catalogs. * * We support the convention that sk_subtype == InvalidOid means the * opclass input type; this is a hack to simplify life for ScanKeyInit(). */ if (cur->sk_subtype == rel->rd_opcintype[0] || cur->sk_subtype == InvalidOid) hashkey = _hash_datum2hashkey(rel, cur->sk_argument); else hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument, cur->sk_subtype); so->hashso_sk_hash = hashkey; /* Read the metapage */ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = HashPageGetMeta(BufferGetPage(metabuf)); /* * Loop until we get a lock on the correct target bucket. */ for (;;) { /* * Compute the target bucket number, and convert to block number. */ bucket = _hash_hashkey2bucket(hashkey, metap->hashm_maxbucket, metap->hashm_highmask, metap->hashm_lowmask); blkno = BUCKET_TO_BLKNO(metap, bucket); /* Release metapage lock, but keep pin. */ _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK); /* * If the previous iteration of this loop locked what is still the * correct target bucket, we are done. Otherwise, drop any old lock * and lock what now appears to be the correct bucket. */ if (retry) { if (oldblkno == blkno) break; _hash_droplock(rel, oldblkno, HASH_SHARE); } _hash_getlock(rel, blkno, HASH_SHARE); /* * Reacquire metapage lock and check that no bucket split has taken * place while we were awaiting the bucket lock. */ _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_READ); oldblkno = blkno; retry = true; } /* done with the metapage */ _hash_dropbuf(rel, metabuf); /* Update scan opaque state to show we have lock on the bucket */ so->hashso_bucket = bucket; so->hashso_bucket_valid = true; so->hashso_bucket_blkno = blkno; /* Fetch the primary bucket page for the bucket */ buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE); page = BufferGetPage(buf); opaque = (HashPageOpaque) PageGetSpecialPointer(page); Assert(opaque->hasho_bucket == bucket); /* If a backwards scan is requested, move to the end of the chain */ if (ScanDirectionIsBackward(dir)) { while (BlockNumberIsValid(opaque->hasho_nextblkno)) _hash_readnext(rel, &buf, &page, &opaque); } /* Now find the first tuple satisfying the qualification */ if (!_hash_step(scan, &buf, dir)) return false; /* if we're here, _hash_step found a valid tuple */ offnum = ItemPointerGetOffsetNumber(current); _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BufferGetPage(buf); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); so->hashso_heappos = itup->t_tid; return true; }
/* * record_out - output routine for any composite type. */ Datum record_out(PG_FUNCTION_ARGS) { HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0); Oid tupType; int32 tupTypmod; TupleDesc tupdesc; HeapTupleData tuple; RecordIOData *my_extra; bool needComma = false; int ncolumns; int i; Datum *values; bool *nulls; StringInfoData buf; /* Extract type info from the tuple itself */ tupType = HeapTupleHeaderGetTypeId(rec); tupTypmod = HeapTupleHeaderGetTypMod(rec); tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); ncolumns = tupdesc->natts; /* Build a temporary HeapTuple control structure */ tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; /* * We arrange to look up the needed I/O info just once per series of * calls, assuming the record type doesn't change underneath us. */ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; if (my_extra == NULL || my_extra->ncolumns != ncolumns) { fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, sizeof(RecordIOData) - sizeof(ColumnIOData) + ncolumns * sizeof(ColumnIOData)); my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; my_extra->record_type = InvalidOid; my_extra->record_typmod = 0; } if (my_extra->record_type != tupType || my_extra->record_typmod != tupTypmod) { MemSet(my_extra, 0, sizeof(RecordIOData) - sizeof(ColumnIOData) + ncolumns * sizeof(ColumnIOData)); my_extra->record_type = tupType; my_extra->record_typmod = tupTypmod; my_extra->ncolumns = ncolumns; } values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); /* Break down the tuple into fields */ heap_deform_tuple(&tuple, tupdesc, values, nulls); /* And build the result string */ initStringInfo(&buf); appendStringInfoChar(&buf, '('); for (i = 0; i < ncolumns; i++) { ColumnIOData *column_info = &my_extra->columns[i]; Oid column_type = tupdesc->attrs[i]->atttypid; char *value; char *tmp; bool nq; /* Ignore dropped columns in datatype */ if (tupdesc->attrs[i]->attisdropped) continue; if (needComma) appendStringInfoChar(&buf, ','); needComma = true; if (nulls[i]) { /* emit nothing... */ continue; } /* * Convert the column value to text */ if (column_info->column_type != column_type) { bool typIsVarlena; getTypeOutputInfo(column_type, &column_info->typiofunc, &typIsVarlena); fmgr_info_cxt(column_info->typiofunc, &column_info->proc, fcinfo->flinfo->fn_mcxt); column_info->column_type = column_type; } value = OutputFunctionCall(&column_info->proc, values[i]); /* Detect whether we need double quotes for this value */ nq = (value[0] == '\0'); /* force quotes for empty string */ for (tmp = value; *tmp; tmp++) { char ch = *tmp; if (ch == '"' || ch == '\\' || ch == '(' || ch == ')' || ch == ',' || isspace((unsigned char) ch)) { nq = true; break; } } /* And emit the string */ if (nq) appendStringInfoChar(&buf, '"'); for (tmp = value; *tmp; tmp++) { char ch = *tmp; if (ch == '"' || ch == '\\') appendStringInfoChar(&buf, ch); appendStringInfoChar(&buf, ch); } if (nq) appendStringInfoChar(&buf, '"'); } appendStringInfoChar(&buf, ')'); pfree(values); pfree(nulls); ReleaseTupleDesc(tupdesc); PG_RETURN_CSTRING(buf.data); }
Datum hstore_from_record(PG_FUNCTION_ARGS) { HeapTupleHeader rec; int4 buflen; HStore *out; Pairs *pairs; Oid tupType; int32 tupTypmod; TupleDesc tupdesc; HeapTupleData tuple; RecordIOData *my_extra; int ncolumns; int i, j; Datum *values; bool *nulls; if (PG_ARGISNULL(0)) { Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, 0); /* * have no tuple to look at, so the only source of type info is the * argtype. The lookup_rowtype_tupdesc call below will error out if we * don't have a known composite type oid here. */ tupType = argtype; tupTypmod = -1; rec = NULL; } else { rec = PG_GETARG_HEAPTUPLEHEADER(0); /* Extract type info from the tuple itself */ tupType = HeapTupleHeaderGetTypeId(rec); tupTypmod = HeapTupleHeaderGetTypMod(rec); } tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); ncolumns = tupdesc->natts; /* * We arrange to look up the needed I/O info just once per series of * calls, assuming the record type doesn't change underneath us. */ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; if (my_extra == NULL || my_extra->ncolumns != ncolumns) { fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, sizeof(RecordIOData) - sizeof(ColumnIOData) + ncolumns * sizeof(ColumnIOData)); my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; my_extra->record_type = InvalidOid; my_extra->record_typmod = 0; } if (my_extra->record_type != tupType || my_extra->record_typmod != tupTypmod) { MemSet(my_extra, 0, sizeof(RecordIOData) - sizeof(ColumnIOData) + ncolumns * sizeof(ColumnIOData)); my_extra->record_type = tupType; my_extra->record_typmod = tupTypmod; my_extra->ncolumns = ncolumns; } pairs = palloc(ncolumns * sizeof(Pairs)); if (rec) { /* Build a temporary HeapTuple control structure */ tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; #ifdef PGXC tuple.t_xc_node_id = 0; #endif tuple.t_data = rec; values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); /* Break down the tuple into fields */ heap_deform_tuple(&tuple, tupdesc, values, nulls); } else { values = NULL; nulls = NULL; } for (i = 0, j = 0; i < ncolumns; ++i) { ColumnIOData *column_info = &my_extra->columns[i]; Oid column_type = tupdesc->attrs[i]->atttypid; char *value; /* Ignore dropped columns in datatype */ if (tupdesc->attrs[i]->attisdropped) continue; pairs[j].key = NameStr(tupdesc->attrs[i]->attname); pairs[j].keylen = hstoreCheckKeyLen(strlen(NameStr(tupdesc->attrs[i]->attname))); if (!nulls || nulls[i]) { pairs[j].val = NULL; pairs[j].vallen = 4; pairs[j].isnull = true; pairs[j].needfree = false; ++j; continue; } /* * Convert the column value to text */ if (column_info->column_type != column_type) { bool typIsVarlena; getTypeOutputInfo(column_type, &column_info->typiofunc, &typIsVarlena); fmgr_info_cxt(column_info->typiofunc, &column_info->proc, fcinfo->flinfo->fn_mcxt); column_info->column_type = column_type; } value = OutputFunctionCall(&column_info->proc, values[i]); pairs[j].val = value; pairs[j].vallen = hstoreCheckValLen(strlen(value)); pairs[j].isnull = false; pairs[j].needfree = false; ++j; } ncolumns = hstoreUniquePairs(pairs, j, &buflen); out = hstorePairs(pairs, ncolumns, buflen); ReleaseTupleDesc(tupdesc); PG_RETURN_POINTER(out); }
/* ---------- * toast_flatten_tuple_attribute - * * If a Datum is of composite type, "flatten" it to contain no toasted fields. * This must be invoked on any potentially-composite field that is to be * inserted into a tuple. Doing this preserves the invariant that toasting * goes only one level deep in a tuple. * ---------- */ Datum toast_flatten_tuple_attribute(Datum value, Oid typeId, int32 typeMod) { TupleDesc tupleDesc; HeapTupleHeader olddata; HeapTupleHeader new_data; int32 new_len; HeapTupleData tmptup; Form_pg_attribute *att; int numAttrs; int i; bool need_change = false; bool has_nulls = false; Datum toast_values[MaxTupleAttributeNumber]; bool toast_isnull[MaxTupleAttributeNumber]; bool toast_free[MaxTupleAttributeNumber]; /* * See if it's a composite type, and get the tupdesc if so. */ tupleDesc = lookup_rowtype_tupdesc_noerror(typeId, typeMod, true); if (tupleDesc == NULL) return value; /* not a composite type */ tupleDesc = CreateTupleDescCopy(tupleDesc); att = tupleDesc->attrs; numAttrs = tupleDesc->natts; /* * Break down the tuple into fields. */ olddata = DatumGetHeapTupleHeader(value); Assert(typeId == HeapTupleHeaderGetTypeId(olddata)); Assert(typeMod == HeapTupleHeaderGetTypMod(olddata)); /* Build a temporary HeapTuple control structure */ tmptup.t_len = HeapTupleHeaderGetDatumLength(olddata); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; tmptup.t_data = olddata; Assert(numAttrs <= MaxTupleAttributeNumber); heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull); memset(toast_free, 0, numAttrs * sizeof(bool)); for (i = 0; i < numAttrs; i++) { /* * Look at non-null varlena attributes */ if (toast_isnull[i]) has_nulls = true; else if (att[i]->attlen == -1) { varattrib *new_value; new_value = (varattrib *) DatumGetPointer(toast_values[i]); if (VARATT_IS_EXTENDED(new_value)) { new_value = heap_tuple_untoast_attr(new_value); toast_values[i] = PointerGetDatum(new_value); toast_free[i] = true; need_change = true; } } } /* * If nothing to untoast, just return the original tuple. */ if (!need_change) { FreeTupleDesc(tupleDesc); return value; } /* * Calculate the new size of the tuple. Header size should not change, * but data size might. */ new_len = offsetof(HeapTupleHeaderData, t_bits); if (has_nulls) new_len += BITMAPLEN(numAttrs); if (olddata->t_infomask & HEAP_HASOID) new_len += sizeof(Oid); new_len = MAXALIGN(new_len); Assert(new_len == olddata->t_hoff); new_len += heap_compute_data_size(tupleDesc, toast_values, toast_isnull); new_data = (HeapTupleHeader) palloc0(new_len); /* * Put the tuple header and the changed values into place */ memcpy(new_data, olddata, olddata->t_hoff); HeapTupleHeaderSetDatumLength(new_data, new_len); heap_fill_tuple(tupleDesc, toast_values, toast_isnull, (char *) new_data + olddata->t_hoff, &(new_data->t_infomask), has_nulls ? new_data->t_bits : NULL); /* * Free allocated temp values */ for (i = 0; i < numAttrs; i++) if (toast_free[i]) pfree(DatumGetPointer(toast_values[i])); FreeTupleDesc(tupleDesc); return PointerGetDatum(new_data); }
/* * record_cmp() * Internal comparison function for records. * * Returns -1, 0 or 1 * * Do not assume that the two inputs are exactly the same record type; * for instance we might be comparing an anonymous ROW() construct against a * named composite type. We will compare as long as they have the same number * of non-dropped columns of the same types. */ static int record_cmp(FunctionCallInfo fcinfo) { HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0); HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1); int result = 0; Oid tupType1; Oid tupType2; int32 tupTypmod1; int32 tupTypmod2; TupleDesc tupdesc1; TupleDesc tupdesc2; HeapTupleData tuple1; HeapTupleData tuple2; int ncolumns1; int ncolumns2; RecordCompareData *my_extra; int ncols; Datum *values1; Datum *values2; bool *nulls1; bool *nulls2; int i1; int i2; int j; /* Extract type info from the tuples */ tupType1 = HeapTupleHeaderGetTypeId(record1); tupTypmod1 = HeapTupleHeaderGetTypMod(record1); tupdesc1 = lookup_rowtype_tupdesc(tupType1, tupTypmod1); ncolumns1 = tupdesc1->natts; tupType2 = HeapTupleHeaderGetTypeId(record2); tupTypmod2 = HeapTupleHeaderGetTypMod(record2); tupdesc2 = lookup_rowtype_tupdesc(tupType2, tupTypmod2); ncolumns2 = tupdesc2->natts; /* Build temporary HeapTuple control structures */ tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; /* * We arrange to look up the needed comparison info just once per series * of calls, assuming the record types don't change underneath us. */ ncols = Max(ncolumns1, ncolumns2); my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; if (my_extra == NULL || my_extra->ncolumns < ncols) { fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, sizeof(RecordCompareData) - sizeof(ColumnCompareData) + ncols * sizeof(ColumnCompareData)); my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; my_extra->ncolumns = ncols; my_extra->record1_type = InvalidOid; my_extra->record1_typmod = 0; my_extra->record2_type = InvalidOid; my_extra->record2_typmod = 0; } if (my_extra->record1_type != tupType1 || my_extra->record1_typmod != tupTypmod1 || my_extra->record2_type != tupType2 || my_extra->record2_typmod != tupTypmod2) { MemSet(my_extra->columns, 0, ncols * sizeof(ColumnCompareData)); my_extra->record1_type = tupType1; my_extra->record1_typmod = tupTypmod1; my_extra->record2_type = tupType2; my_extra->record2_typmod = tupTypmod2; } /* Break down the tuples into fields */ values1 = (Datum *) palloc(ncolumns1 * sizeof(Datum)); nulls1 = (bool *) palloc(ncolumns1 * sizeof(bool)); heap_deform_tuple(&tuple1, tupdesc1, values1, nulls1); values2 = (Datum *) palloc(ncolumns2 * sizeof(Datum)); nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool)); heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2); /* * Scan corresponding columns, allowing for dropped columns in different * places in the two rows. i1 and i2 are physical column indexes, j is * the logical column index. */ i1 = i2 = j = 0; while (i1 < ncolumns1 || i2 < ncolumns2) { TypeCacheEntry *typentry; Oid collation; FunctionCallInfoData locfcinfo; int32 cmpresult; /* * Skip dropped columns */ if (i1 < ncolumns1 && tupdesc1->attrs[i1]->attisdropped) { i1++; continue; } if (i2 < ncolumns2 && tupdesc2->attrs[i2]->attisdropped) { i2++; continue; } if (i1 >= ncolumns1 || i2 >= ncolumns2) break; /* we'll deal with mismatch below loop */ /* * Have two matching columns, they must be same type */ if (tupdesc1->attrs[i1]->atttypid != tupdesc2->attrs[i2]->atttypid) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("cannot compare dissimilar column types %s and %s at record column %d", format_type_be(tupdesc1->attrs[i1]->atttypid), format_type_be(tupdesc2->attrs[i2]->atttypid), j + 1))); /* * If they're not same collation, we don't complain here, but the * comparison function might. */ collation = tupdesc1->attrs[i1]->attcollation; if (collation != tupdesc2->attrs[i2]->attcollation) collation = InvalidOid; /* * Lookup the comparison function if not done already */ typentry = my_extra->columns[j].typentry; if (typentry == NULL || typentry->type_id != tupdesc1->attrs[i1]->atttypid) { typentry = lookup_type_cache(tupdesc1->attrs[i1]->atttypid, TYPECACHE_CMP_PROC_FINFO); if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("could not identify a comparison function for type %s", format_type_be(typentry->type_id)))); my_extra->columns[j].typentry = typentry; } /* * We consider two NULLs equal; NULL > not-NULL. */ if (!nulls1[i1] || !nulls2[i2]) { if (nulls1[i1]) { /* arg1 is greater than arg2 */ result = 1; break; } if (nulls2[i2]) { /* arg1 is less than arg2 */ result = -1; break; } /* Compare the pair of elements */ InitFunctionCallInfoData(locfcinfo, &typentry->cmp_proc_finfo, 2, collation, NULL, NULL); locfcinfo.arg[0] = values1[i1]; locfcinfo.arg[1] = values2[i2]; locfcinfo.argnull[0] = false; locfcinfo.argnull[1] = false; locfcinfo.isnull = false; cmpresult = DatumGetInt32(FunctionCallInvoke(&locfcinfo)); if (cmpresult < 0) { /* arg1 is less than arg2 */ result = -1; break; } else if (cmpresult > 0) { /* arg1 is greater than arg2 */ result = 1; break; } } /* equal, so continue to next column */ i1++, i2++, j++; } /* * If we didn't break out of the loop early, check for column count * mismatch. (We do not report such mismatch if we found unequal column * values; is that a feature or a bug?) */ if (result == 0) { if (i1 != ncolumns1 || i2 != ncolumns2) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("cannot compare record types with different numbers of columns"))); } pfree(values1); pfree(nulls1); pfree(values2); pfree(nulls2); ReleaseTupleDesc(tupdesc1); ReleaseTupleDesc(tupdesc2); /* Avoid leaking memory when handed toasted input. */ PG_FREE_IF_COPY(record1, 0); PG_FREE_IF_COPY(record2, 1); return result; }
Datum gistrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey key = (ScanKey) PG_GETARG_POINTER(1); GISTScanOpaque so; int i; so = (GISTScanOpaque) scan->opaque; if (so != NULL) { /* rescan an existing indexscan --- reset state */ gistfreestack(so->stack); so->stack = NULL; /* drop pins on buffers -- no locks held */ if (BufferIsValid(so->curbuf)) { ReleaseBuffer(so->curbuf); so->curbuf = InvalidBuffer; } } else { /* initialize opaque data */ so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData)); so->stack = NULL; so->tempCxt = createTempGistContext(); so->curbuf = InvalidBuffer; so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE)); initGISTstate(so->giststate, scan->indexRelation); scan->opaque = so; } /* * Clear all the pointers. */ ItemPointerSetInvalid(&so->curpos); so->nPageData = so->curPageData = 0; so->qual_ok = true; /* Update scan key, if a new one is given */ if (key && scan->numberOfKeys > 0) { memmove(scan->keyData, key, scan->numberOfKeys * sizeof(ScanKeyData)); /* * Modify the scan key so that all the Consistent method is called for * all comparisons. The original operator is passed to the Consistent * function in the form of its strategy number, which is available * from the sk_strategy field, and its subtype from the sk_subtype * field. * * Next, if any of keys is a NULL and that key is not marked with * SK_SEARCHNULL/SK_SEARCHNOTNULL then nothing can be found (ie, we * assume all indexable operators are strict). */ for (i = 0; i < scan->numberOfKeys; i++) { ScanKey skey = &(scan->keyData[i]); skey->sk_func = so->giststate->consistentFn[skey->sk_attno - 1]; if (skey->sk_flags & SK_ISNULL) { if (!(skey->sk_flags & (SK_SEARCHNULL | SK_SEARCHNOTNULL))) so->qual_ok = false; } } } PG_RETURN_VOID(); }
/* * Helper method used to insert a new minipage entry in the block * directory relation. Refer to AppendOnlyBlockDirectory_InsertEntry() * for more details. */ static bool insert_new_entry( AppendOnlyBlockDirectory *blockDirectory, int columnGroupNo, int64 firstRowNum, int64 fileOffset, int64 rowCount, MinipagePerColumnGroup *minipageInfo) { MinipageEntry *entry = NULL; int lastEntryNo; if (rowCount == 0) return false; if (blockDirectory->blkdirRel == NULL || blockDirectory->blkdirIdx == NULL) return false; Assert(minipageInfo->numMinipageEntries <= (uint32)NUM_MINIPAGE_ENTRIES); lastEntryNo = minipageInfo->numMinipageEntries - 1; if (lastEntryNo >= 0) { entry = &(minipageInfo->minipage->entry[lastEntryNo]); Assert(entry->firstRowNum < firstRowNum); Assert(entry->fileOffset < fileOffset); if (gp_blockdirectory_entry_min_range > 0 && fileOffset - entry->fileOffset < gp_blockdirectory_entry_min_range) return true; /* Update the rowCount in the latest entry */ Assert(entry->rowCount <= firstRowNum - entry->firstRowNum); ereportif(Debug_appendonly_print_blockdirectory, LOG, (errmsg("Append-only block directory update entry: " "(firstRowNum, columnGroupNo, fileOffset, rowCount) = (" INT64_FORMAT ", %d, " INT64_FORMAT ", " INT64_FORMAT ") at index %d to " "(firstRowNum, columnGroupNo, fileOffset, rowCount) = (" INT64_FORMAT ", %d, " INT64_FORMAT ", " INT64_FORMAT ")", entry->firstRowNum, columnGroupNo, entry->fileOffset, entry->rowCount, minipageInfo->numMinipageEntries - 1, entry->firstRowNum, columnGroupNo, entry->fileOffset, firstRowNum - entry->firstRowNum))); entry->rowCount = firstRowNum - entry->firstRowNum; } if (minipageInfo->numMinipageEntries >= (uint32)gp_blockdirectory_minipage_size) { write_minipage(blockDirectory, columnGroupNo, minipageInfo); /* Set tupleTid to invalid */ ItemPointerSetInvalid(&minipageInfo->tupleTid); /* * Clear out the entries. */ MemSet(minipageInfo->minipage->entry, 0, minipageInfo->numMinipageEntries * sizeof(MinipageEntry)); minipageInfo->numMinipageEntries = 0; } Assert(minipageInfo->numMinipageEntries < (uint32)gp_blockdirectory_minipage_size); entry = &(minipageInfo->minipage->entry[minipageInfo->numMinipageEntries]); entry->firstRowNum = firstRowNum; entry->fileOffset = fileOffset; entry->rowCount = rowCount; minipageInfo->numMinipageEntries++; ereportif(Debug_appendonly_print_blockdirectory, LOG, (errmsg("Append-only block directory insert entry: " "(firstRowNum, columnGroupNo, fileOffset, rowCount) = (" INT64_FORMAT ", %d, " INT64_FORMAT ", " INT64_FORMAT ") at index %d", entry->firstRowNum, columnGroupNo, entry->fileOffset, entry->rowCount, minipageInfo->numMinipageEntries - 1))); return true; }
/* * heap_formtuple * * construct a tuple from the given values[] and nulls[] arrays * * Null attributes are indicated by a 'n' in the appropriate byte * of nulls[]. Non-null attributes are indicated by a ' ' (space). * * OLD API with char 'n'/' ' convention for indicating nulls. * This is deprecated and should not be used in new code, but we keep it * around for use by old add-on modules. */ HeapTuple heap_formtuple(TupleDesc tupleDescriptor, Datum *values, char *nulls) { HeapTuple tuple; /* return tuple */ HeapTupleHeader td; /* tuple data */ Size len, data_len; int hoff; bool hasnull = false; Form_pg_attribute *att = tupleDescriptor->attrs; int numberOfAttributes = tupleDescriptor->natts; int i; if (numberOfAttributes > MaxTupleAttributeNumber) ereport(ERROR, (errcode(ERRCODE_TOO_MANY_COLUMNS), errmsg("number of columns (%d) exceeds limit (%d)", numberOfAttributes, MaxTupleAttributeNumber))); /* * Check for nulls and embedded tuples; expand any toasted attributes in * embedded tuples. This preserves the invariant that toasting can only * go one level deep. * * We can skip calling toast_flatten_tuple_attribute() if the attribute * couldn't possibly be of composite type. All composite datums are * varlena and have alignment 'd'; furthermore they aren't arrays. Also, * if an attribute is already toasted, it must have been sent to disk * already and so cannot contain toasted attributes. */ for (i = 0; i < numberOfAttributes; i++) { if (nulls[i] != ' ') hasnull = true; else if (att[i]->attlen == -1 && att[i]->attalign == 'd' && att[i]->attndims == 0 && !VARATT_IS_EXTENDED(values[i])) { values[i] = toast_flatten_tuple_attribute(values[i], att[i]->atttypid, att[i]->atttypmod); } } /* * Determine total space needed */ len = offsetof(HeapTupleHeaderData, t_bits); if (hasnull) len += BITMAPLEN(numberOfAttributes); if (tupleDescriptor->tdhasoid) len += sizeof(Oid); hoff = len = MAXALIGN(len); /* align user data safely */ data_len = ComputeDataSize(tupleDescriptor, values, nulls); len += data_len; /* * Allocate and zero the space needed. Note that the tuple body and * HeapTupleData management structure are allocated in one chunk. */ tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + len); tuple->t_data = td = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); /* * And fill in the information. Note we fill the Datum fields even though * this tuple may never become a Datum. */ tuple->t_len = len; ItemPointerSetInvalid(&(tuple->t_self)); HeapTupleHeaderSetDatumLength(td, len); HeapTupleHeaderSetTypeId(td, tupleDescriptor->tdtypeid); HeapTupleHeaderSetTypMod(td, tupleDescriptor->tdtypmod); HeapTupleHeaderSetNatts(td, numberOfAttributes); td->t_hoff = hoff; if (tupleDescriptor->tdhasoid) /* else leave infomask = 0 */ td->t_infomask = HEAP_HASOID; heap_fill_tuple(tupleDescriptor, values, nulls, (char *) td + hoff, data_len, &td->t_infomask, (hasnull ? td->t_bits : NULL)); return tuple; }
static void *uri_char(HeapTupleHeader ud, bool hdr, bool term) { TupleDesc td; HeapTupleData tuple; Datum d[URI_LEN]; bool n[URI_LEN]; text *scheme = NULL, *host = NULL, *path = NULL; int16 port; char portbuf[8]; unsigned schemelen = 0, hostlen = 0, portlen = 0, pathlen = 0; unsigned len; void *out; char *p; td = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(ud), HeapTupleHeaderGetTypMod(ud)); tuple.t_len = HeapTupleHeaderGetDatumLength(ud); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = ud; heap_deform_tuple(&tuple, td, d, n); ReleaseTupleDesc(td); if (!n[URI_SCHEME]) { scheme = DatumGetTextP(d[URI_SCHEME]); schemelen = VARSIZE_ANY_EXHDR(scheme); } if (!n[URI_HOST]) { host = DatumGetTextP(d[URI_HOST]); hostlen = VARSIZE_ANY_EXHDR(host); } if (!n[URI_PORT]) { port = DatumGetInt16(d[URI_PORT]); portlen = snprintf(portbuf, sizeof(portbuf)-1, ":%hu", port); } if (!n[URI_PATH]) { path = DatumGetTextP(d[URI_PATH]); pathlen = VARSIZE_ANY_EXHDR(path); } len = (hdr ? VARHDRSZ : 0) + schemelen + (scheme ? 3 : 0) + hostlen + portlen + pathlen + term; out = palloc(len); if (hdr) SET_VARSIZE(out, len); p = hdr ? VARDATA(out) : out; if (scheme) { memcpy(p, VARDATA(scheme), schemelen); p += schemelen; *p++ = ':'; *p++ = '/'; *p++ = '/'; } if (host) { domainname_flip(p, VARDATA(host), hostlen); p += hostlen; } memcpy(p, portbuf, portlen); p += portlen; if (path) { memcpy(p, VARDATA(path), pathlen); p += pathlen; } if (term) *p = '\0'; return out; }
Datum serialize_record( PG_FUNCTION_ARGS ) { // FILE* log; // log = fopen("/var/lib/postgresql/serializer.log", "a"); HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0); HeapTupleData tuple; bool needComma = false; int i; Datum *values; bool *nulls; StringInfoData buf; char *conversion_buf; /* Extract type info from the tuple itself */ Oid tupType = HeapTupleHeaderGetTypeId(rec); int32 tupTypmod = HeapTupleHeaderGetTypMod(rec); TupleDesc tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); int ncolumns = tupdesc->natts; /* Build a temporary HeapTuple control structure */ tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; // fprintf(log, "Doing serialize_record\n"); // fflush(log); values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); /* Break down the tuple into fields */ heap_deform_tuple(&tuple, tupdesc, values, nulls); /* And build the result string */ initStringInfo(&buf); appendStringInfoChar(&buf, '{'); for (i = 0; i < ncolumns; i++) { Oid column_type = tupdesc->attrs[ i ]->atttypid; char *value; char *column_name; char type_category; HeapTuple type_tuple; FmgrInfo flinfo; /* Ignore dropped columns in datatype */ if (tupdesc->attrs[i]->attisdropped) continue; if (nulls[i]) { /* emit nothing... */ continue; } if (needComma) appendStringInfoChar(&buf, ','); needComma = true; /* obtain column name */ column_name = SPI_fname( tupdesc, i + 1 ); /* obtain type information from pg_catalog */ type_tuple = SearchSysCache1( TYPEOID, ObjectIdGetDatum(column_type) ); if (!HeapTupleIsValid( type_tuple )) elog(ERROR, "cache lookup failed for relation %u", column_type); type_category = ((Form_pg_type) GETSTRUCT( type_tuple ))->typcategory; ReleaseSysCache( type_tuple ); /* append column name */ appendStringInfoChar(&buf, '"'); appendStringInfoString(&buf, column_name); appendStringInfoString(&buf, "\":"); switch( type_category ) { // http://www.postgresql.org/docs/current/static/catalog-pg-type.html#CATALOG-TYPCATEGORY-TABLE case 'A': //array //call to serialize_array( ... ) MemSet( &flinfo, 0, sizeof( flinfo ) ); flinfo.fn_addr = serialize_array; flinfo.fn_nargs = 1; flinfo.fn_mcxt = fcinfo->flinfo->fn_mcxt; value = PG_TEXT_DATUM_GET_CSTR( FunctionCall1( &flinfo, values[ i ] ) ); appendStringInfoString(&buf, value); break; case 'C': //composite //recursive call to serialize_record( ... ) MemSet( &flinfo, 0, sizeof( flinfo ) ); flinfo.fn_addr = serialize_record; flinfo.fn_nargs = 1; flinfo.fn_mcxt = fcinfo->flinfo->fn_mcxt; value = PG_TEXT_DATUM_GET_CSTR( FunctionCall1( &flinfo, values[ i ] ) ); appendStringInfoString(&buf, value); break; case 'N': //numeric conversion_buf = NULL; // get column text value // fprintf(log, "Calling ConvertToText\n"); // fflush(log); value = ConvertToText( values[ i ], column_type, fcinfo->flinfo->fn_mcxt, &conversion_buf ); // fprintf(log, "ConvertToText succeded\n"); // fflush(log); appendStringInfoString(&buf, value); // fprintf(log, "append.... succeded\n"); // fflush(log); if(conversion_buf != NULL) { pfree(conversion_buf); conversion_buf = NULL; } break; case 'B': //boolean appendStringInfoString(&buf, // get column boolean value DatumGetBool( values[ i ] ) ? "true" : "false" ); break; default: //another conversion_buf = NULL; // get column text value // fprintf(log, "Calling ConvertToText\n"); // fflush(log); value = ConvertToText( values[ i ], column_type, fcinfo->flinfo->fn_mcxt, &conversion_buf ); // fprintf(log, "ConvertToText succeded\n"); // fflush(log); appendStringInfoQuotedString(&buf, value); // fprintf(log, "append.... succeded\n"); // fflush(log); if(conversion_buf != NULL) { pfree(conversion_buf); conversion_buf = NULL; } } } appendStringInfoChar(&buf, '}'); pfree(values); pfree(nulls); ReleaseTupleDesc(tupdesc); // fclose(log); PG_RETURN_TEXT_P( PG_CSTR_GET_TEXT( buf.data ) ); }
void yorder_get_order(Datum eorder,Torder *orderp) { bool isnull; HeapTupleHeader tuple = ((HeapTupleHeader) PG_DETOAST_DATUM(eorder)); Oid tupType; int32 tupTypmod; TupleDesc tupDesc; HeapTupleData tmptup; BOX *p; tupType = HeapTupleHeaderGetTypeId(tuple); tupTypmod = HeapTupleHeaderGetTypMod(tuple); tupDesc = lookup_rowtype_tupdesc(tupType, tupTypmod); tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; tmptup.t_data = tuple; orderp->type = DatumGetInt32(heap_getattr(&tmptup,1,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field type is null in yorder_get_order"))); if(!ORDER_TYPE_IS_VALID(orderp->type)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("order type incorrect in yorder_get_order"))); orderp->id = DatumGetInt32(heap_getattr(&tmptup,2,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field id is null in yorder_get_order"))); orderp->own = DatumGetInt32(heap_getattr(&tmptup,3,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field own is null in yorder_get_order"))); orderp->oid = DatumGetInt32(heap_getattr(&tmptup,4,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field oid is null in yorder_get_order"))); orderp->qtt_requ = DatumGetInt64(heap_getattr(&tmptup,5,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field qtt_requ is null in yorder_get_order"))); //orderp->qua_requ = (HStore *) PG_DETOAST_DATUM(heap_getattr(&tmptup,6,tupDesc,&isnull)); orderp->qua_requ = (Datum) PG_DETOAST_DATUM(heap_getattr(&tmptup,6,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field qua_requ is null in yorder_get_order"))); orderp->qtt_prov = DatumGetInt64(heap_getattr(&tmptup,7,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field qtt_prov is null in yorder_get_order"))); //orderp->qua_prov = (HStore *) PG_DETOAST_DATUM(heap_getattr(&tmptup,8,tupDesc,&isnull)); orderp->qua_prov = (Datum) PG_DETOAST_DATUM(heap_getattr(&tmptup,8,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field qua_prov is null in yorder_get_order"))); orderp->qtt = DatumGetInt64(heap_getattr(&tmptup,9,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field qtt is null in yorder_get_order"))); // pos_requ box, p = DatumGetBoxP(heap_getattr(&tmptup,10,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field pos_requ is null in yorder_get_order"))); GL_CHECK_BOX_S0(p); orderp->pos_requ.x = p->low.x; orderp->pos_requ.y = p->low.y; // pos_prov box, p = DatumGetBoxP(heap_getattr(&tmptup,11,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field pos_prov is null in yorder_get_order"))); GL_CHECK_BOX_S0(p); orderp->pos_prov.x = p->low.x; orderp->pos_prov.y = p->low.y; // dist flat orderp->dist = DatumGetFloat8(heap_getattr(&tmptup,12,tupDesc,&isnull)); if(isnull) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("the field dist is null in yorder_get_order"))); ReleaseTupleDesc(tupDesc); return; }
/* ---------------------------------------------------------------- * ExecInitExternalScan * ---------------------------------------------------------------- */ ExternalScanState * ExecInitExternalScan(ExternalScan *node, EState *estate, int eflags) { ResultRelSegFileInfo *segfileinfo = NULL; ExternalScanState *externalstate; Relation currentRelation; FileScanDesc currentScanDesc; Assert(outerPlan(node) == NULL); Assert(innerPlan(node) == NULL); /* * create state structure */ externalstate = makeNode(ExternalScanState); externalstate->ss.ps.plan = (Plan *) node; externalstate->ss.ps.state = estate; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &externalstate->ss.ps); /* * initialize child expressions */ externalstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) externalstate); externalstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) externalstate); /* Check if targetlist or qual contains a var node referencing the ctid column */ externalstate->cdb_want_ctid = contain_ctid_var_reference(&node->scan); ItemPointerSetInvalid(&externalstate->cdb_fake_ctid); #define EXTSCAN_NSLOTS 2 /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &externalstate->ss.ps); ExecInitScanTupleSlot(estate, &externalstate->ss); /* * get the relation object id from the relid'th entry in the range table * and open that relation. */ currentRelation = ExecOpenScanExternalRelation(estate, node->scan.scanrelid); if (Gp_role == GP_ROLE_EXECUTE && node->err_aosegfileinfos) { segfileinfo = (ResultRelSegFileInfo *)list_nth(node->err_aosegfileinfos, GetQEIndex()); } else { segfileinfo = NULL; } currentScanDesc = external_beginscan(currentRelation, node->scan.scanrelid, node->scancounter, node->uriList, node->fmtOpts, node->fmtType, node->isMasterOnly, node->rejLimit, node->rejLimitInRows, node->fmterrtbl, segfileinfo, node->encoding, node->scan.plan.qual); externalstate->ss.ss_currentRelation = currentRelation; externalstate->ess_ScanDesc = currentScanDesc; ExecAssignScanType(&externalstate->ss, RelationGetDescr(currentRelation)); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&externalstate->ss.ps); ExecAssignScanProjectionInfo(&externalstate->ss); /* * If eflag contains EXEC_FLAG_REWIND or EXEC_FLAG_BACKWARD or EXEC_FLAG_MARK, * then this node is not eager free safe. */ externalstate->ss.ps.delayEagerFree = ((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0); initGpmonPktForExternalScan((Plan *)node, &externalstate->ss.ps.gpmon_pkt, estate); return externalstate; }
static void populate_recordset_object_end(void *state) { PopulateRecordsetState _state = (PopulateRecordsetState) state; HTAB *json_hash = _state->json_hash; Datum *values; bool *nulls; char fname[NAMEDATALEN]; int i; RecordIOData *my_extra = _state->my_extra; int ncolumns = my_extra->ncolumns; TupleDesc tupdesc = _state->ret_tdesc; JsonHashEntry hashentry; HeapTupleHeader rec = _state->rec; HeapTuple rettuple; if (_state->lex->lex_level > 1) return; values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); if (_state->rec) { HeapTupleData tuple; /* Build a temporary HeapTuple control structure */ tuple.t_len = HeapTupleHeaderGetDatumLength(_state->rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = _state->rec; /* Break down the tuple into fields */ heap_deform_tuple(&tuple, tupdesc, values, nulls); } else { for (i = 0; i < ncolumns; ++i) { values[i] = (Datum) 0; nulls[i] = true; } } for (i = 0; i < ncolumns; ++i) { ColumnIOData *column_info = &my_extra->columns[i]; Oid column_type = tupdesc->attrs[i]->atttypid; char *value; /* Ignore dropped columns in datatype */ if (tupdesc->attrs[i]->attisdropped) { nulls[i] = true; continue; } memset(fname, 0, NAMEDATALEN); strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN); hashentry = hash_search(json_hash, fname, HASH_FIND, NULL); /* * we can't just skip here if the key wasn't found since we might have * a domain to deal with. If we were passed in a non-null record * datum, we assume that the existing values are valid (if they're * not, then it's not our fault), but if we were passed in a null, * then every field which we don't populate needs to be run through * the input function just in case it's a domain type. */ if (hashentry == NULL && rec) continue; /* * Prepare to convert the column value from text */ if (column_info->column_type != column_type) { getTypeInputInfo(column_type, &column_info->typiofunc, &column_info->typioparam); fmgr_info_cxt(column_info->typiofunc, &column_info->proc, _state->fn_mcxt); column_info->column_type = column_type; } if (hashentry == NULL || hashentry->isnull) { /* * need InputFunctionCall to happen even for nulls, so that domain * checks are done */ values[i] = InputFunctionCall(&column_info->proc, NULL, column_info->typioparam, tupdesc->attrs[i]->atttypmod); nulls[i] = true; } else { value = hashentry->val; values[i] = InputFunctionCall(&column_info->proc, value, column_info->typioparam, tupdesc->attrs[i]->atttypmod); nulls[i] = false; } } rettuple = heap_form_tuple(tupdesc, values, nulls); tuplestore_puttuple(_state->tuple_store, rettuple); hash_destroy(json_hash); }
static void btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record); Buffer buffer; Page page; BTPageOpaque pageop; IndexTupleData trunctuple; /* * In normal operation, we would lock all the pages this WAL record * touches before changing any of them. In WAL replay, it should be okay * to lock just one page at a time, since no concurrent index updates can * be happening, and readers should not care whether they arrive at the * target page or not (since it's surely empty). */ /* parent page */ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { OffsetNumber poffset; ItemId itemid; IndexTuple itup; OffsetNumber nextoffset; BlockNumber rightsib; page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); poffset = xlrec->poffset; nextoffset = OffsetNumberNext(poffset); itemid = PageGetItemId(page, nextoffset); itup = (IndexTuple) PageGetItem(page, itemid); rightsib = ItemPointerGetBlockNumber(&itup->t_tid); itemid = PageGetItemId(page, poffset); itup = (IndexTuple) PageGetItem(page, itemid); ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY); nextoffset = OffsetNumberNext(poffset); PageIndexTupleDelete(page, nextoffset); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Rewrite the leaf page as a halfdead page */ buffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(buffer); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_prev = xlrec->leftblk; pageop->btpo_next = xlrec->rightblk; pageop->btpo.level = 0; pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF; pageop->btpo_cycleid = 0; /* * Construct a dummy hikey item that points to the next parent to be * deleted (if any). */ MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (xlrec->topparent != InvalidBlockNumber) ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY); else ItemPointerSetInvalid(&trunctuple.t_tid); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) elog(ERROR, "could not add dummy high key to half-dead page"); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); }
Datum formatter_export(PG_FUNCTION_ARGS) { HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0); TupleDesc tupdesc; HeapTupleData tuple; int ncolumns = 0; format_t *myData; char *data; int datlen; int i; /* Must be called via the external table format manager */ if (!CALLED_AS_FORMATTER(fcinfo)) elog(ERROR, "formatter_export: not called by format manager"); tupdesc = FORMATTER_GET_TUPDESC(fcinfo); /* Get our internal description of the formatter */ ncolumns = tupdesc->natts; myData = (format_t *) FORMATTER_GET_USER_CTX(fcinfo); if (myData == NULL) { myData = palloc(sizeof(format_t)); myData->ncols = ncolumns; myData->values = palloc(sizeof(Datum) * ncolumns); myData->nulls = palloc(sizeof(bool) * ncolumns); /* Determine required buffer size */ myData->buflen = 0; for (i = 0; i < ncolumns; i++) { Oid type = tupdesc->attrs[i]->atttypid; int32 typmod = tupdesc->attrs[i]->atttypmod; /* Don't know how to format dropped columns, error for now */ if (tupdesc->attrs[i]->attisdropped) elog(ERROR, "formatter_export: dropped columns"); switch (type) { case FLOAT8OID: { myData->buflen += sizeof(double); break; } case VARCHAROID: case BPCHAROID: case TEXTOID: { myData->buflen += (typmod > 0) ? typmod : MAX_FORMAT_STRING; break; } default: { elog(ERROR, "formatter_export error: unsupported data type"); break; } } } myData->buflen = Max(128, myData->buflen); /* allocate at least 128 bytes */ myData->buffer = palloc(myData->buflen + VARHDRSZ); FORMATTER_SET_USER_CTX(fcinfo, myData); } if (myData->ncols != ncolumns) elog(ERROR, "formatter_export: unexpected change of output record type"); /* break the input tuple into fields */ tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_data = rec; heap_deform_tuple(&tuple, tupdesc, myData->values, myData->nulls); datlen = 0; data = VARDATA(myData->buffer); /* ======================================================================= * MAIN FORMATTING CODE * * Currently this code assumes: * - Homogoneos hardware => No need to convert data to network byte order * - Support for TEXT/VARCHAR/BPCHAR/FLOAT8 only * - Length Prefixed strings * - No end of record tags, checksums, or optimizations for alignment. * - NULL values are cast to some sensible default value (NaN, "") * * ======================================================================= */ for (i = 0; i < ncolumns; i++) { Oid type = tupdesc->attrs[i]->atttypid; int typmod = tupdesc->attrs[i]->atttypmod; Datum val = myData->values[i]; bool nul = myData->nulls[i]; switch (type) { case FLOAT8OID: { float8 value; if (datlen + sizeof(value) >= myData->buflen) elog(ERROR, "formatter_export: buffer too small"); if (nul) value = NULL_FLOAT8_VALUE; else value = DatumGetFloat8(val); memcpy(&data[datlen], &value, sizeof(value)); datlen += sizeof(value); break; } case TEXTOID: case VARCHAROID: case BPCHAROID: { text *str; int32 len; if (nul) { str = NULL; len = 0; } else { str = DatumGetTextP(val); len = VARSIZE(str) - VARHDRSZ; if (typmod < 0) len = Min(len, MAX_FORMAT_STRING); } if (datlen + sizeof(len) + len >= myData->buflen) elog(ERROR, "formatter_export: buffer too small"); memcpy(&data[datlen], &len, sizeof(len)); datlen += sizeof(len); if (len > 0) { memcpy(&data[datlen], VARDATA(str), len); datlen += len; } break; } default: elog(ERROR, "formatter_export: unsupported datatype"); break; } } /* ======================================================================= */ SET_VARSIZE(myData->buffer, datlen + VARHDRSZ); PG_RETURN_BYTEA_P(myData->buffer); }
static void btree_xlog_unlink_page(uint8 info, XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record); BlockNumber leftsib; BlockNumber rightsib; Buffer buffer; Page page; BTPageOpaque pageop; leftsib = xlrec->leftsib; rightsib = xlrec->rightsib; /* * In normal operation, we would lock all the pages this WAL record * touches before changing any of them. In WAL replay, it should be okay * to lock just one page at a time, since no concurrent index updates can * be happening, and readers should not care whether they arrive at the * target page or not (since it's surely empty). */ /* Fix left-link of right sibling */ if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO) { page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_prev = leftsib; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Fix right-link of left sibling, if any */ if (leftsib != P_NONE) { if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_next = rightsib; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } /* Rewrite target page as empty deleted page */ buffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(buffer); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_prev = leftsib; pageop->btpo_next = rightsib; pageop->btpo.xact = xlrec->btpo_xact; pageop->btpo_flags = BTP_DELETED; pageop->btpo_cycleid = 0; PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); /* * If we deleted a parent of the targeted leaf page, instead of the leaf * itself, update the leaf to point to the next remaining child in the * branch. */ if (XLogRecHasBlockRef(record, 3)) { /* * There is no real data on the page, so we just re-create it from * scratch using the information from the WAL record. */ IndexTupleData trunctuple; buffer = XLogInitBufferForRedo(record, 3); page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF; pageop->btpo_prev = xlrec->leafleftsib; pageop->btpo_next = xlrec->leafrightsib; pageop->btpo.level = 0; pageop->btpo_cycleid = 0; /* Add a dummy hikey item */ MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (xlrec->topparent != InvalidBlockNumber) ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY); else ItemPointerSetInvalid(&trunctuple.t_tid); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) elog(ERROR, "could not add dummy high key to half-dead page"); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); } /* Update metapage if needed */ if (info == XLOG_BTREE_UNLINK_PAGE_META) _bt_restore_meta(record, 4); }
/* * Decode XLOG_HEAP2_MULTI_INSERT_insert record into multiple tuplebufs. * * Currently MULTI_INSERT will always contain the full tuples. */ static void DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) { XLogReaderState *r = buf->record; xl_heap_multi_insert *xlrec; int i; char *data; char *tupledata; Size tuplelen; RelFileNode rnode; xlrec = (xl_heap_multi_insert *) XLogRecGetData(r); /* only interested in our database */ XLogRecGetBlockTag(r, 0, &rnode, NULL, NULL); if (rnode.dbNode != ctx->slot->data.database) return; /* output plugin doesn't look for this origin, no need to queue */ if (FilterByOrigin(ctx, XLogRecGetOrigin(r))) return; tupledata = XLogRecGetBlockData(r, 0, &tuplelen); data = tupledata; for (i = 0; i < xlrec->ntuples; i++) { ReorderBufferChange *change; xl_multi_insert_tuple *xlhdr; int datalen; ReorderBufferTupleBuf *tuple; change = ReorderBufferGetChange(ctx->reorder); change->action = REORDER_BUFFER_CHANGE_INSERT; change->origin_id = XLogRecGetOrigin(r); memcpy(&change->data.tp.relnode, &rnode, sizeof(RelFileNode)); /* * CONTAINS_NEW_TUPLE will always be set currently as multi_insert * isn't used for catalogs, but better be future proof. * * We decode the tuple in pretty much the same way as DecodeXLogTuple, * but since the layout is slightly different, we can't use it here. */ if (xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE) { HeapTupleHeader header; xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(data); data = ((char *) xlhdr) + SizeOfMultiInsertTuple; datalen = xlhdr->datalen; change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder, datalen); tuple = change->data.tp.newtuple; header = tuple->tuple.t_data; /* not a disk based tuple */ ItemPointerSetInvalid(&tuple->tuple.t_self); /* * We can only figure this out after reassembling the * transactions. */ tuple->tuple.t_tableOid = InvalidOid; tuple->tuple.t_len = datalen + SizeofHeapTupleHeader; memset(header, 0, SizeofHeapTupleHeader); memcpy((char *) tuple->tuple.t_data + SizeofHeapTupleHeader, (char *) data, datalen); data += datalen; header->t_infomask = xlhdr->t_infomask; header->t_infomask2 = xlhdr->t_infomask2; header->t_hoff = xlhdr->t_hoff; } /* * Reset toast reassembly state only after the last row in the last * xl_multi_insert_tuple record emitted by one heap_multi_insert() * call. */ if (xlrec->flags & XLH_INSERT_LAST_IN_MULTI && (i + 1) == xlrec->ntuples) change->data.tp.clear_toast_afterwards = true; else change->data.tp.clear_toast_afterwards = false; ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change); } Assert(data == tupledata + tuplelen); }
Datum make_tuple_indirect(PG_FUNCTION_ARGS) { HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0); HeapTupleData tuple; int ncolumns; Datum *values; bool *nulls; Oid tupType; int32 tupTypmod; TupleDesc tupdesc; HeapTuple newtup; int i; MemoryContext old_context; /* Extract type info from the tuple itself */ tupType = HeapTupleHeaderGetTypeId(rec); tupTypmod = HeapTupleHeaderGetTypMod(rec); tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); ncolumns = tupdesc->natts; /* Build a temporary HeapTuple control structure */ tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); heap_deform_tuple(&tuple, tupdesc, values, nulls); old_context = MemoryContextSwitchTo(TopTransactionContext); for (i = 0; i < ncolumns; i++) { struct varlena *attr; struct varlena *new_attr; struct varatt_indirect redirect_pointer; /* only work on existing, not-null varlenas */ if (tupdesc->attrs[i]->attisdropped || nulls[i] || tupdesc->attrs[i]->attlen != -1) continue; attr = (struct varlena *) DatumGetPointer(values[i]); /* don't recursively indirect */ if (VARATT_IS_EXTERNAL_INDIRECT(attr)) continue; /* copy datum, so it still lives later */ if (VARATT_IS_EXTERNAL_ONDISK(attr)) attr = heap_tuple_fetch_attr(attr); else { struct varlena *oldattr = attr; attr = palloc0(VARSIZE_ANY(oldattr)); memcpy(attr, oldattr, VARSIZE_ANY(oldattr)); } /* build indirection Datum */ new_attr = (struct varlena *) palloc0(INDIRECT_POINTER_SIZE); redirect_pointer.pointer = attr; SET_VARTAG_EXTERNAL(new_attr, VARTAG_INDIRECT); memcpy(VARDATA_EXTERNAL(new_attr), &redirect_pointer, sizeof(redirect_pointer)); values[i] = PointerGetDatum(new_attr); } newtup = heap_form_tuple(tupdesc, values, nulls); pfree(values); pfree(nulls); ReleaseTupleDesc(tupdesc); MemoryContextSwitchTo(old_context); /* * We intentionally don't use PG_RETURN_HEAPTUPLEHEADER here, because that * would cause the indirect toast pointers to be flattened out of the * tuple immediately, rendering subsequent testing irrelevant. So just * return the HeapTupleHeader pointer as-is. This violates the general * rule that composite Datums shouldn't contain toast pointers, but so * long as the regression test scripts don't insert the result of this * function into a container type (record, array, etc) it should be OK. */ PG_RETURN_POINTER(newtup->t_data); }
GenericTuple CvtChunksToTup(TupleChunkList tcList, SerTupInfo *pSerInfo, TupleRemapper *remapper) { StringInfoData serData; TupleChunkListItem tcItem; int i; GenericTuple tup; TupleChunkType tcType; AssertArg(tcList != NULL); AssertArg(tcList->p_first != NULL); AssertArg(pSerInfo != NULL); tcItem = tcList->p_first; if (tcList->num_chunks == 1) { GetChunkType(tcItem, &tcType); if (tcType == TC_EMPTY) { /* * the sender is indicating that there was a row with no attributes: * return a NULL tuple */ clearTCList(NULL, tcList); return (GenericTuple) heap_form_tuple(pSerInfo->tupdesc, pSerInfo->values, pSerInfo->nulls); } } /* * Dump all of the data in the tuple chunk list into a single StringInfo, * so that we can convert it into a HeapTuple. Check chunk types based on * whether there is only one chunk, or multiple chunks. * * We know roughly how much space we'll need, allocate all in one go. * */ initStringInfoOfSize(&serData, tcList->num_chunks * tcList->max_chunk_length); i = 0; do { /* Make sure that the type of this tuple chunk is correct! */ GetChunkType(tcItem, &tcType); if (i == 0) { if (tcItem->p_next == NULL) { if (tcType != TC_WHOLE) { ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("Single chunk's type must be TC_WHOLE."))); } } else /* tcItem->p_next != NULL */ { if (tcType != TC_PARTIAL_START) { ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("First chunk of collection must have type" " TC_PARTIAL_START."))); } } } else /* i > 0 */ { if (tcItem->p_next == NULL) { if (tcType != TC_PARTIAL_END) { ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("Last chunk of collection must have type" " TC_PARTIAL_END."))); } } else /* tcItem->p_next != NULL */ { if (tcType != TC_PARTIAL_MID) { ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("Last chunk of collection must have type" " TC_PARTIAL_MID."))); } } } /* Copy this chunk into the tuple data. Don't include the header! */ appendBinaryStringInfo(&serData, (const char *) GetChunkDataPtr(tcItem) + TUPLE_CHUNK_HEADER_SIZE, tcItem->chunk_length - TUPLE_CHUNK_HEADER_SIZE); /* Go to the next chunk. */ tcItem = tcItem->p_next; i++; } while (tcItem != NULL); /* we've finished with the TCList, free it now. */ clearTCList(NULL, tcList); { TupSerHeader *tshp; unsigned int datalen; unsigned int nullslen; unsigned int hoff; HeapTupleHeader t_data; char *pos = (char *)serData.data; tshp = (TupSerHeader *)pos; if (!(tshp->tuplen & MEMTUP_LEAD_BIT) && tshp->natts == RECORD_CACHE_MAGIC_NATTS && tshp->infomask == RECORD_CACHE_MAGIC_INFOMASK) { uint32 tuplen = tshp->tuplen & ~MEMTUP_LEAD_BIT; /* a special tuple with record type cache */ List * typelist = (List *) deserializeNode(pos + sizeof(TupSerHeader), tuplen - sizeof(TupSerHeader)); TRHandleTypeLists(remapper, typelist); /* Free up memory we used. */ pfree(serData.data); return NULL; } if ((tshp->tuplen & MEMTUP_LEAD_BIT) != 0) { uint32 tuplen = memtuple_size_from_uint32(tshp->tuplen); tup = (GenericTuple) palloc(tuplen); memcpy(tup, pos, tuplen); pos += TYPEALIGN(TUPLE_CHUNK_ALIGN,tuplen); } else { HeapTuple htup; pos += sizeof(TupSerHeader); /* * if the tuple had toasted elements we have to deserialize the * old slow way. */ if ((tshp->infomask & HEAP_HASEXTERNAL) != 0) { serData.cursor += sizeof(TupSerHeader); tup = (GenericTuple) DeserializeTuple(pSerInfo, &serData); /* Free up memory we used. */ pfree(serData.data); return tup; } /* reconstruct lengths of null bitmap and data part */ if (tshp->infomask & HEAP_HASNULL) nullslen = BITMAPLEN(tshp->natts); else nullslen = 0; if (tshp->tuplen < sizeof(TupSerHeader) + nullslen) ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), errmsg("Interconnect error: cannot convert chunks to a heap tuple."), errdetail("tuple len %d < nullslen %d + headersize (%d)", tshp->tuplen, nullslen, (int)sizeof(TupSerHeader)))); datalen = tshp->tuplen - sizeof(TupSerHeader) - TYPEALIGN(TUPLE_CHUNK_ALIGN, nullslen); /* determine overhead size of tuple (should match heap_form_tuple) */ hoff = offsetof(HeapTupleHeaderData, t_bits) + TYPEALIGN(TUPLE_CHUNK_ALIGN, nullslen); if (tshp->infomask & HEAP_HASOID) hoff += sizeof(Oid); hoff = MAXALIGN(hoff); /* Allocate the space in one chunk, like heap_form_tuple */ htup = (HeapTuple) palloc(HEAPTUPLESIZE + hoff + datalen); tup = (GenericTuple) htup; t_data = (HeapTupleHeader) ((char *)htup + HEAPTUPLESIZE); /* make sure unused header fields are zeroed */ MemSetAligned(t_data, 0, hoff); /* reconstruct the HeapTupleData fields */ htup->t_len = hoff + datalen; ItemPointerSetInvalid(&(htup->t_self)); htup->t_data = t_data; /* reconstruct the HeapTupleHeaderData fields */ ItemPointerSetInvalid(&(t_data->t_ctid)); HeapTupleHeaderSetNatts(t_data, tshp->natts); t_data->t_infomask = tshp->infomask & ~HEAP_XACT_MASK; t_data->t_infomask |= HEAP_XMIN_INVALID | HEAP_XMAX_INVALID; t_data->t_hoff = hoff; if (nullslen) { memcpy((void *)t_data->t_bits, pos, nullslen); pos += TYPEALIGN(TUPLE_CHUNK_ALIGN,nullslen); } /* does the tuple descriptor expect an OID ? Note: we don't * have to set the oid itself, just the flag! (see heap_formtuple()) */ if (pSerInfo->tupdesc->tdhasoid) /* else leave infomask = 0 */ { t_data->t_infomask |= HEAP_HASOID; } /* and now the data proper (it would be nice if we could just * point our caller into our existing buffer in-place, but * we'll leave that for another day) */ memcpy((char *)t_data + hoff, pos, datalen); } } /* Free up memory we used. */ pfree(serData.data); return tup; }
/* * _hash_step() -- step to the next valid item in a scan in the bucket. * * If no valid record exists in the requested direction, return * false. Else, return true and set the hashso_curpos for the * scan to the right thing. * * 'bufP' points to the current buffer, which is pinned and read-locked. * On success exit, we have pin and read-lock on whichever page * contains the right item; on failure, we have released all buffers. */ bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) { Relation rel = scan->indexRelation; HashScanOpaque so = (HashScanOpaque) scan->opaque; ItemPointer current; Buffer buf; Page page; HashPageOpaque opaque; OffsetNumber maxoff; OffsetNumber offnum; BlockNumber blkno; IndexTuple itup; current = &(so->hashso_curpos); buf = *bufP; _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BufferGetPage(buf); opaque = (HashPageOpaque) PageGetSpecialPointer(page); /* * If _hash_step is called from _hash_first, current will not be valid, so * we can't dereference it. However, in that case, we presumably want to * start at the beginning/end of the page... */ maxoff = PageGetMaxOffsetNumber(page); if (ItemPointerIsValid(current)) offnum = ItemPointerGetOffsetNumber(current); else offnum = InvalidOffsetNumber; /* * 'offnum' now points to the last tuple we have seen (if any). * * continue to step through tuples until: 1) we get to the end of the * bucket chain or 2) we find a valid tuple. */ do { switch (dir) { case ForwardScanDirection: if (offnum != InvalidOffsetNumber) offnum = OffsetNumberNext(offnum); /* move forward */ else offnum = FirstOffsetNumber; /* new page */ while (offnum > maxoff) { /* * either this page is empty (maxoff == * InvalidOffsetNumber) or we ran off the end. */ _hash_readnext(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) { maxoff = PageGetMaxOffsetNumber(page); offnum = FirstOffsetNumber; } else { /* end of bucket */ maxoff = offnum = InvalidOffsetNumber; break; /* exit while */ } } break; case BackwardScanDirection: if (offnum != InvalidOffsetNumber) offnum = OffsetNumberPrev(offnum); /* move back */ else offnum = maxoff; /* new page */ while (offnum < FirstOffsetNumber) { /* * either this page is empty (offnum == * InvalidOffsetNumber) or we ran off the end. */ _hash_readprev(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) maxoff = offnum = PageGetMaxOffsetNumber(page); else { /* end of bucket */ maxoff = offnum = InvalidOffsetNumber; break; /* exit while */ } } break; default: /* NoMovementScanDirection */ /* this should not be reached */ break; } /* we ran off the end of the world without finding a match */ if (offnum == InvalidOffsetNumber) { *bufP = so->hashso_curbuf = InvalidBuffer; ItemPointerSetInvalid(current); return false; } /* get ready to check this tuple */ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); } while (!_hash_checkqual(scan, itup)); /* if we made it to here, we've found a valid tuple */ blkno = BufferGetBlockNumber(buf); *bufP = so->hashso_curbuf = buf; ItemPointerSet(current, blkno, offnum); return true; }
/* * GetFileSegInfo * * Get the catalog entry for an appendonly (row-oriented) relation from the * pg_aoseg_* relation that belongs to the currently used * AppendOnly table. * * If a caller intends to append to this file segment entry they must * already hold a relation Append-Only segment file (transaction-scope) lock (tag * LOCKTAG_RELATION_APPENDONLY_SEGMENT_FILE) in order to guarantee * stability of the pg_aoseg information on this segment file and exclusive right * to append data to the segment file. */ ParquetFileSegInfo * GetParquetFileSegInfo(Relation parentrel, AppendOnlyEntry *aoEntry, Snapshot parquetMetaDataSnapshot, int segno) { Relation pg_parquetseg_rel; TupleDesc pg_parquetseg_dsc; HeapTuple tuple; ScanKeyData key[1]; SysScanDesc parquetscan; Datum eof, eof_uncompressed, tupcount; bool isNull; bool indexOK; Oid indexid; ParquetFileSegInfo *fsinfo; /* * Check the pg_paqseg relation to be certain the parquet table segment file * is there. */ pg_parquetseg_rel = heap_open(aoEntry->segrelid, AccessShareLock); pg_parquetseg_dsc = RelationGetDescr(pg_parquetseg_rel); if (Gp_role == GP_ROLE_EXECUTE) { indexOK = FALSE; indexid = InvalidOid; } else { indexOK = TRUE; indexid = aoEntry->segidxid; } /* * Setup a scan key to fetch from the index by segno. */ ScanKeyInit(&key[0], (AttrNumber) Anum_pg_parquetseg_segno, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(segno)); parquetscan = systable_beginscan(pg_parquetseg_rel, indexid, indexOK, SnapshotNow, 1, &key[0]); tuple = systable_getnext(parquetscan); if (!HeapTupleIsValid(tuple)) { /* This segment file does not have an entry. */ systable_endscan(parquetscan); heap_close(pg_parquetseg_rel, AccessShareLock); return NULL ; } tuple = heap_copytuple(tuple); systable_endscan(parquetscan); Assert(HeapTupleIsValid(tuple)); fsinfo = (ParquetFileSegInfo *) palloc0(sizeof(ParquetFileSegInfo)); /* get the eof */ eof = fastgetattr(tuple, Anum_pg_parquetseg_eof, pg_parquetseg_dsc, &isNull); if (isNull) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("got invalid eof value: NULL"))); /* get the tupcount */ tupcount = fastgetattr(tuple, Anum_pg_parquetseg_tupcount, pg_parquetseg_dsc, &isNull); if (isNull) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("got invalid tupcount value: NULL"))); /* get the uncompressed eof */ eof_uncompressed = fastgetattr(tuple, Anum_pg_parquetseg_eofuncompressed, pg_parquetseg_dsc, &isNull); /* * Confusing: This eof_uncompressed variable is never used. It appears we only * call fastgetattr to get the isNull value. this variable "eof_uncompressed" is * not at all the same as fsinfo->eof_uncompressed. */ if (isNull) { /* * NULL is allowed. Tables that were created before the release of the * eof_uncompressed catalog column will have a NULL instead of a value. */ fsinfo->eof_uncompressed = InvalidUncompressedEof; } else { fsinfo->eof_uncompressed = (int64) DatumGetFloat8(eof_uncompressed); } fsinfo->segno = segno; fsinfo->eof = (int64) DatumGetFloat8(eof); fsinfo->tupcount = (int64) DatumGetFloat8(tupcount); ItemPointerSetInvalid(&fsinfo->sequence_tid); if (fsinfo->eof < 0) ereport(ERROR, (errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("invalid eof " INT64_FORMAT " for relation %s", fsinfo->eof, RelationGetRelationName(parentrel)))); /* Finish up scan and close appendonly catalog. */ heap_close(pg_parquetseg_rel, AccessShareLock); return fsinfo; }
Datum hstore_populate_record(PG_FUNCTION_ARGS) { Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, 0); HStore *hs; HEntry *entries; char *ptr; HeapTupleHeader rec; Oid tupType; int32 tupTypmod; TupleDesc tupdesc; HeapTupleData tuple; HeapTuple rettuple; RecordIOData *my_extra; int ncolumns; int i; Datum *values; bool *nulls; if (!type_is_rowtype(argtype)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("first argument must be a rowtype"))); if (PG_ARGISNULL(0)) { if (PG_ARGISNULL(1)) PG_RETURN_NULL(); rec = NULL; /* * have no tuple to look at, so the only source of type info is the * argtype. The lookup_rowtype_tupdesc call below will error out if we * don't have a known composite type oid here. */ tupType = argtype; tupTypmod = -1; } else { rec = PG_GETARG_HEAPTUPLEHEADER(0); if (PG_ARGISNULL(1)) PG_RETURN_POINTER(rec); /* Extract type info from the tuple itself */ tupType = HeapTupleHeaderGetTypeId(rec); tupTypmod = HeapTupleHeaderGetTypMod(rec); } hs = PG_GETARG_HS(1); entries = ARRPTR(hs); ptr = STRPTR(hs); /* * if the input hstore is empty, we can only skip the rest if we were * passed in a non-null record, since otherwise there may be issues with * domain nulls. */ if (HS_COUNT(hs) == 0 && rec) PG_RETURN_POINTER(rec); tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); ncolumns = tupdesc->natts; if (rec) { /* Build a temporary HeapTuple control structure */ tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; #ifdef PGXC tuple.t_xc_node_id = 0; #endif tuple.t_data = rec; } /* * We arrange to look up the needed I/O info just once per series of * calls, assuming the record type doesn't change underneath us. */ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; if (my_extra == NULL || my_extra->ncolumns != ncolumns) { fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, sizeof(RecordIOData) - sizeof(ColumnIOData) + ncolumns * sizeof(ColumnIOData)); my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; my_extra->record_type = InvalidOid; my_extra->record_typmod = 0; } if (my_extra->record_type != tupType || my_extra->record_typmod != tupTypmod) { MemSet(my_extra, 0, sizeof(RecordIOData) - sizeof(ColumnIOData) + ncolumns * sizeof(ColumnIOData)); my_extra->record_type = tupType; my_extra->record_typmod = tupTypmod; my_extra->ncolumns = ncolumns; } values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); if (rec) { /* Break down the tuple into fields */ heap_deform_tuple(&tuple, tupdesc, values, nulls); } else { for (i = 0; i < ncolumns; ++i) { values[i] = (Datum) 0; nulls[i] = true; } } for (i = 0; i < ncolumns; ++i) { ColumnIOData *column_info = &my_extra->columns[i]; Oid column_type = tupdesc->attrs[i]->atttypid; char *value; int idx; int vallen; /* Ignore dropped columns in datatype */ if (tupdesc->attrs[i]->attisdropped) { nulls[i] = true; continue; } idx = hstoreFindKey(hs, 0, NameStr(tupdesc->attrs[i]->attname), strlen(NameStr(tupdesc->attrs[i]->attname))); /* * we can't just skip here if the key wasn't found since we might have * a domain to deal with. If we were passed in a non-null record * datum, we assume that the existing values are valid (if they're * not, then it's not our fault), but if we were passed in a null, * then every field which we don't populate needs to be run through * the input function just in case it's a domain type. */ if (idx < 0 && rec) continue; /* * Prepare to convert the column value from text */ if (column_info->column_type != column_type) { getTypeInputInfo(column_type, &column_info->typiofunc, &column_info->typioparam); fmgr_info_cxt(column_info->typiofunc, &column_info->proc, fcinfo->flinfo->fn_mcxt); column_info->column_type = column_type; } if (idx < 0 || HS_VALISNULL(entries, idx)) { /* * need InputFunctionCall to happen even for nulls, so that domain * checks are done */ values[i] = InputFunctionCall(&column_info->proc, NULL, column_info->typioparam, tupdesc->attrs[i]->atttypmod); nulls[i] = true; } else { vallen = HS_VALLEN(entries, idx); value = palloc(1 + vallen); memcpy(value, HS_VAL(entries, ptr, idx), vallen); value[vallen] = 0; values[i] = InputFunctionCall(&column_info->proc, value, column_info->typioparam, tupdesc->attrs[i]->atttypmod); nulls[i] = false; } } rettuple = heap_form_tuple(tupdesc, values, nulls); ReleaseTupleDesc(tupdesc); PG_RETURN_DATUM(HeapTupleGetDatum(rettuple)); }
/* * Clean up redirect and placeholder tuples on the given page * * Redirect tuples can be marked placeholder once they're old enough. * Placeholder tuples can be removed if it won't change the offsets of * non-placeholder ones. * * Unlike the routines above, this works on both leaf and inner pages. */ static void vacuumRedirectAndPlaceholder(Relation index, Buffer buffer) { Page page = BufferGetPage(buffer); SpGistPageOpaque opaque = SpGistPageGetOpaque(page); OffsetNumber i, max = PageGetMaxOffsetNumber(page), firstPlaceholder = InvalidOffsetNumber; bool hasNonPlaceholder = false; bool hasUpdate = false; OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage]; OffsetNumber itemnos[MaxIndexTuplesPerPage]; spgxlogVacuumRedirect xlrec; XLogRecData rdata[3]; xlrec.node = index->rd_node; xlrec.blkno = BufferGetBlockNumber(buffer); xlrec.nToPlaceholder = 0; xlrec.newestRedirectXid = InvalidTransactionId; START_CRIT_SECTION(); /* * Scan backwards to convert old redirection tuples to placeholder tuples, * and identify location of last non-placeholder tuple while at it. */ for (i = max; i >= FirstOffsetNumber && (opaque->nRedirection > 0 || !hasNonPlaceholder); i--) { SpGistDeadTuple dt; dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i)); if (dt->tupstate == SPGIST_REDIRECT && TransactionIdPrecedes(dt->xid, RecentGlobalXmin)) { dt->tupstate = SPGIST_PLACEHOLDER; Assert(opaque->nRedirection > 0); opaque->nRedirection--; opaque->nPlaceholder++; /* remember newest XID among the removed redirects */ if (!TransactionIdIsValid(xlrec.newestRedirectXid) || TransactionIdPrecedes(xlrec.newestRedirectXid, dt->xid)) xlrec.newestRedirectXid = dt->xid; ItemPointerSetInvalid(&dt->pointer); itemToPlaceholder[xlrec.nToPlaceholder] = i; xlrec.nToPlaceholder++; hasUpdate = true; } if (dt->tupstate == SPGIST_PLACEHOLDER) { if (!hasNonPlaceholder) firstPlaceholder = i; } else { hasNonPlaceholder = true; } } /* * Any placeholder tuples at the end of page can safely be removed. We * can't remove ones before the last non-placeholder, though, because we * can't alter the offset numbers of non-placeholder tuples. */ if (firstPlaceholder != InvalidOffsetNumber) { /* * We do not store this array to rdata because it's easy to recreate. */ for (i = firstPlaceholder; i <= max; i++) itemnos[i - firstPlaceholder] = i; i = max - firstPlaceholder + 1; Assert(opaque->nPlaceholder >= i); opaque->nPlaceholder -= i; /* The array is surely sorted, so can use PageIndexMultiDelete */ PageIndexMultiDelete(page, itemnos, i); hasUpdate = true; } xlrec.firstPlaceholder = firstPlaceholder; if (hasUpdate) MarkBufferDirty(buffer); if (hasUpdate && RelationNeedsWAL(index)) { XLogRecPtr recptr; ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRedirect, 0); ACCEPT_RDATA_DATA(itemToPlaceholder, sizeof(OffsetNumber) * xlrec.nToPlaceholder, 1); ACCEPT_RDATA_BUFFER(buffer, 2); recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT, rdata); PageSetLSN(page, recptr); } END_CRIT_SECTION(); }
/* * Add a tuple to the new heap. * * Visibility information is copied from the original tuple, except that * we "freeze" very-old tuples. Note that since we scribble on new_tuple, * it had better be temp storage not a pointer to the original tuple. * * state opaque state as returned by begin_heap_rewrite * old_tuple original tuple in the old heap * new_tuple new, rewritten tuple to be inserted to new heap */ void rewrite_heap_tuple(RewriteState state, HeapTuple old_tuple, HeapTuple new_tuple) { MemoryContext old_cxt; ItemPointerData old_tid; TidHashKey hashkey; bool found; bool free_new; old_cxt = MemoryContextSwitchTo(state->rs_cxt); /* * Copy the original tuple's visibility information into new_tuple. * * XXX we might later need to copy some t_infomask2 bits, too? Right now, * we intentionally clear the HOT status bits. */ memcpy(&new_tuple->t_data->t_choice.t_heap, &old_tuple->t_data->t_choice.t_heap, sizeof(HeapTupleFields)); new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK; new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK; new_tuple->t_data->t_infomask |= old_tuple->t_data->t_infomask & HEAP_XACT_MASK; /* * While we have our hands on the tuple, we may as well freeze any * very-old xmin or xmax, so that future VACUUM effort can be saved. * * Note we abuse heap_freeze_tuple() a bit here, since it's expecting to * be given a pointer to a tuple in a disk buffer. It happens though that * we can get the right things to happen by passing InvalidBuffer for the * buffer. */ heap_freeze_tuple(new_tuple->t_data, state->rs_freeze_xid, InvalidBuffer); /* * Invalid ctid means that ctid should point to the tuple itself. We'll * override it later if the tuple is part of an update chain. */ ItemPointerSetInvalid(&new_tuple->t_data->t_ctid); /* * If the tuple has been updated, check the old-to-new mapping hash table. */ if (!(old_tuple->t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED)) && !(ItemPointerEquals(&(old_tuple->t_self), &(old_tuple->t_data->t_ctid)))) { OldToNewMapping mapping; memset(&hashkey, 0, sizeof(hashkey)); hashkey.xmin = HeapTupleHeaderGetXmax(old_tuple->t_data); hashkey.tid = old_tuple->t_data->t_ctid; mapping = (OldToNewMapping) hash_search(state->rs_old_new_tid_map, &hashkey, HASH_FIND, NULL); if (mapping != NULL) { /* * We've already copied the tuple that t_ctid points to, so we can * set the ctid of this tuple to point to the new location, and * insert it right away. */ new_tuple->t_data->t_ctid = mapping->new_tid; /* We don't need the mapping entry anymore */ hash_search(state->rs_old_new_tid_map, &hashkey, HASH_REMOVE, &found); Assert(found); } else { /* * We haven't seen the tuple t_ctid points to yet. Stash this * tuple into unresolved_tups to be written later. */ UnresolvedTup unresolved; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, HASH_ENTER, &found); Assert(!found); unresolved->old_tid = old_tuple->t_self; unresolved->tuple = heap_copytuple(new_tuple); /* * We can't do anything more now, since we don't know where the * tuple will be written. */ MemoryContextSwitchTo(old_cxt); return; } } /* * Now we will write the tuple, and then check to see if it is the B tuple * in any new or known pair. When we resolve a known pair, we will be * able to write that pair's A tuple, and then we have to check if it * resolves some other pair. Hence, we need a loop here. */ old_tid = old_tuple->t_self; free_new = false; for (;;) { ItemPointerData new_tid; /* Insert the tuple and find out where it's put in new_heap */ raw_heap_insert(state, new_tuple); new_tid = new_tuple->t_self; /* * If the tuple is the updated version of a row, and the prior version * wouldn't be DEAD yet, then we need to either resolve the prior * version (if it's waiting in rs_unresolved_tups), or make an entry * in rs_old_new_tid_map (so we can resolve it when we do see it). The * previous tuple's xmax would equal this one's xmin, so it's * RECENTLY_DEAD if and only if the xmin is not before OldestXmin. */ if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) && !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data), state->rs_oldest_xmin)) { /* * Okay, this is B in an update pair. See if we've seen A. */ UnresolvedTup unresolved; memset(&hashkey, 0, sizeof(hashkey)); hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); hashkey.tid = old_tid; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, HASH_FIND, NULL); if (unresolved != NULL) { /* * We have seen and memorized the previous tuple already. Now * that we know where we inserted the tuple its t_ctid points * to, fix its t_ctid and insert it to the new heap. */ if (free_new) heap_freetuple(new_tuple); new_tuple = unresolved->tuple; free_new = true; old_tid = unresolved->old_tid; new_tuple->t_data->t_ctid = new_tid; /* * We don't need the hash entry anymore, but don't free its * tuple just yet. */ hash_search(state->rs_unresolved_tups, &hashkey, HASH_REMOVE, &found); Assert(found); /* loop back to insert the previous tuple in the chain */ continue; } else { /* * Remember the new tid of this tuple. We'll use it to set the * ctid when we find the previous tuple in the chain. */ OldToNewMapping mapping; mapping = hash_search(state->rs_old_new_tid_map, &hashkey, HASH_ENTER, &found); Assert(!found); mapping->new_tid = new_tid; } } /* Done with this (chain of) tuples, for now */ if (free_new) heap_freetuple(new_tuple); break; } MemoryContextSwitchTo(old_cxt); }
/* * record_image_eq : * compares two records for identical contents, based on byte images * result : * returns true if the records are identical, false otherwise. * * Note: we do not use record_image_cmp here, since we can avoid * de-toasting for unequal lengths this way. */ Datum record_image_eq(PG_FUNCTION_ARGS) { HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0); HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1); bool result = true; Oid tupType1; Oid tupType2; int32 tupTypmod1; int32 tupTypmod2; TupleDesc tupdesc1; TupleDesc tupdesc2; HeapTupleData tuple1; HeapTupleData tuple2; int ncolumns1; int ncolumns2; RecordCompareData *my_extra; int ncols; Datum *values1; Datum *values2; bool *nulls1; bool *nulls2; int i1; int i2; int j; /* Extract type info from the tuples */ tupType1 = HeapTupleHeaderGetTypeId(record1); tupTypmod1 = HeapTupleHeaderGetTypMod(record1); tupdesc1 = lookup_rowtype_tupdesc(tupType1, tupTypmod1); ncolumns1 = tupdesc1->natts; tupType2 = HeapTupleHeaderGetTypeId(record2); tupTypmod2 = HeapTupleHeaderGetTypMod(record2); tupdesc2 = lookup_rowtype_tupdesc(tupType2, tupTypmod2); ncolumns2 = tupdesc2->natts; /* Build temporary HeapTuple control structures */ tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; /* * We arrange to look up the needed comparison info just once per series * of calls, assuming the record types don't change underneath us. */ ncols = Max(ncolumns1, ncolumns2); my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; if (my_extra == NULL || my_extra->ncolumns < ncols) { fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, offsetof(RecordCompareData, columns) + ncols * sizeof(ColumnCompareData)); my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra; my_extra->ncolumns = ncols; my_extra->record1_type = InvalidOid; my_extra->record1_typmod = 0; my_extra->record2_type = InvalidOid; my_extra->record2_typmod = 0; } if (my_extra->record1_type != tupType1 || my_extra->record1_typmod != tupTypmod1 || my_extra->record2_type != tupType2 || my_extra->record2_typmod != tupTypmod2) { MemSet(my_extra->columns, 0, ncols * sizeof(ColumnCompareData)); my_extra->record1_type = tupType1; my_extra->record1_typmod = tupTypmod1; my_extra->record2_type = tupType2; my_extra->record2_typmod = tupTypmod2; } /* Break down the tuples into fields */ values1 = (Datum *) palloc(ncolumns1 * sizeof(Datum)); nulls1 = (bool *) palloc(ncolumns1 * sizeof(bool)); heap_deform_tuple(&tuple1, tupdesc1, values1, nulls1); values2 = (Datum *) palloc(ncolumns2 * sizeof(Datum)); nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool)); heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2); /* * Scan corresponding columns, allowing for dropped columns in different * places in the two rows. i1 and i2 are physical column indexes, j is * the logical column index. */ i1 = i2 = j = 0; while (i1 < ncolumns1 || i2 < ncolumns2) { /* * Skip dropped columns */ if (i1 < ncolumns1 && tupdesc1->attrs[i1]->attisdropped) { i1++; continue; } if (i2 < ncolumns2 && tupdesc2->attrs[i2]->attisdropped) { i2++; continue; } if (i1 >= ncolumns1 || i2 >= ncolumns2) break; /* we'll deal with mismatch below loop */ /* * Have two matching columns, they must be same type */ if (tupdesc1->attrs[i1]->atttypid != tupdesc2->attrs[i2]->atttypid) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("cannot compare dissimilar column types %s and %s at record column %d", format_type_be(tupdesc1->attrs[i1]->atttypid), format_type_be(tupdesc2->attrs[i2]->atttypid), j + 1))); /* * We consider two NULLs equal; NULL > not-NULL. */ if (!nulls1[i1] || !nulls2[i2]) { if (nulls1[i1] || nulls2[i2]) { result = false; break; } /* Compare the pair of elements */ if (tupdesc1->attrs[i1]->attlen == -1) { Size len1, len2; len1 = toast_raw_datum_size(values1[i1]); len2 = toast_raw_datum_size(values2[i2]); /* No need to de-toast if lengths don't match. */ if (len1 != len2) result = false; else { struct varlena *arg1val; struct varlena *arg2val; arg1val = PG_DETOAST_DATUM_PACKED(values1[i1]); arg2val = PG_DETOAST_DATUM_PACKED(values2[i2]); result = (memcmp(VARDATA_ANY(arg1val), VARDATA_ANY(arg2val), len1 - VARHDRSZ) == 0); /* Only free memory if it's a copy made here. */ if ((Pointer) arg1val != (Pointer) values1[i1]) pfree(arg1val); if ((Pointer) arg2val != (Pointer) values2[i2]) pfree(arg2val); } } else if (tupdesc1->attrs[i1]->attbyval) { switch (tupdesc1->attrs[i1]->attlen) { case 1: result = (GET_1_BYTE(values1[i1]) == GET_1_BYTE(values2[i2])); break; case 2: result = (GET_2_BYTES(values1[i1]) == GET_2_BYTES(values2[i2])); break; case 4: result = (GET_4_BYTES(values1[i1]) == GET_4_BYTES(values2[i2])); break; #if SIZEOF_DATUM == 8 case 8: result = (GET_8_BYTES(values1[i1]) == GET_8_BYTES(values2[i2])); break; #endif default: Assert(false); /* cannot happen */ } } else { result = (memcmp(DatumGetPointer(values1[i1]), DatumGetPointer(values2[i2]), tupdesc1->attrs[i1]->attlen) == 0); } if (!result) break; } /* equal, so continue to next column */ i1++, i2++, j++; } /* * If we didn't break out of the loop early, check for column count * mismatch. (We do not report such mismatch if we found unequal column * values; is that a feature or a bug?) */ if (result) { if (i1 != ncolumns1 || i2 != ncolumns2) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("cannot compare record types with different numbers of columns"))); } pfree(values1); pfree(nulls1); pfree(values2); pfree(nulls2); ReleaseTupleDesc(tupdesc1); ReleaseTupleDesc(tupdesc2); /* Avoid leaking memory when handed toasted input. */ PG_FREE_IF_COPY(record1, 0); PG_FREE_IF_COPY(record2, 1); PG_RETURN_BOOL(result); }
/* * _hash_step() -- step to the next valid item in a scan in the bucket. * * If no valid record exists in the requested direction, return * false. Else, return true and set the hashso_curpos for the * scan to the right thing. * * 'bufP' points to the current buffer, which is pinned and read-locked. * On success exit, we have pin and read-lock on whichever page * contains the right item; on failure, we have released all buffers. */ bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) { Relation rel = scan->indexRelation; HashScanOpaque so = (HashScanOpaque) scan->opaque; ItemPointer current; Buffer buf; Page page; HashPageOpaque opaque; OffsetNumber maxoff; OffsetNumber offnum; BlockNumber blkno; IndexTuple itup; current = &(so->hashso_curpos); buf = *bufP; _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BufferGetPage(buf); opaque = (HashPageOpaque) PageGetSpecialPointer(page); /* * If _hash_step is called from _hash_first, current will not be valid, so * we can't dereference it. However, in that case, we presumably want to * start at the beginning/end of the page... */ maxoff = PageGetMaxOffsetNumber(page); if (ItemPointerIsValid(current)) offnum = ItemPointerGetOffsetNumber(current); else offnum = InvalidOffsetNumber; /* * 'offnum' now points to the last tuple we examined (if any). * * continue to step through tuples until: 1) we get to the end of the * bucket chain or 2) we find a valid tuple. */ do { switch (dir) { case ForwardScanDirection: if (offnum != InvalidOffsetNumber) offnum = OffsetNumberNext(offnum); /* move forward */ else { /* new page, locate starting position by binary search */ offnum = _hash_binsearch(page, so->hashso_sk_hash); } for (;;) { /* * check if we're still in the range of items with the * target hash key */ if (offnum <= maxoff) { Assert(offnum >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup)) break; /* yes, so exit for-loop */ } /* * ran off the end of this page, try the next */ _hash_readnext(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) { maxoff = PageGetMaxOffsetNumber(page); offnum = _hash_binsearch(page, so->hashso_sk_hash); } else { /* end of bucket */ itup = NULL; break; /* exit for-loop */ } } break; case BackwardScanDirection: if (offnum != InvalidOffsetNumber) offnum = OffsetNumberPrev(offnum); /* move back */ else { /* new page, locate starting position by binary search */ offnum = _hash_binsearch_last(page, so->hashso_sk_hash); } for (;;) { /* * check if we're still in the range of items with the * target hash key */ if (offnum >= FirstOffsetNumber) { Assert(offnum <= maxoff); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup)) break; /* yes, so exit for-loop */ } /* * ran off the end of this page, try the next */ _hash_readprev(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) { maxoff = PageGetMaxOffsetNumber(page); offnum = _hash_binsearch_last(page, so->hashso_sk_hash); } else { /* end of bucket */ itup = NULL; break; /* exit for-loop */ } } break; default: /* NoMovementScanDirection */ /* this should not be reached */ itup = NULL; break; } if (itup == NULL) { /* we ran off the end of the bucket without finding a match */ *bufP = so->hashso_curbuf = InvalidBuffer; ItemPointerSetInvalid(current); return false; } /* check the tuple quals, loop around if not met */ } while (!_hash_checkqual(scan, itup)); /* if we made it to here, we've found a valid tuple */ blkno = BufferGetBlockNumber(buf); *bufP = so->hashso_curbuf = buf; ItemPointerSet(current, blkno, offnum); return true; }
static void spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr; OffsetNumber *itemToPlaceholder; Buffer buffer; itemToPlaceholder = xldata->offsets; /* * If any redirection tuples are being removed, make sure there are no * live Hot Standby transactions that might need to see them. */ if (InHotStandby) { if (TransactionIdIsValid(xldata->newestRedirectXid)) ResolveRecoveryConflictWithSnapshot(xldata->newestRedirectXid, xldata->node); } if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); SpGistPageOpaque opaque = SpGistPageGetOpaque(page); int i; /* Convert redirect pointers to plain placeholders */ for (i = 0; i < xldata->nToPlaceholder; i++) { SpGistDeadTuple dt; dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, itemToPlaceholder[i])); Assert(dt->tupstate == SPGIST_REDIRECT); dt->tupstate = SPGIST_PLACEHOLDER; ItemPointerSetInvalid(&dt->pointer); } Assert(opaque->nRedirection >= xldata->nToPlaceholder); opaque->nRedirection -= xldata->nToPlaceholder; opaque->nPlaceholder += xldata->nToPlaceholder; /* Remove placeholder tuples at end of page */ if (xldata->firstPlaceholder != InvalidOffsetNumber) { int max = PageGetMaxOffsetNumber(page); OffsetNumber *toDelete; toDelete = palloc(sizeof(OffsetNumber) * max); for (i = xldata->firstPlaceholder; i <= max; i++) toDelete[i - xldata->firstPlaceholder] = i; i = max - xldata->firstPlaceholder + 1; Assert(opaque->nPlaceholder >= i); opaque->nPlaceholder -= i; /* The array is sorted, so can use PageIndexMultiDelete */ PageIndexMultiDelete(page, toDelete, i); pfree(toDelete); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
/* * record_send - binary output routine for any composite type. */ Datum record_send(PG_FUNCTION_ARGS) { HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0); Oid tupType; int32 tupTypmod; TupleDesc tupdesc; HeapTupleData tuple; RecordIOData *my_extra; int ncolumns; int validcols; int i; Datum *values; bool *nulls; StringInfoData buf; /* Extract type info from the tuple itself */ tupType = HeapTupleHeaderGetTypeId(rec); tupTypmod = HeapTupleHeaderGetTypMod(rec); tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); ncolumns = tupdesc->natts; /* Build a temporary HeapTuple control structure */ tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; /* * We arrange to look up the needed I/O info just once per series of * calls, assuming the record type doesn't change underneath us. */ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; if (my_extra == NULL || my_extra->ncolumns != ncolumns) { fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, sizeof(RecordIOData) - sizeof(ColumnIOData) + ncolumns * sizeof(ColumnIOData)); my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra; my_extra->record_type = InvalidOid; my_extra->record_typmod = 0; } if (my_extra->record_type != tupType || my_extra->record_typmod != tupTypmod) { MemSet(my_extra, 0, sizeof(RecordIOData) - sizeof(ColumnIOData) + ncolumns * sizeof(ColumnIOData)); my_extra->record_type = tupType; my_extra->record_typmod = tupTypmod; my_extra->ncolumns = ncolumns; } values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); /* Break down the tuple into fields */ heap_deform_tuple(&tuple, tupdesc, values, nulls); /* And build the result string */ pq_begintypsend(&buf); /* Need to scan to count nondeleted columns */ validcols = 0; for (i = 0; i < ncolumns; i++) { if (!tupdesc->attrs[i]->attisdropped) validcols++; } pq_sendint(&buf, validcols, 4); for (i = 0; i < ncolumns; i++) { ColumnIOData *column_info = &my_extra->columns[i]; Oid column_type = tupdesc->attrs[i]->atttypid; bytea *outputbytes; /* Ignore dropped columns in datatype */ if (tupdesc->attrs[i]->attisdropped) continue; pq_sendint(&buf, column_type, sizeof(Oid)); if (nulls[i]) { /* emit -1 data length to signify a NULL */ pq_sendint(&buf, -1, 4); continue; } /* * Convert the column value to binary */ if (column_info->column_type != column_type) { bool typIsVarlena; getTypeBinaryOutputInfo(column_type, &column_info->typiofunc, &typIsVarlena); fmgr_info_cxt(column_info->typiofunc, &column_info->proc, fcinfo->flinfo->fn_mcxt); column_info->column_type = column_type; } outputbytes = SendFunctionCall(&column_info->proc, values[i]); /* We assume the result will not have been toasted */ pq_sendint(&buf, VARSIZE(outputbytes) - VARHDRSZ, 4); pq_sendbytes(&buf, VARDATA(outputbytes), VARSIZE(outputbytes) - VARHDRSZ); pfree(outputbytes); } pfree(values); pfree(nulls); ReleaseTupleDesc(tupdesc); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); }
/* ---------------------------------------------------------------- * ExecInitSubqueryScan * ---------------------------------------------------------------- */ SubqueryScanState * ExecInitSubqueryScan(SubqueryScan *node, EState *estate, int eflags) { SubqueryScanState *subquerystate; /* check for unsupported flags */ Assert(!(eflags & EXEC_FLAG_MARK)); /* * SubqueryScan should not have any "normal" children. Also, if planner * left anything in subrtable, it's fishy. */ Assert(outerPlan(node) == NULL); Assert(innerPlan(node) == NULL); Assert(node->subrtable == NIL); /* * Since subquery nodes create its own executor state, * and pass it down to its child nodes, we always * initialize the subquery node. However, some * fields are not initialized if not necessary, see * below. */ /* * create state structure */ subquerystate = makeNode(SubqueryScanState); subquerystate->ss.ps.plan = (Plan *) node; subquerystate->ss.ps.state = estate; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &subquerystate->ss.ps); /* * initialize child expressions */ subquerystate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) subquerystate); subquerystate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) subquerystate); /* Check if targetlist or qual contains a var node referencing the ctid column */ subquerystate->cdb_want_ctid = contain_ctid_var_reference(&node->scan); ItemPointerSetInvalid(&subquerystate->cdb_fake_ctid); #define SUBQUERYSCAN_NSLOTS 2 /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &subquerystate->ss.ps); ExecInitScanTupleSlot(estate, &subquerystate->ss); /* * initialize subquery */ subquerystate->subplan = ExecInitNode(node->subplan, estate, eflags); /* return borrowed share node list */ estate->es_sharenode = estate->es_sharenode; /*subquerystate->ss.ps.ps_TupFromTlist = false;*/ /* * Initialize scan tuple type (needed by ExecAssignScanProjectionInfo) */ ExecAssignScanType(&subquerystate->ss, ExecGetResultType(subquerystate->subplan)); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&subquerystate->ss.ps); ExecAssignScanProjectionInfo(&subquerystate->ss); initGpmonPktForSubqueryScan((Plan *)node, &subquerystate->ss.ps.gpmon_pkt, estate); return subquerystate; }