/*
 * LockSegfilesOnMasterForSingleRel
 *
 * On the dispatcher, take an AccessExclusiveLock on the given segment file
 * (segno) of an append-only row-oriented or parquet relation.  Heap
 * relations have no per-segfile locks, so they are skipped.
 *
 * Note: an earlier revision looped over rel->rd_segfile0_count to lock one
 * segfile per content id; that dead, commented-out loop has been removed.
 */
static void
LockSegfilesOnMasterForSingleRel(Relation rel, int32 segno)
{
	/* Segment-file locks are only ever taken on the dispatcher. */
	Insist(Gp_role == GP_ROLE_DISPATCH);

	if (RelationIsAoRows(rel) || RelationIsParquet(rel))
	{
		LockRelationAppendOnlySegmentFile(&rel->rd_node, segno,
										  AccessExclusiveLock, false);
	}
}
/*
 * GetParquetSegFilesTotals
 *
 * Get the total bytes and tuples for a specific parquet table
 * from the pg_aoseg table on this local segdb.
 *
 * Returns a palloc0'd ParquetFileSegTotals; the caller owns (and should
 * eventually pfree) the result.
 */
ParquetFileSegTotals *GetParquetSegFilesTotals(Relation parentrel, Snapshot parquetMetaDataSnapshot)
{
	Relation	pg_paqseg_rel;
	TupleDesc	pg_paqseg_dsc;
	HeapTuple	tuple;
	SysScanDesc paqscan;
	ParquetFileSegTotals *result;
	Datum		eof,
				eof_uncompressed,
				tupcount;
	bool		isNull;
	AppendOnlyEntry *aoEntry = NULL;

	Assert(RelationIsParquet(parentrel));

	/* Look up the pg_appendonly entry to find the segment catalog (segrelid). */
	aoEntry = GetAppendOnlyEntry(RelationGetRelid(parentrel), parquetMetaDataSnapshot);

	result = (ParquetFileSegTotals *) palloc0(sizeof(ParquetFileSegTotals));

	pg_paqseg_rel = heap_open(aoEntry->segrelid, AccessShareLock);
	pg_paqseg_dsc = RelationGetDescr(pg_paqseg_rel);

	/* Sequential scan of the segment catalog: InvalidOid means "no index". */
	paqscan = systable_beginscan(pg_paqseg_rel, InvalidOid, FALSE,
								 parquetMetaDataSnapshot, 0, NULL);

	while (HeapTupleIsValid(tuple = systable_getnext(paqscan)))
	{
		eof = fastgetattr(tuple, Anum_pg_parquetseg_eof, pg_paqseg_dsc, &isNull);
		tupcount = fastgetattr(tuple, Anum_pg_parquetseg_tupcount, pg_paqseg_dsc, &isNull);
		eof_uncompressed = fastgetattr(tuple, Anum_pg_parquetseg_eofuncompressed, pg_paqseg_dsc, &isNull);

		/*
		 * NOTE(review): isNull is overwritten by each fastgetattr call and is
		 * only examined for eofuncompressed; eof and tupcount are presumably
		 * NOT NULL columns -- confirm against the catalog definition.  Also,
		 * once totalbytesuncompressed is set to the InvalidUncompressedEof
		 * sentinel, later non-null segments still accumulate into it -- verify
		 * callers only test for the exact sentinel value.
		 */
		if (isNull)
			result->totalbytesuncompressed = InvalidUncompressedEof;
		else
			result->totalbytesuncompressed += (int64) DatumGetFloat8(eof_uncompressed);

		result->totalbytes += (int64) DatumGetFloat8(eof);
		result->totaltuples += (int64) DatumGetFloat8(tupcount);
		result->totalfilesegs++;

		CHECK_FOR_INTERRUPTS();
	}

	systable_endscan(paqscan);
	heap_close(pg_paqseg_rel, AccessShareLock);
	pfree(aoEntry);

	return result;
}
/*
 * calculate_relation_size
 *
 * Calculate the on-disk size of a relation.
 *
 * Iterate over all files belonging to the relation and stat() them.
 * The obviously better way is to use glob.  For whatever reason, glob
 * is extremely slow if there are lots of relations in the database,
 * so we handle all cases by hand instead.
 */
int64 calculate_relation_size(Relation rel)
{
	int64		totalsize = 0;
	char	   *relationpath;
	char		pathname[MAXPGPATH];
	struct stat fst;
	int			i;

	relationpath = relpath(rel->rd_node);

	if (RelationIsHeap(rel))
	{
		/*
		 * Ordinary relation, including heap and index.  Files take the form
		 * of relationpath, or relationpath.%d for extension segments.  There
		 * are no holes in the numbering, therefore we can stop when we reach
		 * the first nonexistent file.
		 */
		for (i = 0;; ++i)
		{
			if (i == 0)
				snprintf(pathname, MAXPGPATH, "%s", relationpath);
			else
				snprintf(pathname, MAXPGPATH, "%s.%d", relationpath, i);

			if (stat(pathname, &fst) >= 0)
				totalsize += fst.st_size;
			else
			{
				if (errno == ENOENT)
					break;		/* past the last segment file: we're done */
				else
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not stat file %s: %m", pathname)));
			}
		}
	}
	else if (RelationIsAoRows(rel))
		totalsize = GetAOTotalBytes(rel, SnapshotNow);
	else if (RelationIsParquet(rel))
		totalsize = GetParquetTotalBytes(rel, SnapshotNow);

	/* RELSTORAGE_VIRTUAL has no space usage */
	return totalsize;
}
/*
 * RelationGetRelationCompressionFuncs
 *
 * Has the same signature as RelationGetAttributeCompressionFuncs() even though
 * we don't actually need the full Relation data structure. I deem consistency
 * of API more important in this case.
 *
 * Returns the compression function array for the relation's compression
 * type; a NULL comptype (heap/external relations) means "no compression".
 */
PGFunction *
RelationGetRelationCompressionFuncs(Relation rel)
{
	AppendOnlyEntry *aoentry = NULL;
	char	   *comptype = NULL;
	PGFunction *compFuncs;

	if (RelationIsAoRows(rel) || RelationIsParquet(rel))
	{
		aoentry = GetAppendOnlyEntry(RelationGetRelid(rel), SnapshotNow);
		comptype = aoentry->compresstype;
	}

	compFuncs = get_funcs_for_compression(comptype);

	/*
	 * Release the pg_appendonly entry now that comptype has been consumed.
	 * The original code leaked it; compare GetParquetSegFilesTotals, which
	 * pfrees its aoEntry after use.
	 */
	if (aoentry != NULL)
		pfree(aoentry);

	return compFuncs;
}
/*
 * getTableType
 *
 * Map a relation's storage format onto the executor's TableType enum.
 * Errors out (and nominally returns TableTypeInvalid) for any storage
 * format that is not heap, append-only row, or parquet.
 */
int getTableType(Relation rel)
{
	Assert(rel != NULL && rel->rd_rel != NULL);

	if (RelationIsHeap(rel))
		return TableTypeHeap;
	if (RelationIsAoRows(rel))
		return TableTypeAppendOnly;
	if (RelationIsParquet(rel))
		return TableTypeParquet;

	/* Unknown storage format: report and bail out. */
	elog(ERROR, "undefined table type for storage format: %c",
		 rel->rd_rel->relstorage);

	return TableTypeInvalid;	/* not reached; keeps the compiler happy */
}
/*
 * calculate_total_relation_size
 *
 * Compute the on-disk size of files for the relation according to the
 * stat function, including heap data, index data, toast data, aoseg data,
 * and aoblkdir data.
 *
 * NOTE(review): the text visible in this chunk is truncated -- the toast /
 * aoseg / aoblkdir accounting and the function's closing code lie outside
 * this view, so only the heap and index portions are documented here.
 */
static int64
calculate_total_relation_size(Oid Relid)
{
	Relation	heapRel;
	Oid			toastOid;
	AppendOnlyEntry *aoEntry = NULL;
	int64		size;
	ListCell   *cell;

	heapRel = relation_open(Relid, AccessShareLock);
	toastOid = heapRel->rd_rel->reltoastrelid;

	/* AO/parquet relations carry auxiliary catalogs described by aoEntry. */
	if (RelationIsAoRows(heapRel) || RelationIsParquet(heapRel))
		aoEntry = GetAppendOnlyEntry(Relid, SnapshotNow);

	/* Get the heap size */
	if (Relid == 0 || heapRel->rd_node.relNode == 0)
		size = 0;
	else
		size = calculate_relation_size(heapRel);

	/* Include any dependent indexes */
	if (heapRel->rd_rel->relhasindex)
	{
		List	   *index_oids = RelationGetIndexList(heapRel);

		foreach(cell, index_oids)
		{
			Oid			idxOid = lfirst_oid(cell);
			Relation	iRel;

			iRel = relation_open(idxOid, AccessShareLock);
			size += calculate_relation_size(iRel);
			relation_close(iRel, AccessShareLock);
		}
/*
 * InitScanStateRelationDetails
 *
 * Opens a relation and sets various relation specific ScanState fields:
 * initialized target list and qual expressions, the opened relation, the
 * per-segment file splits (AO/parquet only), the scan tuple descriptor,
 * the projection info, and the table type.
 */
void
InitScanStateRelationDetails(ScanState *scanState, Plan *plan, EState *estate)
{
	Assert(NULL != scanState);

	PlanState  *ps = &scanState->ps;

	/* Initialize child expressions */
	ps->targetlist = (List *) ExecInitExpr((Expr *) plan->targetlist, ps);
	ps->qual = (List *) ExecInitExpr((Expr *) plan->qual, ps);

	/* Open the relation this scan node reads from. */
	Relation	rel = ExecOpenScanRelation(estate, ((Scan *) plan)->scanrelid);

	scanState->ss_currentRelation = rel;

	/* AO/parquet scans need this segment's share of the file splits. */
	if (RelationIsAoRows(rel) || RelationIsParquet(rel))
	{
		scanState->splits =
			GetFileSplitsOfSegment(estate->es_plannedstmt->scantable_splits,
								   rel->rd_id,
								   GetQEIndex());
	}

	ExecAssignScanType(scanState, RelationGetDescr(rel));
	ExecAssignScanProjectionInfo(scanState);

	scanState->tableType = getTableType(scanState->ss_currentRelation);
}
/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		INSERTs have to add the tuple into
 *		the base relation and insert appropriate tuples into the
 *		index relations.
 *		Insert can be part of an update operation when
 *		there is a preceding SplitUpdate node.
 *
 *		NOTE(review): 'dest' is not referenced anywhere in this body --
 *		confirm it exists only to match the caller's expected signature.
 * ----------------------------------------------------------------
 */
void
ExecInsert(TupleTableSlot *slot,
		   DestReceiver *dest,
		   EState *estate,
		   PlanGenerator planGen,
		   bool isUpdate)
{
	void	   *tuple = NULL;
	ResultRelInfo *resultRelInfo = NULL;
	Relation	resultRelationDesc = NULL;
	Oid			newId = InvalidOid;
	TupleTableSlot *partslot = NULL;
	AOTupleId	aoTupleId = AOTUPLEID_INIT;

	/* Storage-type flags for the (possibly partition-resolved) target rel */
	bool		rel_is_heap = false;
	bool		rel_is_aorows = false;
	bool		rel_is_external = false;
	bool		rel_is_parquet = false;

	/*
	 * get information on the (current) result relation
	 */
	if (estate->es_result_partitions)
	{
		/* Partitioned target: route the tuple to its destination part. */
		resultRelInfo = slot_get_partition(slot, estate);
		estate->es_result_relation_info = resultRelInfo;

		if (NULL != resultRelInfo->ri_parquetSendBack)
		{
			/*
			 * The Parquet part we are about to insert into
			 * has sendBack information. This means we're inserting into the
			 * part twice, which is not supported. Error out (GPSQL-2291)
			 */
			Assert(gp_parquet_insert_sort);
			ereport(ERROR, (errcode(ERRCODE_CDB_FEATURE_NOT_YET),
							errmsg("Cannot insert out-of-order tuples in parquet partitions"),
							errhint("Sort the data on the partitioning key(s) before inserting"),
							errOmitLocation(true)));
		}

		/*
		 * Check if we need to close the last parquet partition we
		 * inserted into (GPSQL-2291).
		 */
		Oid			new_part_oid = resultRelInfo->ri_RelationDesc->rd_id;

		if (gp_parquet_insert_sort &&
			PLANGEN_OPTIMIZER == planGen &&
			InvalidOid != estate->es_last_parq_part &&
			new_part_oid != estate->es_last_parq_part)
		{
			Assert(NULL != estate->es_partition_state->result_partition_hash);

			/* Find the ResultRelInfo of the previously-open parquet part. */
			ResultPartHashEntry *entry =
				hash_search(estate->es_partition_state->result_partition_hash,
							&estate->es_last_parq_part,
							HASH_FIND,
							NULL /* found */);

			Assert(NULL != entry);
			Assert(entry->offset < estate->es_num_result_relations);

			ResultRelInfo *oldResultRelInfo = &estate->es_result_relations[entry->offset];

			elog(DEBUG1, "Switching from old part oid=%d name=[%s] to new part oid=%d name=[%s]",
				 estate->es_last_parq_part,
				 oldResultRelInfo->ri_RelationDesc->rd_rel->relname.data,
				 new_part_oid,
				 resultRelInfo->ri_RelationDesc->rd_rel->relname.data);

			/*
			 * We are opening a new partition, and the last partition we
			 * inserted into was a Parquet part. Let's close the old
			 * parquet insert descriptor to free the memory before
			 * opening the new one.
			 */
			ParquetInsertDescData *oldInsertDesc = oldResultRelInfo->ri_parquetInsertDesc;

			/*
			 * We need to preserve the "sendback" information that needs to be
			 * sent back to the QD process from this part.
			 * Compute it here, and store it for later use.
			 */
			QueryContextDispatchingSendBack sendback =
				CreateQueryContextDispatchingSendBack(1);

			sendback->relid = RelationGetRelid(oldResultRelInfo->ri_RelationDesc);
			oldInsertDesc->sendback = sendback;
			parquet_insert_finish(oldInsertDesc);

			/* Store the sendback information in the resultRelInfo for this part */
			oldResultRelInfo->ri_parquetSendBack = sendback;

			/* Record in the resultRelInfo that we closed the parquet insert descriptor */
			oldResultRelInfo->ri_parquetInsertDesc = NULL;

			/* Reset the last parquet part Oid, it's now closed */
			estate->es_last_parq_part = InvalidOid;
		}
	}
	else
	{
		resultRelInfo = estate->es_result_relation_info;
	}

	Assert(!resultRelInfo->ri_projectReturning);

	resultRelationDesc = resultRelInfo->ri_RelationDesc;
	rel_is_heap = RelationIsHeap(resultRelationDesc);
	rel_is_aorows = RelationIsAoRows(resultRelationDesc);
	rel_is_external = RelationIsExternal(resultRelationDesc);
	rel_is_parquet = RelationIsParquet(resultRelationDesc);

	/* Validate that insert is not part of an non-allowed update operation. */
	if (isUpdate && (rel_is_aorows || rel_is_parquet))
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Append-only tables are not updatable. Operation not permitted."),
						errOmitLocation(true)));
	}

	/* Re-map the tuple into the target part's row layout. */
	partslot = reconstructMatchingTupleSlot(slot, resultRelInfo);

	/* Fetch the tuple in the representation the target storage type needs. */
	if (rel_is_heap || rel_is_external)
	{
		tuple = ExecFetchSlotHeapTuple(partslot);
	}
	else if (rel_is_aorows)
	{
		tuple = ExecFetchSlotMemTuple(partslot, false);
	}
	else if (rel_is_parquet)
	{
		tuple = NULL;			/* parquet_insert() consumes the slot directly */
	}

	Assert(partslot != NULL);
	Assert(rel_is_parquet || (tuple != NULL));

	/* Execute triggers in Planner-generated plans */
	if (planGen == PLANGEN_PLANNER)
	{
		/* BEFORE ROW INSERT Triggers */
		if (resultRelInfo->ri_TrigDesc &&
			resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
		{
			HeapTuple	newtuple;

			/* NYI */
			if (rel_is_parquet)
				elog(ERROR, "triggers are not supported on tables that use column-oriented storage");

			newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);

			if (newtuple == NULL)	/* "do nothing" */
			{
				return;
			}

			if (newtuple != tuple)	/* modified by Trigger(s) */
			{
				/*
				 * Put the modified tuple into a slot for convenience of routines
				 * below. We assume the tuple was allocated in per-tuple memory
				 * context, and therefore will go away by itself. The tuple table
				 * slot should not try to clear it.
				 */
				TupleTableSlot *newslot = estate->es_trig_tuple_slot;

				if (newslot->tts_tupleDescriptor != partslot->tts_tupleDescriptor)
					ExecSetSlotDescriptor(newslot, partslot->tts_tupleDescriptor);
				ExecStoreGenericTuple(newtuple, newslot, false);
				newslot->tts_tableOid = partslot->tts_tableOid; /* for constraints */
				tuple = newtuple;
				partslot = newslot;
			}
		}
	}

	/*
	 * Check the constraints of the tuple
	 */
	if (resultRelationDesc->rd_att->constr && planGen == PLANGEN_PLANNER)
	{
		ExecConstraints(resultRelInfo, partslot, estate);
	}

	/*
	 * insert the tuple
	 *
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
	 *
	 * NOTE: for append-only relations we use the append-only access methods.
	 */
	if (rel_is_aorows)
	{
		if (resultRelInfo->ri_aoInsertDesc == NULL)
		{
			ResultRelSegFileInfo *segfileinfo = NULL;

			/* Set the pre-assigned fileseg number to insert into */
			ResultRelInfoSetSegFileInfo(resultRelInfo, estate->es_result_segfileinfos);
			segfileinfo = (ResultRelSegFileInfo *) list_nth(resultRelInfo->ri_aosegfileinfos, GetQEIndex());
			resultRelInfo->ri_aoInsertDesc =
				appendonly_insert_init(resultRelationDesc, segfileinfo);
		}

		appendonly_insert(resultRelInfo->ri_aoInsertDesc, tuple, &newId, &aoTupleId);
	}
	else if (rel_is_external)
	{
		/* Writable external table */
		if (resultRelInfo->ri_extInsertDesc == NULL)
			resultRelInfo->ri_extInsertDesc = external_insert_init(resultRelationDesc, 0);

		newId = external_insert(resultRelInfo->ri_extInsertDesc, tuple);
	}
	else if (rel_is_parquet)
	{
		/* If there is no parquet insert descriptor, create it now. */
		if (resultRelInfo->ri_parquetInsertDesc == NULL)
		{
			ResultRelSegFileInfo *segfileinfo = NULL;

			ResultRelInfoSetSegFileInfo(resultRelInfo, estate->es_result_segfileinfos);
			segfileinfo = (ResultRelSegFileInfo *) list_nth(resultRelInfo->ri_aosegfileinfos, GetQEIndex());
			resultRelInfo->ri_parquetInsertDesc =
				parquet_insert_init(resultRelationDesc, segfileinfo);

			/*
			 * Just opened a new parquet partition for insert. Save the Oid
			 * in estate, so that we can close it when switching to a
			 * new partition (GPSQL-2291)
			 */
			elog(DEBUG1, "Saving es_last_parq_part. Old=%d, new=%d",
				 estate->es_last_parq_part, resultRelationDesc->rd_id);
			estate->es_last_parq_part = resultRelationDesc->rd_id;
		}

		newId = parquet_insert(resultRelInfo->ri_parquetInsertDesc, partslot);
	}
	else
	{
		Insist(rel_is_heap);

		newId = heap_insert(resultRelationDesc,
							tuple,
							estate->es_snapshot->curcid,
							true, true,
							GetCurrentTransactionId());
	}

	/* Bookkeeping: processed-row counters and the last-assigned Oid. */
	IncrAppended();
	(estate->es_processed)++;
	(resultRelInfo->ri_aoprocessed)++;
	estate->es_lastoid = newId;

	partslot->tts_tableOid = RelationGetRelid(resultRelationDesc);

	if (rel_is_aorows || rel_is_parquet)
	{
		/* NOTE: Current version does not support index upon parquet table. */

		/*
		 * insert index entries for AO Row-Store tuple
		 */
		if (resultRelInfo->ri_NumIndices > 0 && !rel_is_parquet)
			ExecInsertIndexTuples(partslot, (ItemPointer) &aoTupleId, estate, false);
	}
	else
	{
		/* Use parttuple for index update in case this is an indexed heap table. */
		TupleTableSlot *xslot = partslot;
		void	   *xtuple = tuple;

		setLastTid(&(((HeapTuple) xtuple)->t_self));

		/*
		 * insert index entries for tuple
		 */
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(xslot, &(((HeapTuple) xtuple)->t_self), estate, false);
	}

	if (planGen == PLANGEN_PLANNER)
	{
		/* AFTER ROW INSERT Triggers */
		ExecARInsertTriggers(estate, resultRelInfo, tuple);
	}
}