HeapTuple
CvtChunksToHeapTup(TupleChunkList tcList, SerTupInfo *pSerInfo)
{
	StringInfoData serData;
	TupleChunkListItem tcItem;
	int			i;
	HeapTuple	htup;
	TupleChunkType tcType;

	AssertArg(tcList != NULL);
	AssertArg(tcList->p_first != NULL);
	AssertArg(pSerInfo != NULL);

	tcItem = tcList->p_first;

	if (tcList->num_chunks == 1)
	{
		GetChunkType(tcItem, &tcType);

		if (tcType == TC_EMPTY)
		{
			/*
			 * the sender is indicating that there was a row with no
			 * attributes: return a NULL tuple
			 */
			clearTCList(NULL, tcList);

			htup = heap_form_tuple(pSerInfo->tupdesc, pSerInfo->values,
								   pSerInfo->nulls);

			return htup;
		}
	}

	/*
	 * Dump all of the data in the tuple chunk list into a single StringInfo,
	 * so that we can convert it into a HeapTuple.  Check chunk types based
	 * on whether there is only one chunk, or multiple chunks.
	 *
	 * We know roughly how much space we'll need, so allocate it all in one go.
	 */
	initStringInfoOfSize(&serData, tcList->num_chunks * tcList->max_chunk_length);

	i = 0;
	do
	{
		/* Make sure that the type of this tuple chunk is correct! */
		GetChunkType(tcItem, &tcType);

		if (i == 0)
		{
			if (tcItem->p_next == NULL)
			{
				if (tcType != TC_WHOLE)
				{
					ereport(ERROR,
							(errcode(ERRCODE_PROTOCOL_VIOLATION),
							 errmsg("Single chunk's type must be TC_WHOLE.")));
				}
			}
			else				/* tcItem->p_next != NULL */
			{
				if (tcType != TC_PARTIAL_START)
				{
					ereport(ERROR,
							(errcode(ERRCODE_PROTOCOL_VIOLATION),
							 errmsg("First chunk of collection must have type"
									" TC_PARTIAL_START.")));
				}
			}
		}
		else					/* i > 0 */
		{
			if (tcItem->p_next == NULL)
			{
				if (tcType != TC_PARTIAL_END)
				{
					ereport(ERROR,
							(errcode(ERRCODE_PROTOCOL_VIOLATION),
							 errmsg("Last chunk of collection must have type"
									" TC_PARTIAL_END.")));
				}
			}
			else				/* tcItem->p_next != NULL */
			{
				if (tcType != TC_PARTIAL_MID)
				{
					ereport(ERROR,
							(errcode(ERRCODE_PROTOCOL_VIOLATION),
							 errmsg("Middle chunk of collection must have type"
									" TC_PARTIAL_MID.")));
				}
			}
		}

		/* Copy this chunk into the tuple data.  Don't include the header! */
		appendBinaryStringInfo(&serData,
							   (const char *) GetChunkDataPtr(tcItem) + TUPLE_CHUNK_HEADER_SIZE,
							   tcItem->chunk_length - TUPLE_CHUNK_HEADER_SIZE);

		/* Go to the next chunk. */
		tcItem = tcItem->p_next;
		i++;
	}
	while (tcItem != NULL);

	/* we've finished with the TCList, free it now. */
	clearTCList(NULL, tcList);

	{
		TupSerHeader *tshp;
		unsigned int datalen;
		unsigned int nullslen;
		unsigned int hoff;
		HeapTupleHeader t_data;
		char	   *pos = (char *) serData.data;

		tshp = (TupSerHeader *) pos;

		if ((tshp->tuplen & MEMTUP_LEAD_BIT) != 0)
		{
			uint32		tuplen = memtuple_size_from_uint32(tshp->tuplen);

			htup = (HeapTuple) palloc(tuplen);
			memcpy(htup, pos, tuplen);

			pos += TYPEALIGN(TUPLE_CHUNK_ALIGN, tuplen);
		}
		else
		{
			pos += sizeof(TupSerHeader);

			/*
			 * if the tuple had toasted elements we have to deserialize the
			 * old slow way.
			 */
			if ((tshp->infomask & HEAP_HASEXTERNAL) != 0)
			{
				serData.cursor += sizeof(TupSerHeader);

				htup = DeserializeTuple(pSerInfo, &serData);

				/* Free up memory we used. */
				pfree(serData.data);
				return htup;
			}

			/* reconstruct lengths of null bitmap and data part */
			if (tshp->infomask & HEAP_HASNULL)
				nullslen = BITMAPLEN(tshp->natts);
			else
				nullslen = 0;

			if (tshp->tuplen < sizeof(TupSerHeader) + nullslen)
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Interconnect error: cannot convert chunks to a heap tuple."),
						 errdetail("tuple len %d < nullslen %d + headersize (%d)",
								   tshp->tuplen, nullslen, (int) sizeof(TupSerHeader))));

			datalen = tshp->tuplen - sizeof(TupSerHeader) - TYPEALIGN(TUPLE_CHUNK_ALIGN, nullslen);

			/* determine overhead size of tuple (should match heap_form_tuple) */
			hoff = offsetof(HeapTupleHeaderData, t_bits) + TYPEALIGN(TUPLE_CHUNK_ALIGN, nullslen);
			if (tshp->infomask & HEAP_HASOID)
				hoff += sizeof(Oid);
			hoff = MAXALIGN(hoff);

			/* Allocate the space in one chunk, like heap_form_tuple */
			htup = (HeapTuple) palloc(HEAPTUPLESIZE + hoff + datalen);

			t_data = (HeapTupleHeader) ((char *) htup + HEAPTUPLESIZE);

			/* make sure unused header fields are zeroed */
			MemSetAligned(t_data, 0, hoff);

			/* reconstruct the HeapTupleData fields */
			htup->t_len = hoff + datalen;
			ItemPointerSetInvalid(&(htup->t_self));
			htup->t_data = t_data;

			/* reconstruct the HeapTupleHeaderData fields */
			ItemPointerSetInvalid(&(t_data->t_ctid));
			HeapTupleHeaderSetNatts(t_data, tshp->natts);
			t_data->t_infomask = tshp->infomask & ~HEAP_XACT_MASK;
			t_data->t_infomask |= HEAP_XMIN_INVALID | HEAP_XMAX_INVALID;
			t_data->t_hoff = hoff;

			if (nullslen)
			{
				memcpy((void *) t_data->t_bits, pos, nullslen);
				pos += TYPEALIGN(TUPLE_CHUNK_ALIGN, nullslen);
			}

			/*
			 * does the tuple descriptor expect an OID?  Note: we don't have
			 * to set the oid itself, just the flag!  (see heap_formtuple())
			 */
			if (pSerInfo->tupdesc->tdhasoid)	/* else leave infomask = 0 */
			{
				t_data->t_infomask |= HEAP_HASOID;
			}

			/*
			 * and now the data proper (it would be nice if we could just
			 * point our caller into our existing buffer in-place, but we'll
			 * leave that for another day)
			 */
			memcpy((char *) t_data + hoff, pos, datalen);
		}
	}

	/* Free up memory we used. */
	pfree(serData.data);

	return htup;
}
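/*
 * Illustrative sketch only (not part of the original file): the chunk-type
 * protocol that CvtChunksToHeapTup enforces, restated as a standalone
 * predicate.  A tuple arrives either as a single TC_WHOLE (or TC_EMPTY)
 * chunk, or as TC_PARTIAL_START, zero or more TC_PARTIAL_MID chunks, and a
 * final TC_PARTIAL_END.  The function name and the guarding macro are
 * hypothetical; they exist here only to document the expected sequence.
 */
#ifdef MOTION_LAYER_EXAMPLES
static bool
chunk_type_sequence_is_valid(TupleChunkList tcList)
{
	TupleChunkListItem item = tcList->p_first;
	TupleChunkType type;
	int			i = 0;

	for (; item != NULL; item = item->p_next, i++)
	{
		GetChunkType(item, &type);

		if (i == 0 && item->p_next == NULL)
			return type == TC_WHOLE || type == TC_EMPTY;

		if (i == 0)
		{
			if (type != TC_PARTIAL_START)
				return false;
		}
		else if (item->p_next == NULL)
		{
			if (type != TC_PARTIAL_END)
				return false;
		}
		else if (type != TC_PARTIAL_MID)
			return false;
	}

	/* an empty list is never a valid tuple */
	return i > 0;
}
#endif							/* MOTION_LAYER_EXAMPLES */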
/*
 * Function:  SendTuple - Sends a portion or whole tuple to the AMS layer.
 */
SendReturnCode
SendTuple(MotionLayerState *mlStates,
		  ChunkTransportState *transportStates,
		  int16 motNodeID,
		  HeapTuple tuple,
		  int16 targetRoute)
{
	MotionNodeEntry *pMNEntry;
	TupleChunkListData tcList;
	MemoryContext oldCtxt;
	SendReturnCode rc;

	AssertArg(tuple != NULL);

	/*
	 * Analyze tools.  Do not send anything if this slice is in the bit mask.
	 */
	if (gp_motion_slice_noop != 0 && (gp_motion_slice_noop & (1 << currentSliceId)) != 0)
		return SEND_COMPLETE;

	/*
	 * Pull up the motion node entry with the node's details.  This includes
	 * details that affect sending, such as whether the motion node needs to
	 * include backup segment-dbs.
	 */
	pMNEntry = getMotionNodeEntry(mlStates, motNodeID, "SendTuple");

#ifdef AMS_VERBOSE_LOGGING
	elog(DEBUG5, "Serializing HeapTuple for sending.");
#endif

	if (targetRoute != BROADCAST_SEGIDX)
	{
		struct directTransportBuffer b;

		getTransportDirectBuffer(transportStates, motNodeID, targetRoute, &b);

		if (b.pri != NULL && b.prilen > TUPLE_CHUNK_HEADER_SIZE)
		{
			int			sent = 0;

			sent = SerializeTupleDirect(tuple, &pMNEntry->ser_tup_info, &b);
			if (sent > 0)
			{
				putTransportDirectBuffer(transportStates, motNodeID, targetRoute, sent);

				/* fill-in tcList fields to update stats */
				tcList.num_chunks = 1;
				tcList.serialized_data_length = sent;

				/* update stats */
				statSendTuple(mlStates, pMNEntry, &tcList);

				return SEND_COMPLETE;
			}
		}
		/* Otherwise fall-through */
	}

	/* Create and store the serialized form, and some stats about it. */
	oldCtxt = MemoryContextSwitchTo(mlStates->motion_layer_mctx);

	SerializeTupleIntoChunks(tuple, &pMNEntry->ser_tup_info, &tcList);

	MemoryContextSwitchTo(oldCtxt);

#ifdef AMS_VERBOSE_LOGGING
	elog(DEBUG5, "Serialized HeapTuple for sending:\n"
		 "\ttarget-route %d \n"
		 "\t%d bytes in serial form\n"
		 "\tbroken into %d chunks",
		 targetRoute,
		 tcList.serialized_data_length,
		 tcList.num_chunks);
#endif

	/* do the send. */
	if (!SendTupleChunkToAMS(mlStates, transportStates, motNodeID, targetRoute, tcList.p_first))
	{
		pMNEntry->stopped = true;
		rc = STOP_SENDING;
	}
	else
	{
		/* update stats */
		statSendTuple(mlStates, pMNEntry, &tcList);

		rc = SEND_COMPLETE;
	}

	/* cleanup */
	clearTCList(&pMNEntry->ser_tup_info.chunkCache, &tcList);

	return rc;
}
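/*
 * Illustrative sketch only (not part of the original file): a sender loop
 * would typically stop pushing rows once SendTuple reports STOP_SENDING
 * (the receiver has asked for no more tuples, e.g. because a LIMIT was
 * satisfied).  The helper name, the guarding macro, and the next_tuple()
 * callback are hypothetical.
 */
#ifdef MOTION_LAYER_EXAMPLES
static void
example_send_all(MotionLayerState *mlStates,
				 ChunkTransportState *transportStates,
				 int16 motNodeID,
				 int16 targetRoute,
				 HeapTuple (*next_tuple) (void))
{
	HeapTuple	tuple;

	while ((tuple = next_tuple()) != NULL)
	{
		if (SendTuple(mlStates, transportStates, motNodeID, tuple,
					  targetRoute) == STOP_SENDING)
			break;				/* receiver requested no more tuples */
	}
}
#endif							/* MOTION_LAYER_EXAMPLES */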
void
EndMotionLayerNode(MotionLayerState *mlStates, int16 motNodeID, bool flushCommLayer)
{
	MotionNodeEntry *pMNEntry;
	ChunkSorterEntry *pCSEntry;
	int			i;

	pMNEntry = getMotionNodeEntry(mlStates, motNodeID, "EndMotionLayerNode");

#ifdef AMS_VERBOSE_LOGGING
	elog(DEBUG5, "Cleaning up Motion Layer details for motion node %d.",
		 motNodeID);
#endif

	/*
	 * Iterate through all entries in the motion layer's chunk-sort map, to
	 * see if we have gotten end-of-stream from all senders.
	 */
	if (pMNEntry->preserve_order && pMNEntry->ready_tuple_lists != NULL)
	{
		for (i = 0; i < GpIdentity.numsegments; i++)
		{
			pCSEntry = &pMNEntry->ready_tuple_lists[i];

			/*
			 * QD should not expect end-of-stream to come from QEs that are
			 * not members of a direct dispatch.
			 */
			if (!pCSEntry->init)
				continue;

			if (pMNEntry->preserve_order &&
				gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG)
			{
				/* Print chunk-sorter entry statistics. */
				elog(DEBUG4, "Chunk-sorter entry [route=%d,node=%d] statistics:\n"
					 "\tAvailable Tuples High-Watermark: " UINT64_FORMAT,
					 i, pMNEntry->motion_node_id,
					 pMNEntry->stat_tuples_available_hwm);
			}

			if (!pMNEntry->stopped && !pCSEntry->end_of_stream)
			{
				if (flushCommLayer)
				{
					elog(FATAL, "Motion layer node %d cleanup - did not receive"
						 " end-of-stream from sender %d.", motNodeID, i);

					/*** TODO - get chunks until end-of-stream comes in. ***/
				}
				else
				{
					elog(LOG, "Motion layer node %d cleanup - did not receive"
						 " end-of-stream from sender %d.", motNodeID, i);
				}
			}
			else
			{
				/* End-of-stream is marked for this entry. */

				/*** TODO - do more than just complain! ***/

				if (pCSEntry->chunk_list.num_chunks > 0)
				{
					elog(LOG, "Motion layer node %d cleanup - there are still"
						 " %d chunks enqueued from sender %d.", motNodeID,
						 pCSEntry->chunk_list.num_chunks, i);
				}

				/***
				 * TODO - Make sure there are no outstanding tuples in the
				 * tuple-store.
				 ***/
			}

			/*
			 * Clean up the chunk-sorter entry, then remove it from the hash
			 * table.
			 */
			clearTCList(&pMNEntry->ser_tup_info.chunkCache, &pCSEntry->chunk_list);
			if (pMNEntry->preserve_order)
				/* Clean up the tuple-store. */
				htfifo_destroy(pCSEntry->ready_tuples);
		}
	}
	pMNEntry->cleanedUp = true;

	/* Clean up the motion-node entry, then remove it from the hash table. */
	if (gp_log_interconnect >= GPVARS_VERBOSITY_VERBOSE)
	{
		if (pMNEntry->stat_total_bytes_sent > 0 || pMNEntry->sel_wr_wait > 0)
		{
			elog(LOG, "Interconnect seg%d slice%d sent " UINT64_FORMAT " tuples, "
				 UINT64_FORMAT " total bytes, "
				 UINT64_FORMAT " tuple bytes, "
				 UINT64_FORMAT " chunks; waited " UINT64_FORMAT " usec.",
				 Gp_segment,
				 currentSliceId,
				 pMNEntry->stat_total_sends,
				 pMNEntry->stat_total_bytes_sent,
				 pMNEntry->stat_tuple_bytes_sent,
				 pMNEntry->stat_total_chunks_sent,
				 pMNEntry->sel_wr_wait);
		}
		if (pMNEntry->stat_total_bytes_recvd > 0 || pMNEntry->sel_rd_wait > 0)
		{
			elog(LOG, "Interconnect seg%d slice%d received from slice%d: " UINT64_FORMAT " tuples, "
				 UINT64_FORMAT " total bytes, "
				 UINT64_FORMAT " tuple bytes, "
				 UINT64_FORMAT " chunks; waited " UINT64_FORMAT " usec.",
				 Gp_segment,
				 currentSliceId,
				 motNodeID,
				 pMNEntry->stat_total_recvs,
				 pMNEntry->stat_total_bytes_recvd,
				 pMNEntry->stat_tuple_bytes_recvd,
				 pMNEntry->stat_total_chunks_recvd,
				 pMNEntry->sel_rd_wait);
		}
	}

	CleanupSerTupInfo(&pMNEntry->ser_tup_info);
	FreeTupleDesc(pMNEntry->tuple_desc);
	if (!pMNEntry->preserve_order)
		htfifo_destroy(pMNEntry->ready_tuples);

	pMNEntry->valid = false;
}
void
CheckAndSendRecordCache(MotionLayerState *mlStates,
						ChunkTransportState *transportStates,
						int16 motNodeID,
						int16 targetRoute)
{
	MotionNodeEntry *pMNEntry;
	TupleChunkListData tcList;
	MemoryContext oldCtxt;
	ChunkTransportStateEntry *pEntry = NULL;
	MotionConn *conn;

	getChunkTransportState(transportStates, motNodeID, &pEntry);

	/*
	 * for broadcast we only mark sent_record_typmod for connection 0 for
	 * efficiency and convenience
	 */
	if (targetRoute == BROADCAST_SEGIDX)
		conn = &pEntry->conns[0];
	else
		conn = &pEntry->conns[targetRoute];

	/*
	 * Analyze tools.  Do not send anything if this slice is in the bit mask.
	 */
	if (gp_motion_slice_noop != 0 && (gp_motion_slice_noop & (1 << currentSliceId)) != 0)
		return;

	/*
	 * Pull up the motion node entry with the node's details.  This includes
	 * details that affect sending, such as whether the motion node needs to
	 * include backup segment-dbs.
	 */
	pMNEntry = getMotionNodeEntry(mlStates, motNodeID, "SendRecordCache");

	if (!ShouldSendRecordCache(conn, &pMNEntry->ser_tup_info))
		return;

#ifdef AMS_VERBOSE_LOGGING
	elog(DEBUG5, "Serializing RecordCache for sending.");
#endif

	/* Create and store the serialized form, and some stats about it. */
	oldCtxt = MemoryContextSwitchTo(mlStates->motion_layer_mctx);

	SerializeRecordCacheIntoChunks(&pMNEntry->ser_tup_info, &tcList, conn);

	MemoryContextSwitchTo(oldCtxt);

#ifdef AMS_VERBOSE_LOGGING
	elog(DEBUG5, "Serialized RecordCache for sending:\n"
		 "\ttarget-route %d \n"
		 "\t%d bytes in serial form\n"
		 "\tbroken into %d chunks",
		 targetRoute,
		 tcList.serialized_data_length,
		 tcList.num_chunks);
#endif

	/* do the send. */
	if (!SendTupleChunkToAMS(mlStates, transportStates, motNodeID, targetRoute, tcList.p_first))
	{
		pMNEntry->stopped = true;
	}
	else
	{
		/* update stats */
		statSendTuple(mlStates, pMNEntry, &tcList);
	}

	/* cleanup */
	clearTCList(&pMNEntry->ser_tup_info.chunkCache, &tcList);

	UpdateSentRecordCache(conn);
}
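/*
 * Illustrative sketch only (not part of the original file): the record-type
 * cache needs to reach the receiver before any tuple that references a
 * transient record typmod, so a sender would call CheckAndSendRecordCache
 * for a route immediately before SendTuple on that same route.  The helper
 * name and the guarding macro are hypothetical.
 */
#ifdef MOTION_LAYER_EXAMPLES
static SendReturnCode
example_send_with_record_cache(MotionLayerState *mlStates,
							   ChunkTransportState *transportStates,
							   int16 motNodeID,
							   HeapTuple tuple,
							   int16 targetRoute)
{
	/* No-op unless ShouldSendRecordCache() reports the cache is out of date. */
	CheckAndSendRecordCache(mlStates, transportStates, motNodeID, targetRoute);

	return SendTuple(mlStates, transportStates, motNodeID, tuple, targetRoute);
}
#endif							/* MOTION_LAYER_EXAMPLES */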