Exemplo n.º 1
0
Arquivo: tupser.c Projeto: 50wu/gpdb
/*
 * Convert a HeapTuple into a byte-sequence, and store it directly
 * into a chunklist for transmission.
 *
 * This code is based on the printtup_internal_20() function in printtup.c.
 *
 * Parameters:
 *	tuple	 - tuple to serialize (heap tuple or memtuple); must not be NULL.
 *	pSerInfo - serialization state: tuple descriptor, per-attribute type/send
 *			   information, scratch values[]/nulls[] arrays and the chunk
 *			   cache new chunks are taken from; must not be NULL.
 *	tcList	 - output chunk list; must not be NULL.  Its fields are
 *			   re-initialized here, previous contents are not freed.
 *
 * Layout written to the chunk list:
 *	- zero-attribute descriptor: a single header-only TC_EMPTY chunk;
 *	- memtuple: the raw memtuple bytes followed by padding;
 *	- heap tuple: a TupSerHeader, then either
 *		(no external attributes) the null bitmap (if any) and the raw
 *		tuple data, each followed by padding, or
 *		(external attributes present) the nulls character-array followed
 *		by every non-null attribute serialized one at a time.
 *
 * When more than one chunk was needed, the first chunk is marked
 * TC_PARTIAL_START and the last one TC_PARTIAL_END.
 *
 * Errors: FATAL if no chunk can be obtained from the cache; ERROR if an
 * attribute's send function takes an unsupported number of arguments.
 */
void
SerializeTupleIntoChunks(HeapTuple tuple, SerTupInfo * pSerInfo, TupleChunkList tcList)
{
	TupleChunkListItem tcItem = NULL;
	MemoryContext oldCtxt;
	TupleDesc	tupdesc;
	int			i,
		natts;
	bool		fHandled;

	AssertArg(tcList != NULL);
	AssertArg(tuple != NULL);
	AssertArg(pSerInfo != NULL);

	tupdesc = pSerInfo->tupdesc;
	natts = tupdesc->natts;

	/* get ready to go */
	tcList->p_first = NULL;
	tcList->p_last = NULL;
	tcList->num_chunks = 0;
	tcList->serialized_data_length = 0;
	tcList->max_chunk_length = Gp_max_tuple_chunk_size;

	/* every serialized tuple, even an empty one, needs a first chunk */
	tcItem = getChunkFromCache(&pSerInfo->chunkCache);
	if (tcItem == NULL)
	{
		ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY),
						errmsg("Could not allocate space for first chunk item in new chunk list.")));
	}

	if (natts == 0)
	{
		/* TC_EMPTY is just one chunk */
		SetChunkType(tcItem->chunk_data, TC_EMPTY);
		tcItem->chunk_length = TUPLE_CHUNK_HEADER_SIZE;
		appendChunkToTCList(tcList, tcItem);

		return;
	}

	/* assume that we'll take a single chunk */
	SetChunkType(tcItem->chunk_data, TC_WHOLE);
	tcItem->chunk_length = TUPLE_CHUNK_HEADER_SIZE;
	appendChunkToTCList(tcList, tcItem);

	AssertState(s_tupSerMemCtxt != NULL);

	if (is_heaptuple_memtuple(tuple))
	{
		/* memtuples are shipped as-is: raw bytes plus padding */
		unsigned int	mtuplen = memtuple_get_size((MemTuple)tuple, NULL);

		addByteStringToChunkList(tcList, (char *)tuple, mtuplen, &pSerInfo->chunkCache);
		addPadding(tcList, &pSerInfo->chunkCache, mtuplen);
	}
	else
	{
		TupSerHeader tsh;

		unsigned int	datalen;
		unsigned int	nullslen;

		HeapTupleHeader t_data = tuple->t_data;

		/* size of the attribute data following the tuple header */
		datalen = tuple->t_len - t_data->t_hoff;
		if (HeapTupleHasNulls(tuple))
			nullslen = BITMAPLEN(HeapTupleHeaderGetNatts(t_data));
		else
			nullslen = 0;

		tsh.tuplen = sizeof(TupSerHeader) + TYPEALIGN(TUPLE_CHUNK_ALIGN,nullslen) + datalen;
		tsh.natts = HeapTupleHeaderGetNatts(t_data);
		tsh.infomask = t_data->t_infomask;

		addByteStringToChunkList(tcList, (char *)&tsh, sizeof(TupSerHeader), &pSerInfo->chunkCache);
		/* If we don't have any attributes which have been toasted, we
		 * can be very very simple: just send the raw data. */
		if ((tsh.infomask & HEAP_HASEXTERNAL) == 0)
		{
			if (nullslen)
			{
				addByteStringToChunkList(tcList, (char *)t_data->t_bits, nullslen, &pSerInfo->chunkCache);
				addPadding(tcList,&pSerInfo->chunkCache,nullslen);
			}

			addByteStringToChunkList(tcList, (char *)t_data + t_data->t_hoff, datalen, &pSerInfo->chunkCache);
			addPadding(tcList,&pSerInfo->chunkCache,datalen);
		}
		else
		{
			/* We have to be more careful when we have tuples that
			 * have been toasted. Ideally we'd like to send the
			 * untoasted attributes in as "raw" a format as possible
			 * but that makes rebuilding the tuple harder.
			 */
			oldCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);

			/* deconstruct the tuple (faster than a heap_getattr loop) */
			heap_deform_tuple(tuple, tupdesc, pSerInfo->values, pSerInfo->nulls);

			MemoryContextSwitchTo(oldCtxt);

			/* Send the nulls character-array. */
			addByteStringToChunkList(tcList, pSerInfo->nulls, natts, &pSerInfo->chunkCache);
			addPadding(tcList,&pSerInfo->chunkCache,natts);

			/*
			 * send the attributes of this tuple: NOTE anything which allocates
			 * temporary space (e.g. could result in a PG_DETOAST_DATUM) should be
			 * executed with the memory context set to s_tupSerMemCtxt
			 */
			for (i = 0; i < natts; ++i)
			{
				SerAttrInfo *attrInfo = pSerInfo->myinfo + i;
				Datum		origattr = pSerInfo->values[i],
					attr;
				bytea	   *outputbytes = NULL;

				/* skip null attributes (already taken care of above) */
				if (pSerInfo->nulls[i])
					continue;

				/*
				 * If we have a toasted datum, forcibly detoast it here to avoid
				 * memory leakage: we want to force the detoast allocation(s) to
				 * happen in our reset-able serialization context.
				 */
				if (attrInfo->typisvarlena)
				{
					oldCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);
					/* we want to detoast but leave compressed, if
					 * possible, but we have to handle varlena
					 * attributes (and others ?) differently than we
					 * currently do (first step is to use
					 * heap_tuple_fetch_attr() instead of
					 * PG_DETOAST_DATUM()). */
					attr = PointerGetDatum(PG_DETOAST_DATUM(origattr));
					MemoryContextSwitchTo(oldCtxt);
				}
				else
					attr = origattr;

				/*
				 * Assume that the data's output will be handled by the special IO
				 * code, and if not then we can handle it the slow way.
				 */
				fHandled = true;
				switch (attrInfo->atttypid)
				{
					case INT4OID:
						addInt32ToChunkList(tcList, DatumGetInt32(attr), &pSerInfo->chunkCache);
						break;
					case CHAROID:
						addCharToChunkList(tcList, DatumGetChar(attr), &pSerInfo->chunkCache);
						addPadding(tcList,&pSerInfo->chunkCache,1);
						break;
					case BPCHAROID:
					case VARCHAROID:
					case INT2VECTOROID: /* postgres serialization logic broken, use our own */
					case OIDVECTOROID: /* postgres serialization logic broken, use our own */
					case ANYARRAYOID:
					{
						/* length-prefixed copy of the varlena payload */
						text	   *pText = DatumGetTextP(attr);
						int32		textSize = VARSIZE(pText) - VARHDRSZ;

						addInt32ToChunkList(tcList, textSize, &pSerInfo->chunkCache);
						addByteStringToChunkList(tcList, (char *) VARDATA(pText), textSize, &pSerInfo->chunkCache);
						addPadding(tcList,&pSerInfo->chunkCache,textSize);
						break;
					}
					case DATEOID:
					{
						DateADT date = DatumGetDateADT(attr);

						addByteStringToChunkList(tcList, (char *) &date, sizeof(DateADT), &pSerInfo->chunkCache);
						break;
					}
					case NUMERICOID:
					{
						/*
						 * Treat the numeric as a varlena variable, and just push
						 * the whole shebang to the output-buffer.	We don't care
						 * about the guts of the numeric.
						 */
						Numeric		num = DatumGetNumeric(attr);
						int32		numSize = VARSIZE(num) - VARHDRSZ;

						addInt32ToChunkList(tcList, numSize, &pSerInfo->chunkCache);
						addByteStringToChunkList(tcList, (char *) VARDATA(num), numSize, &pSerInfo->chunkCache);
						addPadding(tcList,&pSerInfo->chunkCache,numSize);
						break;
					}

					case ACLITEMOID:
					{
						AclItem		*aip = DatumGetAclItemP(attr);
						char		*outputstring;
						int32		aclSize ;

						/*
						 * Per the NOTE above: run the output function in the
						 * reset-able context so that its result string is
						 * released by the MemoryContextReset() below instead
						 * of lingering in the caller's context.
						 */
						oldCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);
						outputstring = DatumGetCString(DirectFunctionCall1(aclitemout,
																		   PointerGetDatum(aip)));
						MemoryContextSwitchTo(oldCtxt);

						aclSize = strlen(outputstring);
						addInt32ToChunkList(tcList, aclSize, &pSerInfo->chunkCache);
						addByteStringToChunkList(tcList, outputstring,aclSize, &pSerInfo->chunkCache);
						addPadding(tcList,&pSerInfo->chunkCache,aclSize);
						break;
					}

					case 210: /* storage manager */
					{
						char		*smgrstr;
						int32		strsize;

						/*
						 * NOTE(review): the output function is called with a
						 * constant 0 rather than with attr, so the attribute's
						 * actual value is not what gets sent -- confirm this is
						 * intended before changing it (the receiver may rely
						 * on it).
						 *
						 * As for ACLITEMOID, allocate the result string in the
						 * reset-able context.
						 */
						oldCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);
						smgrstr = DatumGetCString(DirectFunctionCall1(smgrout, 0));
						MemoryContextSwitchTo(oldCtxt);

						strsize = strlen(smgrstr);
						addInt32ToChunkList(tcList, strsize, &pSerInfo->chunkCache);
						addByteStringToChunkList(tcList, smgrstr, strsize, &pSerInfo->chunkCache);
						addPadding(tcList,&pSerInfo->chunkCache,strsize);
						break;
					}

					default:
						fHandled = false;
				}

				if (fHandled)
					continue;

				/*
				 * the FunctionCall2 call into the send function may result in some
				 * allocations which we'd like to have contained by our reset-able
				 * context
				 */
				oldCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);

				/* Call the attribute type's send (binary output) function. */
				if (attrInfo->send_finfo.fn_nargs == 1)
					outputbytes =
						DatumGetByteaP(FunctionCall1(&attrInfo->send_finfo,
													 attr));
				else if (attrInfo->send_finfo.fn_nargs == 2)
					outputbytes =
						DatumGetByteaP(FunctionCall2(&attrInfo->send_finfo,
													 attr,
													 ObjectIdGetDatum(attrInfo->send_typio_param)));
				else if (attrInfo->send_finfo.fn_nargs == 3)
					outputbytes =
						DatumGetByteaP(FunctionCall3(&attrInfo->send_finfo,
													 attr,
													 ObjectIdGetDatum(attrInfo->send_typio_param),
													 Int32GetDatum(tupdesc->attrs[i]->atttypmod)));
				else
				{
					/* report the argument count of the function we actually tested */
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
							 errmsg("Conversion function takes %d args",attrInfo->send_finfo.fn_nargs)));
				}

				MemoryContextSwitchTo(oldCtxt);

				/* We assume the result will not have been toasted */
				addInt32ToChunkList(tcList, VARSIZE(outputbytes) - VARHDRSZ, &pSerInfo->chunkCache);
				addByteStringToChunkList(tcList, VARDATA(outputbytes),
										 VARSIZE(outputbytes) - VARHDRSZ, &pSerInfo->chunkCache);
				addPadding(tcList,&pSerInfo->chunkCache,VARSIZE(outputbytes) - VARHDRSZ);

				/*
				 * this was allocated in our reset-able context, but we *are* done
				 * with it; and for tuples with several large columns it'd be nice to
				 * free the memory back to the context
				 */
				pfree(outputbytes);

			}

			/* drop everything the per-attribute work allocated */
			MemoryContextReset(s_tupSerMemCtxt);
		}
	}

	/*
	 * if we have more than 1 chunk we have to set the chunk types on our
	 * first chunk and last chunk
	 */
	if (tcList->num_chunks > 1)
	{
		TupleChunkListItem first,
			last;

		first = tcList->p_first;
		last = tcList->p_last;

		Assert(first != NULL);
		Assert(first != last);
		Assert(last != NULL);

		SetChunkType(first->chunk_data, TC_PARTIAL_START);
		SetChunkType(last->chunk_data, TC_PARTIAL_END);

		/*
		 * any intervening chunks are already set to TC_PARTIAL_MID when
		 * allocated
		 */
	}

	return;
}
Exemplo n.º 2
0
/*
 * Add an attribute to the hash calculation.
 * **IMPORTANT: any new hard coded support for a data type in here
 * must be added to isGreenplumDbHashable() below!
 *
 * Note that the caller should provide the base type if the datum is
 * of a domain type. It is quite expensive to call get_typtype() and
 * getBaseType() here since this function gets called a lot for the
 * same set of Datums.
 *
 * For each supported type this selects a byte buffer and a length that
 * represent the value, then calls hashFn(clientData, buf, len) exactly
 * once.  Any detoasted copy made while extracting the value is freed
 * after hashFn returns.  Unsupported types raise an ERROR.
 *
 * @param datum the value to hash.
 * @param type type OID of datum (base type for domains, see above).
 * @param hashFn called to update the hash value.
 * @param clientData passed to hashFn.
 */
void
hashDatum(Datum datum, Oid type, datumHashFunction hashFn, void *clientData)
{

	void	   *buf = NULL;		/* pointer to the data */
	size_t		len = 0;		/* length for the data buffer */

	/*
	 * The value buffers below must live at function scope: buf may point at
	 * any of them and is only consumed by hashFn after the switch.
	 */
	int64		intbuf;			/* an 8 byte buffer for all integer sizes */

	float4		buf_f4;
	float8		buf_f8;
	Timestamp	tsbuf;			/* timestamp data type is either a double or
								 * int8 (determined in compile time) */
	TimestampTz tstzbuf;
	DateADT		datebuf;
	TimeADT		timebuf;
	TimeTzADT  *timetzptr;
	Interval   *intervalptr;
	AbsoluteTime abstime_buf;
	RelativeTime reltime_buf;
	TimeInterval tinterval;
	AbsoluteTime tinterval_len;

	Numeric		num;
	bool		bool_buf;
	char        char_buf;
	Name		namebuf;

	ArrayType  *arrbuf;
	inet		 *inetptr; /* inet/cidr */
	unsigned char inet_hkey[sizeof(inet_struct)];
	macaddr		*macptr; /* MAC address */

	VarBit		*vbitptr;

	int2vector *i2vec_buf;
	oidvector  *oidvec_buf;

	Cash		cash_buf;
	AclItem	   *aclitem_ptr;
	uint32		aclitem_buf;

	/*
	 * special case buffers
	 */
	uint32		nanbuf;
	uint32		invalidbuf;

	/* detoasted copy (if any) to release once the hash has been computed */
	void *tofree = NULL;

	/*
	 * Select the hash to be performed according to the field type we are adding to the
	 * hash.
	 */
	switch (type)
	{
		/*
		 * ======= NUMERIC TYPES ========
		 */
		case INT2OID:			/* -32 thousand to 32 thousand, 2-byte storage */
			intbuf = (int64) DatumGetInt16(datum);		/* cast to 8 byte before
														 * hashing */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

		case INT4OID:			/* -2 billion to 2 billion integer, 4-byte
								 * storage */
			intbuf = (int64) DatumGetInt32(datum);		/* cast to 8 byte before
														 * hashing */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

		case INT8OID:			/* ~18 digit integer, 8-byte storage */
			intbuf = DatumGetInt64(datum);		/* already 8 bytes wide */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

		case FLOAT4OID: /* single-precision floating point number,
								 * 4-byte storage */
			buf_f4 = DatumGetFloat4(datum);

			/*
			 * On IEEE-float machines, minus zero and zero have different bit
			 * patterns but should compare as equal.  We must ensure that they
			 * have the same hash value, which is most easily done this way:
			 */
			if (buf_f4 == (float4) 0)
				buf_f4 = 0.0;

			buf = &buf_f4;
			len = sizeof(buf_f4);
			break;

		case FLOAT8OID: /* double-precision floating point number,
								 * 8-byte storage */
			buf_f8 = DatumGetFloat8(datum);

			/*
			 * On IEEE-float machines, minus zero and zero have different bit
			 * patterns but should compare as equal.  We must ensure that they
			 * have the same hash value, which is most easily done this way:
			 */
			if (buf_f8 == (float8) 0)
				buf_f8 = 0.0;

			buf = &buf_f8;
			len = sizeof(buf_f8);
			break;

		case NUMERICOID:

			num = DatumGetNumeric(datum);

			if (NUMERIC_IS_NAN(num))
			{
				/* every NaN hashes to the same constant */
				nanbuf = NAN_VAL;
				buf = &nanbuf;
				len = sizeof(nanbuf);
			}
			else
				/* not a nan */
			{
				buf = num->n_data;
				len = (VARSIZE(num) - NUMERIC_HDRSZ);
			}

			/*
			 * If we did a pg_detoast_datum, we need to remember to pfree,
			 * or we will leak memory.  Because of the 1-byte varlena header stuff.
			 */
			if (num != DatumGetPointer(datum))
				tofree = num;

			break;

		/*
		 * ====== CHARACTER TYPES =======
		 */
		case CHAROID:			/* char(1), single character */
			char_buf = DatumGetChar(datum);
			buf = &char_buf;
			len = 1;
			break;

		case BPCHAROID: /* char(n), blank-padded string, fixed storage */
		case TEXTOID:   /* text */
		case VARCHAROID: /* varchar */
		case BYTEAOID:   /* bytea */
			{
				int tmplen;

				/* sets buf/tmplen, and tofree when a copy had to be made */
				varattrib_untoast_ptr_len(datum, (char **) &buf, &tmplen, &tofree);
				/* adjust length to not include trailing blanks */
				if (type != BYTEAOID && tmplen > 1)
					tmplen = ignoreblanks((char *) buf, tmplen);

				len = tmplen;
				break;
			}

		case NAMEOID:
			namebuf = DatumGetName(datum);
			len = NAMEDATALEN;
			buf = NameStr(*namebuf);

			/* adjust length to not include trailing blanks */
			if (len > 1)
				len = ignoreblanks((char *) buf, len);
			break;

		/*
		 * ====== OBJECT IDENTIFIER TYPES ======
		 */
		case OIDOID:				/* object identifier(oid), maximum 4 billion */
		case REGPROCOID:			/* function name */
		case REGPROCEDUREOID:		/* function name with argument types */
		case REGOPEROID:			/* operator name */
		case REGOPERATOROID:		/* operator with argument types */
		case REGCLASSOID:			/* relation name */
		case REGTYPEOID:			/* data type name */
			intbuf = (int64) DatumGetUInt32(datum);	/* cast to 8 byte before hashing */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

		case TIDOID:                /* tuple id (6 bytes) */
			buf = DatumGetPointer(datum);
			len = SizeOfIptrData;
			break;

		/*
		 * ====== DATE/TIME TYPES ======
		 */
		case TIMESTAMPOID:		/* date and time */
			tsbuf = DatumGetTimestamp(datum);
			buf = &tsbuf;
			len = sizeof(tsbuf);
			break;

		case TIMESTAMPTZOID:	/* date and time with time zone */
			tstzbuf = DatumGetTimestampTz(datum);
			buf = &tstzbuf;
			len = sizeof(tstzbuf);
			break;

		case DATEOID:			/* ANSI SQL date */
			datebuf = DatumGetDateADT(datum);
			buf = &datebuf;
			len = sizeof(datebuf);
			break;

		case TIMEOID:			/* hh:mm:ss, ANSI SQL time */
			timebuf = DatumGetTimeADT(datum);
			buf = &timebuf;
			len = sizeof(timebuf);
			break;

		case TIMETZOID: /* time with time zone */

			/*
			 * will not compare to TIMEOID on equal values.
			 * Postgres never attempts to compare the two as well.
			 */
			timetzptr = DatumGetTimeTzADTP(datum);
			buf = (unsigned char *) timetzptr;

			/*
			 * Specify hash length as the sum of the member sizes, not as
			 * sizeof(TimeTzADT), so that any garbage pad bytes in the structure
			 * won't be included in the hash!
			 */
			len = sizeof(timetzptr->time) + sizeof(timetzptr->zone);
			break;

		case INTERVALOID:		/* @ <number> <units>, time interval */
			intervalptr = DatumGetIntervalP(datum);
			buf = (unsigned char *) intervalptr;
			/*
			 * Specify hash length as the sum of member sizes, not as
			 * sizeof(Interval), so that any garbage pad bytes in the structure
			 * won't be included in the hash!
			 *
			 * NOTE(review): this hashes the first
			 * sizeof(time) + sizeof(month) bytes of the struct; which members
			 * that covers depends on the Interval layout -- confirm.  Do not
			 * change the length casually: it alters hash values for stored
			 * data.
			 */
			len = sizeof(intervalptr->time) + sizeof(intervalptr->month);
			break;

		case ABSTIMEOID:
			abstime_buf = DatumGetAbsoluteTime(datum);

			if (abstime_buf == INVALID_ABSTIME)
			{
				/* hash to a constant value */
				invalidbuf = INVALID_VAL;
				len = sizeof(invalidbuf);
				buf = &invalidbuf;
			}
			else
			{
				len = sizeof(abstime_buf);
				buf = &abstime_buf;
			}

			break;

		case RELTIMEOID:
			reltime_buf = DatumGetRelativeTime(datum);

			if (reltime_buf == INVALID_RELTIME)
			{
				/* hash to a constant value */
				invalidbuf = INVALID_VAL;
				len = sizeof(invalidbuf);
				buf = &invalidbuf;
			}
			else
			{
				len = sizeof(reltime_buf);
				buf = &reltime_buf;
			}

			break;

		case TINTERVALOID:
			tinterval = DatumGetTimeInterval(datum);

			/*
			 * check if a valid interval. the '0' status code
			 * stands for T_INTERVAL_INVAL which is defined in
			 * nabstime.c. We use the actual value instead
			 * of defining it again here.
			 */
			if(tinterval->status == 0 ||
			   tinterval->data[0] == INVALID_ABSTIME ||
			   tinterval->data[1] == INVALID_ABSTIME)
			{
				/* hash to a constant value */
				invalidbuf = INVALID_VAL;
				len = sizeof(invalidbuf);
				buf = &invalidbuf;
			}
			else
			{
				/* normalize on length of the time interval */
				tinterval_len = tinterval->data[1] -  tinterval->data[0];
				len = sizeof(tinterval_len);
				buf = &tinterval_len;
			}

			break;

		/*
		 * ======= NETWORK TYPES ========
		 */
		case INETOID:
		case CIDROID:

			inetptr = DatumGetInetP(datum);
			len = inet_getkey(inetptr, inet_hkey, sizeof(inet_hkey)); /* fill-in inet_key & get len */
			buf = inet_hkey;

			/*
			 * As for NUMERIC: if fetching the value produced a detoasted
			 * copy, remember it so it gets freed below.
			 */
			if ((void *) inetptr != (void *) DatumGetPointer(datum))
				tofree = inetptr;
			break;

		case MACADDROID:

			macptr = DatumGetMacaddrP(datum);
			len = sizeof(macaddr);
			buf = (unsigned char *) macptr;
			break;

		/*
		 * ======== BIT STRINGS ========
		 */
		case BITOID:
		case VARBITOID:

			/*
			 * Note that these are essentially strings.
			 * we don't need to worry about '10' and '010'
			 * to compare, b/c they will not, by design.
			 * (see SQL standard, and varbit.c)
			 */
			vbitptr = DatumGetVarBitP(datum);
			len = VARBITBYTES(vbitptr);
			buf = (char *) VARBITS(vbitptr);

			/* free a detoasted copy once hashed (buf points into it) */
			if ((void *) vbitptr != (void *) DatumGetPointer(datum))
				tofree = vbitptr;
			break;

		/*
		 * ======= other types =======
		 */
		case BOOLOID:			/* boolean, 'true'/'false' */
			bool_buf = DatumGetBool(datum);
			buf = &bool_buf;
			len = sizeof(bool_buf);
			break;

		/*
		 * We prepare the hash key for aclitems just like postgresql does.
		 * (see code and comment in acl.c: hash_aclitem() ).
		 */
		case ACLITEMOID:
			aclitem_ptr = DatumGetAclItemP(datum);
			aclitem_buf = (uint32) (aclitem_ptr->ai_privs + aclitem_ptr->ai_grantee + aclitem_ptr->ai_grantor);
			buf = &aclitem_buf;
			len = sizeof(aclitem_buf);
			break;

		/*
		 * ANYARRAY is a pseudo-type. We use it to include
		 * any of the array types (OIDs 1007-1033 in pg_type.h).
		 * caller needs to be sure the type is ANYARRAYOID
		 * before calling cdbhash on an array (INSERT and COPY do so).
		 */
		case ANYARRAYOID:

			arrbuf = DatumGetArrayTypeP(datum);
			len = VARSIZE(arrbuf) - VARHDRSZ;
			buf = VARDATA(arrbuf);

			/* free a detoasted copy once hashed (buf points into it) */
			if ((void *) arrbuf != (void *) DatumGetPointer(datum))
				tofree = arrbuf;
			break;

		case INT2VECTOROID:
			/* hash only the element values, not the array header */
			i2vec_buf = (int2vector *) DatumGetPointer(datum);
			len = i2vec_buf->dim1 * sizeof(int2);
			buf = (void *)i2vec_buf->values;
			break;

		case OIDVECTOROID:
			/* hash only the element values, not the array header */
			oidvec_buf = (oidvector *) DatumGetPointer(datum);
			len = oidvec_buf->dim1 * sizeof(Oid);
			buf = oidvec_buf->values;
			break;

		case CASHOID: /* datum is treated as a pointer to a Cash value */
			cash_buf = (* (Cash *)DatumGetPointer(datum));
			len = sizeof(Cash);
			buf = &cash_buf;
			break;

		default:
			ereport(ERROR,
					(errcode(ERRCODE_CDB_FEATURE_NOT_YET),
					 errmsg("Type %u is not hashable.", type)));

	}							/* switch(type) */

	/* do the hash using the selected algorithm */
	hashFn(clientData, buf, len);

	/* release any detoasted copy only now: buf may point into it */
	if(tofree)
		pfree(tofree);
}