/* Decode a date type */ static int decode_date(const char *buffer, unsigned int buff_size, unsigned int *out_size) { const char *new_buffer = (const char *) TYPEALIGN(sizeof(int32), (uintptr_t) buffer); unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer); int32 jd, year, month, day; if (buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if (buff_size < sizeof(int32)) return -2; *out_size = sizeof(int32) + delta; jd = *(int32 *) buffer + POSTGRES_EPOCH_JDATE; j2date(jd, &year, &month, &day); CopyAppendFmt("%04d-%02d-%02d%s", (year <= 0) ? -year + 1 : year, month, day, (year <= 0) ? " BC" : ""); return 0; }
/* Decode a timetz type */ static int decode_timetz(const char *buffer, unsigned int buff_size, unsigned int *out_size) { const char *new_buffer = (const char *) TYPEALIGN(sizeof(int64), (uintptr_t) buffer); unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer); int64 timestamp, timestamp_sec; int32 tz_sec, tz_min; if (buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if (buff_size < (sizeof(int64) + sizeof(int32))) return -2; timestamp = *(int64 *) buffer; tz_sec = *(int32 *) (buffer + sizeof(int64)); timestamp_sec = timestamp / 1000000; tz_min = -(tz_sec / 60); *out_size = sizeof(int64) + sizeof(int32) + delta; CopyAppendFmt("%02ld:%02ld:%02ld.%06ld%c%02d:%02d", timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60, timestamp % 1000000, (tz_min > 0 ? '+' : '-'), abs(tz_min / 60), abs(tz_min % 60)); return 0; }
/*
 * Decode a time type.
 *
 * The value is read as an int64 count of microseconds from the next
 * int64-aligned address at or after 'buffer' and printed as HH:MM:SS.uuuuuu.
 *
 * buffer    - start of the (possibly unaligned) field data
 * buff_size - number of bytes available at 'buffer'
 * out_size  - set to the number of bytes consumed (alignment padding + value)
 *
 * Returns 0 on success, -1 if the buffer ends inside the alignment padding,
 * -2 if fewer than sizeof(int64) bytes remain after the padding.
 */
static int
decode_time(const char *buffer, unsigned int buff_size, unsigned int *out_size)
{
	const char *new_buffer = (const char *) TYPEALIGN(sizeof(int64), (uintptr_t) buffer);
	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
	int64		timestamp,
				timestamp_sec;

	if (buff_size < delta)
		return -1;

	buff_size -= delta;
	buffer = new_buffer;

	if (buff_size < sizeof(int64))
		return -2;

	timestamp = *(int64 *) buffer;
	timestamp_sec = timestamp / 1000000;
	*out_size = sizeof(int64) + delta;

	/*
	 * int64 is not necessarily 'long'; cast to long long and print with %lld
	 * so the format matches the argument type on every platform.
	 */
	CopyAppendFmt("%02lld:%02lld:%02lld.%06lld",
				  (long long) (timestamp_sec / 60 / 60),
				  (long long) ((timestamp_sec / 60) % 60),
				  (long long) (timestamp_sec % 60),
				  (long long) (timestamp % 1000000));

	return 0;
}
/*
 * Decode an Oid as int type and pass value out.
 *
 * The Oid is read from the next Oid-aligned address at or after 'buffer'.
 *
 * buffer         - start of the (possibly unaligned) field data
 * buff_size      - number of bytes available at 'buffer'
 * processed_size - set to the number of bytes consumed (padding + value)
 * result         - receives the decoded Oid
 *
 * Returns 0 on success, -1 if the buffer ends inside the alignment padding,
 * -2 if fewer than sizeof(Oid) bytes remain after the padding.  On failure
 * neither output parameter is written.
 */
static int
DecodeOidBinary(const char *buffer, unsigned int buff_size, unsigned int *processed_size, Oid *result)
{
	const char *new_buffer = (const char *) TYPEALIGN(sizeof(Oid), (uintptr_t) buffer);
	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);

	if (buff_size < delta)
		return -1;

	buff_size -= delta;
	buffer = new_buffer;

	/* check against the type that is actually read below */
	if (buff_size < sizeof(Oid))
		return -2;

	*result = *(Oid *) buffer;
	*processed_size = sizeof(Oid) + delta;

	return 0;
}
/*
 * Fill the raw buffer with random data and derive the aligned write buffer.
 *
 * Writes XLOG_SEG_SIZE values from random() into full_buf, then sets buf to
 * full_buf rounded up to an XLOG_BLCKSZ boundary.  Both full_buf and buf are
 * defined outside this function.
 */
static void
prepare_buf(void)
{
	int			pos = 0;

	/* write random data into buffer */
	while (pos < XLOG_SEG_SIZE)
	{
		full_buf[pos] = random();
		pos++;
	}

	buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
}
/*
 * Compute how many extra bytes the null bitmap needs for a tuple descriptor.
 *
 * The bitmap needs one bit per attribute, rounded up to whole bytes.  When
 * the descriptor has no oid and col_align is 8, four bytes are treated as
 * already available; otherwise none are.  If the bitmap fits in the available
 * bytes the result is 0, else the excess rounded up to a multiple of
 * col_align.
 *
 * col_align must be 4 or 8 (asserted).
 */
static inline int
compute_null_bitmap_extra_size(TupleDesc tupdesc, int col_align)
{
	int			bitmap_bytes = (tupdesc->natts + 7) >> 3;
	int			free_bytes;

	Assert(col_align == 4 || col_align == 8);

	if (tupdesc->tdhasoid || col_align == 4)
		free_bytes = 0;
	else
		free_bytes = 4;

	if (bitmap_bytes <= free_bytes)
		return 0;

	return TYPEALIGN(col_align, (bitmap_bytes - free_bytes));
}
/* Decode a timestamp type */ static int decode_timestamp(const char *buffer, unsigned int buff_size, unsigned int *out_size) { const char *new_buffer = (const char *) TYPEALIGN(sizeof(int64), (uintptr_t) buffer); unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer); int64 timestamp, timestamp_sec; int32 jd, year, month, day; if (buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if (buff_size < sizeof(int64)) return -2; *out_size = sizeof(int64) + delta; timestamp = *(int64 *) buffer; jd = timestamp / USECS_PER_DAY; if (jd != 0) timestamp -= jd * USECS_PER_DAY; if (timestamp < INT64CONST(0)) { timestamp += USECS_PER_DAY; jd -= 1; } /* add offset to go from J2000 back to standard Julian date */ jd += POSTGRES_EPOCH_JDATE; j2date(jd, &year, &month, &day); timestamp_sec = timestamp / 1000000; CopyAppendFmt("%04d-%02d-%02d %02ld:%02ld:%02ld.%06ld%s", (year <= 0) ? -year + 1 : year, month, day, timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60, timestamp % 1000000, (year <= 0) ? " BC" : ""); return 0; }
/*
 * Decode a float8 type.
 *
 * The value is read as a double from the next double-aligned address at or
 * after 'buffer' and printed with 12 digits after the decimal point.
 *
 * buffer    - start of the (possibly unaligned) field data
 * buff_size - number of bytes available at 'buffer'
 * out_size  - set to the number of bytes consumed (alignment padding + value)
 *
 * Returns 0 on success, -1 if the buffer ends inside the alignment padding,
 * -2 if fewer than sizeof(double) bytes remain after the padding.
 */
static int
decode_float8(const char *buffer, unsigned int buff_size, unsigned int *out_size)
{
	const char *aligned = (const char *) TYPEALIGN(sizeof(double), (uintptr_t) buffer);
	unsigned int padding = (unsigned int) ((uintptr_t) aligned - (uintptr_t) buffer);

	if (buff_size < padding)
		return -1;

	if (buff_size - padding < sizeof(double))
		return -2;

	CopyAppendFmt("%.12lf", *(double *) aligned);
	*out_size = sizeof(double) + padding;

	return 0;
}
/*
 * Decode a bigint type.
 *
 * The value is read as an int64 from the next int64-aligned address at or
 * after 'buffer' and printed in decimal.
 *
 * buffer    - start of the (possibly unaligned) field data
 * buff_size - number of bytes available at 'buffer'
 * out_size  - set to the number of bytes consumed (alignment padding + value)
 *
 * Returns 0 on success, -1 if the buffer ends inside the alignment padding,
 * -2 if fewer than sizeof(int64) bytes remain after the padding.
 */
static int
decode_bigint(const char *buffer, unsigned int buff_size, unsigned int *out_size)
{
	const char *new_buffer = (const char *) TYPEALIGN(sizeof(int64), (uintptr_t) buffer);
	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);

	if (buff_size < delta)
		return -1;

	buff_size -= delta;
	buffer = new_buffer;

	if (buff_size < sizeof(int64))
		return -2;

	/*
	 * int64 is not necessarily 'long'; cast to long long and print with %lld
	 * so the format matches the argument type on every platform.
	 */
	CopyAppendFmt("%lld", (long long) *(int64 *) buffer);
	*out_size = sizeof(int64) + delta;

	return 0;
}
/*
 * Decode a name type (used mostly in catalog tables).
 *
 * The field occupies NAMEDATALEN bytes starting at the next uint32-aligned
 * address at or after 'buffer'.  Only the bytes up to the first NUL (or all
 * NAMEDATALEN bytes if there is none) are passed to CopyAppendEncode().
 *
 * buffer    - start of the (possibly unaligned) field data
 * buff_size - number of bytes available at 'buffer'
 * out_size  - set to the number of bytes consumed (alignment padding + field)
 *
 * Returns 0 on success, -1 if the buffer ends inside the alignment padding,
 * -2 if fewer than NAMEDATALEN bytes remain after the padding.
 */
static int
decode_name(const char *buffer, unsigned int buff_size, unsigned int *out_size)
{
	const char *aligned = (const char *) TYPEALIGN(sizeof(uint32), (uintptr_t) buffer);
	unsigned int padding = (unsigned int) ((uintptr_t) aligned - (uintptr_t) buffer);

	if (buff_size < padding)
		return -1;

	if (buff_size - padding < NAMEDATALEN)
		return -2;

	CopyAppendEncode(aligned, strnlen(aligned, NAMEDATALEN));
	*out_size = NAMEDATALEN + padding;

	return 0;
}
/*
 * Decode a macaddr type.
 *
 * Six bytes are copied from the next int32-aligned address at or after
 * 'buffer' and printed as colon-separated two-digit lowercase hex values.
 *
 * buffer    - start of the (possibly unaligned) field data
 * buff_size - number of bytes available at 'buffer'
 * out_size  - set to the number of bytes consumed (alignment padding + value)
 *
 * Returns 0 on success, -1 if the buffer ends inside the alignment padding,
 * -2 if fewer than six bytes remain after the padding.
 */
static int
decode_macaddr(const char *buffer, unsigned int buff_size, unsigned int *out_size)
{
	unsigned char octets[6];
	const char *aligned = (const char *) TYPEALIGN(sizeof(int32), (uintptr_t) buffer);
	unsigned int padding = (unsigned int) ((uintptr_t) aligned - (uintptr_t) buffer);

	if (buff_size < padding)
		return -1;

	if (buff_size - padding < sizeof(octets))
		return -2;

	memcpy(octets, aligned, sizeof(octets));
	CopyAppendFmt("%02x:%02x:%02x:%02x:%02x:%02x",
				  octets[0], octets[1], octets[2],
				  octets[3], octets[4], octets[5]);
	*out_size = sizeof(octets) + padding;

	return 0;
}
/*
 * Serialize a tuple directly into a buffer.
 *
 * We're called with at least enough space for a tuple-chunk-header.
 *
 * tuple    - tuple to serialize (either a memtuple or a heap tuple)
 * pSerInfo - serialization info; only its tuple descriptor is consulted here
 * b        - destination; the chunk is written at b->pri, which has room for
 *            b->prilen bytes
 *
 * Returns the total number of bytes written (chunk header included), or 0 if
 * the tuple was not written because it does not fit in b->prilen or because
 * it has HEAP_HASEXTERNAL set.  In the 0 case the caller is expected to fall
 * back to the chunk-list ("out-of-line") serialization.
 */
int
SerializeTupleDirect(HeapTuple tuple, SerTupInfo * pSerInfo, struct directTransportBuffer *b)
{
	int			natts;
	int			dataSize = TUPLE_CHUNK_HEADER_SIZE;
	TupleDesc	tupdesc;

	AssertArg(tuple != NULL);
	AssertArg(pSerInfo != NULL);
	AssertArg(b != NULL);

	tupdesc = pSerInfo->tupdesc;
	natts = tupdesc->natts;

	if (natts == 0)
	{
		/* TC_EMPTY is just one chunk: a header with no payload */
		SetChunkType(b->pri, TC_EMPTY);
		SetChunkDataSize(b->pri, 0);
		return dataSize;
	}

	if (is_heaptuple_memtuple(tuple))
	{
		/* easy case: copy the memtuple image and zero the alignment padding */
		int			tupleSize = memtuple_get_size((MemTuple) tuple, NULL);
		int			paddedSize = TYPEALIGN(TUPLE_CHUNK_ALIGN, tupleSize);

		if (paddedSize + TUPLE_CHUNK_HEADER_SIZE > b->prilen)
			return 0;

		/* will fit. */
		memcpy(b->pri + TUPLE_CHUNK_HEADER_SIZE, tuple, tupleSize);
		memset(b->pri + TUPLE_CHUNK_HEADER_SIZE + tupleSize, 0, paddedSize - tupleSize);

		dataSize += paddedSize;
	}
	else
	{
		TupSerHeader tsh;
		unsigned int datalen;
		unsigned int nullslen;
		unsigned int paddedNullsLen;
		unsigned int paddedDataLen;
		HeapTupleHeader t_data = tuple->t_data;
		unsigned char *pos;

		datalen = tuple->t_len - t_data->t_hoff;
		if (HeapTupleHasNulls(tuple))
			nullslen = BITMAPLEN(HeapTupleHeaderGetNatts(t_data));
		else
			nullslen = 0;

		paddedNullsLen = TYPEALIGN(TUPLE_CHUNK_ALIGN, nullslen);
		paddedDataLen = TYPEALIGN(TUPLE_CHUNK_ALIGN, datalen);

		tsh.tuplen = sizeof(TupSerHeader) + paddedNullsLen + paddedDataLen;
		tsh.natts = HeapTupleHeaderGetNatts(t_data);
		tsh.infomask = t_data->t_infomask;

		/* too big for the buffer, or has external attributes: not handled here */
		if (dataSize + tsh.tuplen > b->prilen ||
			(tsh.infomask & HEAP_HASEXTERNAL) != 0)
			return 0;

		/* layout: TupSerHeader, padded null bitmap (if any), padded data */
		pos = b->pri + TUPLE_CHUNK_HEADER_SIZE;

		memcpy(pos, (char *) &tsh, sizeof(TupSerHeader));
		pos += sizeof(TupSerHeader);

		if (nullslen)
		{
			memcpy(pos, (char *) t_data->t_bits, nullslen);
			pos += nullslen;
			memset(pos, 0, paddedNullsLen - nullslen);
			pos += paddedNullsLen - nullslen;
		}

		memcpy(pos, (char *) t_data + t_data->t_hoff, datalen);
		pos += datalen;
		memset(pos, 0, paddedDataLen - datalen);

		dataSize += tsh.tuplen;
	}

	SetChunkType(b->pri, TC_WHOLE);
	SetChunkDataSize(b->pri, dataSize - TUPLE_CHUNK_HEADER_SIZE);

	return dataSize;
}
/*
 * Advance the read cursor of 'serialTup' to the next TUPLE_CHUNK_ALIGN
 * boundary.  The cursor is left unchanged if it is already aligned.
 */
static inline void
skipPadding(StringInfo serialTup)
{
	serialTup->cursor = TYPEALIGN(TUPLE_CHUNK_ALIGN, serialTup->cursor);
}
/*
 * Convert a HeapTuple into a byte-sequence, and store it directly
 * into a chunklist for transmission.
 *
 * This code is based on the printtup_internal_20() function in printtup.c.
 *
 * tuple    - tuple to serialize (either a memtuple or a heap tuple)
 * pSerInfo - serialization info: tuple descriptor, per-attribute info,
 *            scratch values/nulls arrays and the chunk cache
 * tcList   - chunk list to fill; it is reset first, any previous contents
 *            are not freed here
 *
 * Three layouts are produced:
 *   - memtuple: the raw memtuple image followed by padding;
 *   - heap tuple without external attributes: TupSerHeader, padded null
 *     bitmap (if any), padded raw data;
 *   - heap tuple with external attributes: TupSerHeader, the per-attribute
 *     nulls array, then each non-null attribute in a type-specific form.
 *
 * A descriptor with zero attributes yields a single TC_EMPTY chunk.
 */
void
SerializeTupleIntoChunks(HeapTuple tuple, SerTupInfo * pSerInfo, TupleChunkList tcList)
{
	TupleChunkListItem tcItem = NULL;
	MemoryContext oldCtxt;
	TupleDesc	tupdesc;
	int			i,
				natts;
	bool		fHandled;

	AssertArg(tcList != NULL);
	AssertArg(tuple != NULL);
	AssertArg(pSerInfo != NULL);

	tupdesc = pSerInfo->tupdesc;
	natts = tupdesc->natts;

	/* get ready to go */
	tcList->p_first = NULL;
	tcList->p_last = NULL;
	tcList->num_chunks = 0;
	tcList->serialized_data_length = 0;
	tcList->max_chunk_length = Gp_max_tuple_chunk_size;

	if (natts == 0)
	{
		tcItem = getChunkFromCache(&pSerInfo->chunkCache);
		if (tcItem == NULL)
		{
			ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY),
							errmsg("Could not allocate space for first chunk item in new chunk list.")));
		}

		/* TC_EMPTY is just one chunk */
		SetChunkType(tcItem->chunk_data, TC_EMPTY);
		tcItem->chunk_length = TUPLE_CHUNK_HEADER_SIZE;
		appendChunkToTCList(tcList, tcItem);

		return;
	}

	tcItem = getChunkFromCache(&pSerInfo->chunkCache);
	if (tcItem == NULL)
	{
		ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY),
						errmsg("Could not allocate space for first chunk item in new chunk list.")));
	}

	/* assume that we'll take a single chunk */
	SetChunkType(tcItem->chunk_data, TC_WHOLE);
	tcItem->chunk_length = TUPLE_CHUNK_HEADER_SIZE;
	appendChunkToTCList(tcList, tcItem);

	AssertState(s_tupSerMemCtxt != NULL);

	if (is_heaptuple_memtuple(tuple))
	{
		addByteStringToChunkList(tcList, (char *) tuple, memtuple_get_size((MemTuple) tuple, NULL), &pSerInfo->chunkCache);
		addPadding(tcList, &pSerInfo->chunkCache, memtuple_get_size((MemTuple) tuple, NULL));
	}
	else
	{
		TupSerHeader tsh;
		unsigned int datalen;
		unsigned int nullslen;
		HeapTupleHeader t_data = tuple->t_data;

		datalen = tuple->t_len - t_data->t_hoff;
		if (HeapTupleHasNulls(tuple))
			nullslen = BITMAPLEN(HeapTupleHeaderGetNatts(t_data));
		else
			nullslen = 0;

		tsh.tuplen = sizeof(TupSerHeader) + TYPEALIGN(TUPLE_CHUNK_ALIGN, nullslen) + datalen;
		tsh.natts = HeapTupleHeaderGetNatts(t_data);
		tsh.infomask = t_data->t_infomask;

		addByteStringToChunkList(tcList, (char *) &tsh, sizeof(TupSerHeader), &pSerInfo->chunkCache);

		/*
		 * If we don't have any attributes which have been toasted, we can be
		 * very very simple: just send the raw data.
		 */
		if ((tsh.infomask & HEAP_HASEXTERNAL) == 0)
		{
			if (nullslen)
			{
				addByteStringToChunkList(tcList, (char *) t_data->t_bits, nullslen, &pSerInfo->chunkCache);
				addPadding(tcList, &pSerInfo->chunkCache, nullslen);
			}

			addByteStringToChunkList(tcList, (char *) t_data + t_data->t_hoff, datalen, &pSerInfo->chunkCache);
			addPadding(tcList, &pSerInfo->chunkCache, datalen);
		}
		else
		{
			/*
			 * We have to be more careful when we have tuples that have been
			 * toasted. Ideally we'd like to send the untoasted attributes in
			 * as "raw" a format as possible but that makes rebuilding the
			 * tuple harder.
			 */
			oldCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);

			/* deconstruct the tuple (faster than a heap_getattr loop) */
			heap_deform_tuple(tuple, tupdesc, pSerInfo->values, pSerInfo->nulls);

			MemoryContextSwitchTo(oldCtxt);

			/* Send the nulls character-array. */
			addByteStringToChunkList(tcList, pSerInfo->nulls, natts, &pSerInfo->chunkCache);
			addPadding(tcList, &pSerInfo->chunkCache, natts);

			/*
			 * send the attributes of this tuple: NOTE anything which
			 * allocates temporary space (e.g. could result in a
			 * PG_DETOAST_DATUM) should be executed with the memory context
			 * set to s_tupSerMemCtxt
			 */
			for (i = 0; i < natts; ++i)
			{
				SerAttrInfo *attrInfo = pSerInfo->myinfo + i;
				Datum		origattr = pSerInfo->values[i],
							attr;
				bytea	   *outputbytes = NULL;

				/* skip null attributes (already taken care of above) */
				if (pSerInfo->nulls[i])
					continue;

				/*
				 * If we have a toasted datum, forcibly detoast it here to
				 * avoid memory leakage: we want to force the detoast
				 * allocation(s) to happen in our reset-able serialization
				 * context.
				 */
				if (attrInfo->typisvarlena)
				{
					oldCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);

					/*
					 * we want to detoast but leave compressed, if possible,
					 * but we have to handle varlena attributes (and others
					 * ?) differently than we currently do (first step is to
					 * use heap_tuple_fetch_attr() instead of
					 * PG_DETOAST_DATUM()).
					 */
					attr = PointerGetDatum(PG_DETOAST_DATUM(origattr));
					MemoryContextSwitchTo(oldCtxt);
				}
				else
					attr = origattr;

				/*
				 * Assume that the data's output will be handled by the
				 * special IO code, and if not then we can handle it the slow
				 * way.
				 */
				fHandled = true;
				switch (attrInfo->atttypid)
				{
					case INT4OID:
						addInt32ToChunkList(tcList, DatumGetInt32(attr), &pSerInfo->chunkCache);
						break;
					case CHAROID:
						addCharToChunkList(tcList, DatumGetChar(attr), &pSerInfo->chunkCache);
						addPadding(tcList, &pSerInfo->chunkCache, 1);
						break;
					case BPCHAROID:
					case VARCHAROID:
					case INT2VECTOROID:	/* postgres serialization logic
										 * broken, use our own */
					case OIDVECTOROID:	/* postgres serialization logic
										 * broken, use our own */
					case ANYARRAYOID:
						{
							/* sent as: int32 payload length, payload, padding */
							text	   *pText = DatumGetTextP(attr);
							int32		textSize = VARSIZE(pText) - VARHDRSZ;

							addInt32ToChunkList(tcList, textSize, &pSerInfo->chunkCache);
							addByteStringToChunkList(tcList, (char *) VARDATA(pText), textSize, &pSerInfo->chunkCache);
							addPadding(tcList, &pSerInfo->chunkCache, textSize);
							break;
						}
					case DATEOID:
						{
							DateADT		date = DatumGetDateADT(attr);

							addByteStringToChunkList(tcList, (char *) &date, sizeof(DateADT), &pSerInfo->chunkCache);
							break;
						}
					case NUMERICOID:
						{
							/*
							 * Treat the numeric as a varlena variable, and
							 * just push the whole shebang to the
							 * output-buffer. We don't care about the guts of
							 * the numeric.
							 */
							Numeric		num = DatumGetNumeric(attr);
							int32		numSize = VARSIZE(num) - VARHDRSZ;

							addInt32ToChunkList(tcList, numSize, &pSerInfo->chunkCache);
							addByteStringToChunkList(tcList, (char *) VARDATA(num), numSize, &pSerInfo->chunkCache);
							addPadding(tcList, &pSerInfo->chunkCache, numSize);
							break;
						}
					case ACLITEMOID:
						{
							/* sent in its text output form */
							AclItem    *aip = DatumGetAclItemP(attr);
							char	   *outputstring;
							int32		aclSize;

							outputstring = DatumGetCString(DirectFunctionCall1(aclitemout,
																			   PointerGetDatum(aip)));

							aclSize = strlen(outputstring);
							addInt32ToChunkList(tcList, aclSize, &pSerInfo->chunkCache);
							addByteStringToChunkList(tcList, outputstring, aclSize, &pSerInfo->chunkCache);
							addPadding(tcList, &pSerInfo->chunkCache, aclSize);
							break;
						}
					case 210:	/* storage manager */
						{
							char	   *smgrstr;
							int32		strsize;

							/* NOTE(review): smgrout is called with 0, not attr */
							smgrstr = DatumGetCString(DirectFunctionCall1(smgrout, 0));
							strsize = strlen(smgrstr);
							addInt32ToChunkList(tcList, strsize, &pSerInfo->chunkCache);
							addByteStringToChunkList(tcList, smgrstr, strsize, &pSerInfo->chunkCache);
							addPadding(tcList, &pSerInfo->chunkCache, strsize);
							break;
						}
					default:
						fHandled = false;
				}

				if (fHandled)
					continue;

				/*
				 * the FunctionCall2 call into the send function may result
				 * in some allocations which we'd like to have contained by
				 * our reset-able context
				 */
				oldCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);

				/* Call the attribute type's binary send (output) function. */
				if (attrInfo->send_finfo.fn_nargs == 1)
					outputbytes =
						DatumGetByteaP(FunctionCall1(&attrInfo->send_finfo,
													 attr));
				else if (attrInfo->send_finfo.fn_nargs == 2)
					outputbytes =
						DatumGetByteaP(FunctionCall2(&attrInfo->send_finfo,
													 attr,
													 ObjectIdGetDatum(attrInfo->send_typio_param)));
				else if (attrInfo->send_finfo.fn_nargs == 3)
					outputbytes =
						DatumGetByteaP(FunctionCall3(&attrInfo->send_finfo,
													 attr,
													 ObjectIdGetDatum(attrInfo->send_typio_param),
													 Int32GetDatum(tupdesc->attrs[i]->atttypmod)));
				else
				{
					/* report the arity of the send function that was tested */
					ereport(ERROR, (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
									errmsg("Conversion function takes %d args", attrInfo->send_finfo.fn_nargs)));
				}

				MemoryContextSwitchTo(oldCtxt);

				/* We assume the result will not have been toasted */
				addInt32ToChunkList(tcList, VARSIZE(outputbytes) - VARHDRSZ, &pSerInfo->chunkCache);
				addByteStringToChunkList(tcList, VARDATA(outputbytes),
										 VARSIZE(outputbytes) - VARHDRSZ, &pSerInfo->chunkCache);
				addPadding(tcList, &pSerInfo->chunkCache, VARSIZE(outputbytes) - VARHDRSZ);

				/*
				 * this was allocated in our reset-able context, but we *are*
				 * done with it; and for tuples with several large columns
				 * it'd be nice to free the memory back to the context
				 */
				pfree(outputbytes);
			}

			MemoryContextReset(s_tupSerMemCtxt);
		}
	}

	/*
	 * if we have more than 1 chunk we have to set the chunk types on our
	 * first chunk and last chunk
	 */
	if (tcList->num_chunks > 1)
	{
		TupleChunkListItem first,
					last;

		first = tcList->p_first;
		last = tcList->p_last;

		Assert(first != NULL);
		Assert(first != last);
		Assert(last != NULL);

		SetChunkType(first->chunk_data, TC_PARTIAL_START);
		SetChunkType(last->chunk_data, TC_PARTIAL_END);

		/*
		 * any intervening chunks are already set to TC_PARTIAL_MID when
		 * allocated
		 */
	}

	return;
}
/*
 * Reassemble a received chunk list into a tuple.
 *
 * tcList   - chunk list holding one serialized tuple; it is always cleared
 *            with clearTCList() before returning normally
 * pSerInfo - serialization info (tuple descriptor, scratch values/nulls)
 *
 * The chunk types are validated first: a lone chunk must be TC_WHOLE (or
 * TC_EMPTY), otherwise the sequence must be TC_PARTIAL_START,
 * TC_PARTIAL_MID..., TC_PARTIAL_END.  The payloads are concatenated and then
 * decoded according to what the leading word indicates:
 *   - MEMTUP_LEAD_BIT set: the payload is a memtuple image, copied as-is;
 *   - HEAP_HASEXTERNAL set in the header: handed to DeserializeTuple();
 *   - otherwise: a heap tuple is rebuilt from the null bitmap and raw data.
 *
 * Returns a newly palloc'd tuple.  Raises ERROR on a chunk-type violation or
 * an inconsistent tuple length.
 */
HeapTuple
CvtChunksToHeapTup(TupleChunkList tcList, SerTupInfo * pSerInfo)
{
	StringInfoData serData;
	TupleChunkListItem tcItem;
	int			i;
	HeapTuple	htup;
	TupleChunkType tcType;

	AssertArg(tcList != NULL);
	AssertArg(tcList->p_first != NULL);
	AssertArg(pSerInfo != NULL);

	tcItem = tcList->p_first;

	if (tcList->num_chunks == 1)
	{
		GetChunkType(tcItem, &tcType);

		if (tcType == TC_EMPTY)
		{
			/*
			 * the sender is indicating that there was a row with no
			 * attributes: build the tuple from the descriptor and the
			 * values/nulls arrays in pSerInfo
			 */
			clearTCList(NULL, tcList);

			htup = heap_form_tuple(pSerInfo->tupdesc, pSerInfo->values, pSerInfo->nulls);

			return htup;
		}
	}

	/*
	 * Dump all of the data in the tuple chunk list into a single StringInfo,
	 * so that we can convert it into a HeapTuple.  Check chunk types based on
	 * whether there is only one chunk, or multiple chunks.
	 *
	 * We know roughly how much space we'll need, allocate all in one go.
	 */
	initStringInfoOfSize(&serData, tcList->num_chunks * tcList->max_chunk_length);

	i = 0;
	do
	{
		/* Make sure that the type of this tuple chunk is correct! */

		GetChunkType(tcItem, &tcType);
		if (i == 0)
		{
			if (tcItem->p_next == NULL)
			{
				if (tcType != TC_WHOLE)
				{
					ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION),
									errmsg("Single chunk's type must be TC_WHOLE.")));
				}
			}
			else
				/* tcItem->p_next != NULL */
			{
				if (tcType != TC_PARTIAL_START)
				{
					ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION),
									errmsg("First chunk of collection must have type"
										   " TC_PARTIAL_START.")));
				}
			}
		}
		else
			/* i > 0 */
		{
			if (tcItem->p_next == NULL)
			{
				if (tcType != TC_PARTIAL_END)
				{
					ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION),
									errmsg("Last chunk of collection must have type"
										   " TC_PARTIAL_END.")));
				}
			}
			else
				/* tcItem->p_next != NULL */
			{
				if (tcType != TC_PARTIAL_MID)
				{
					ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION),
									errmsg("Middle chunk of collection must have type"
										   " TC_PARTIAL_MID.")));
				}
			}
		}

		/* Copy this chunk into the tuple data.  Don't include the header! */
		appendBinaryStringInfo(&serData,
							   (const char *) GetChunkDataPtr(tcItem) + TUPLE_CHUNK_HEADER_SIZE,
							   tcItem->chunk_length - TUPLE_CHUNK_HEADER_SIZE);

		/* Go to the next chunk. */
		tcItem = tcItem->p_next;
		i++;
	}
	while (tcItem != NULL);

	/* we've finished with the TCList, free it now. */
	clearTCList(NULL, tcList);

	{
		TupSerHeader *tshp;
		unsigned int datalen;
		unsigned int nullslen;
		unsigned int paddedNullsLen;
		unsigned int hoff;
		HeapTupleHeader t_data;
		char	   *pos = (char *) serData.data;

		tshp = (TupSerHeader *) pos;

		if ((tshp->tuplen & MEMTUP_LEAD_BIT) != 0)
		{
			/* memtuple: the payload is the tuple image itself */
			uint32		tuplen = memtuple_size_from_uint32(tshp->tuplen);

			htup = (HeapTuple) palloc(tuplen);
			memcpy(htup, pos, tuplen);
		}
		else
		{
			pos += sizeof(TupSerHeader);

			/*
			 * if the tuple had toasted elements we have to deserialize the
			 * old slow way.
			 */
			if ((tshp->infomask & HEAP_HASEXTERNAL) != 0)
			{
				serData.cursor += sizeof(TupSerHeader);

				htup = DeserializeTuple(pSerInfo, &serData);

				/* Free up memory we used. */
				pfree(serData.data);
				return htup;
			}

			/* reconstruct lengths of null bitmap and data part */
			if (tshp->infomask & HEAP_HASNULL)
				nullslen = BITMAPLEN(tshp->natts);
			else
				nullslen = 0;

			paddedNullsLen = TYPEALIGN(TUPLE_CHUNK_ALIGN, nullslen);

			/*
			 * Validate against the padded bitmap length, because that is
			 * what gets subtracted below; checking the unpadded length
			 * would let datalen wrap around for a malformed header.
			 */
			if (tshp->tuplen < sizeof(TupSerHeader) + paddedNullsLen)
				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								errmsg("Interconnect error: cannot convert chunks to a heap tuple."),
								errdetail("tuple len %d < nullslen %d + headersize (%d)",
										  tshp->tuplen, paddedNullsLen, (int) sizeof(TupSerHeader))));

			datalen = tshp->tuplen - sizeof(TupSerHeader) - paddedNullsLen;

			/* determine overhead size of tuple (should match heap_form_tuple) */
			hoff = offsetof(HeapTupleHeaderData, t_bits) + paddedNullsLen;
			if (tshp->infomask & HEAP_HASOID)
				hoff += sizeof(Oid);
			hoff = MAXALIGN(hoff);

			/* Allocate the space in one chunk, like heap_form_tuple */
			htup = (HeapTuple) palloc(HEAPTUPLESIZE + hoff + datalen);

			t_data = (HeapTupleHeader) ((char *) htup + HEAPTUPLESIZE);

			/* make sure unused header fields are zeroed */
			MemSetAligned(t_data, 0, hoff);

			/* reconstruct the HeapTupleData fields */
			htup->t_len = hoff + datalen;
			ItemPointerSetInvalid(&(htup->t_self));
			htup->t_data = t_data;

			/* reconstruct the HeapTupleHeaderData fields */
			ItemPointerSetInvalid(&(t_data->t_ctid));
			HeapTupleHeaderSetNatts(t_data, tshp->natts);
			t_data->t_infomask = tshp->infomask & ~HEAP_XACT_MASK;
			t_data->t_infomask |= HEAP_XMIN_INVALID | HEAP_XMAX_INVALID;
			t_data->t_hoff = hoff;

			if (nullslen)
			{
				memcpy((void *) t_data->t_bits, pos, nullslen);
				pos += paddedNullsLen;
			}

			/*
			 * does the tuple descriptor expect an OID ? Note: we don't have
			 * to set the oid itself, just the flag! (see heap_formtuple())
			 */
			if (pSerInfo->tupdesc->tdhasoid)	/* else leave infomask = 0 */
			{
				t_data->t_infomask |= HEAP_HASOID;
			}

			/*
			 * and now the data proper (it would be nice if we could just
			 * point our caller into our existing buffer in-place, but we'll
			 * leave that for another day)
			 */
			memcpy((char *) t_data + hoff, pos, datalen);
		}
	}

	/* Free up memory we used. */
	pfree(serData.data);

	return htup;
}
int main(int argc, char *argv[]) { struct timeval start_t; struct timeval elapse_t; int tmpfile, i, loops = 1000; char *full_buf = (char *) malloc(XLOG_SEG_SIZE), *buf; char *filename = FSYNC_FILENAME; if (argc > 2 && strcmp(argv[1], "-f") == 0) { filename = argv[2]; argv += 2; argc -= 2; } if (argc > 1) loops = atoi(argv[1]); for (i = 0; i < XLOG_SEG_SIZE; i++) full_buf[i] = 'a'; if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR, 0)) == -1) die("Cannot open output file."); if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE) die("write failed"); /* fsync so later fsync's don't have to do it */ if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); buf = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, full_buf); printf("Simple write timing:\n"); /* write only */ gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); close(tmpfile); } gettimeofday(&elapse_t, NULL); printf("\twrite "); print_elapse(start_t, elapse_t); printf("\n"); printf("\nCompare fsync times on write() and non-write() descriptor:\n"); printf("(If the times are similar, fsync() can sync data written\n on a different descriptor.)\n"); /* write, fsync, close */ gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); /* do nothing but the open/close the tests are consistent. 
*/ close(tmpfile); } gettimeofday(&elapse_t, NULL); printf("\twrite, fsync, close "); print_elapse(start_t, elapse_t); printf("\n"); /* write, close, fsync */ gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); close(tmpfile); /* reopen file */ if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); } gettimeofday(&elapse_t, NULL); printf("\twrite, close, fsync "); print_elapse(start_t, elapse_t); printf("\n"); printf("\nCompare one o_sync write to two:\n"); #ifdef OPEN_SYNC_FLAG /* 16k o_sync write */ if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); gettimeofday(&elapse_t, NULL); close(tmpfile); printf("\tone 16k o_sync write "); print_elapse(start_t, elapse_t); printf("\n"); /* 2*8k o_sync writes */ if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); } gettimeofday(&elapse_t, NULL); close(tmpfile); printf("\ttwo 8k o_sync writes "); print_elapse(start_t, elapse_t); printf("\n"); printf("\nCompare file sync methods with one 8k write:\n"); #else printf("\t(o_sync unavailable) "); #endif printf("\n"); #ifdef OPEN_DATASYNC_FLAG /* open_dsync, write */ if ((tmpfile = open(filename, O_RDWR | O_DSYNC, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write 
failed"); gettimeofday(&elapse_t, NULL); close(tmpfile); printf("\topen o_dsync, write "); print_elapse(start_t, elapse_t); printf("\n"); #ifdef OPEN_SYNC_FLAG /* open_fsync, write */ if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); gettimeofday(&elapse_t, NULL); close(tmpfile); printf("\topen o_sync, write "); print_elapse(start_t, elapse_t); #endif #else printf("\t(o_dsync unavailable) "); #endif printf("\n"); #ifdef HAVE_FDATASYNC /* write, fdatasync */ if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); fdatasync(tmpfile); } gettimeofday(&elapse_t, NULL); close(tmpfile); printf("\twrite, fdatasync "); print_elapse(start_t, elapse_t); #else printf("\t(fdatasync unavailable)"); #endif printf("\n"); /* write, fsync, close */ if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); if (fsync(tmpfile) != 0) die("fsync failed"); } gettimeofday(&elapse_t, NULL); close(tmpfile); printf("\twrite, fsync, "); print_elapse(start_t, elapse_t); printf("\n"); printf("\nCompare file sync methods with 2 8k writes:\n"); #ifdef OPEN_DATASYNC_FLAG /* open_dsync, write */ if ((tmpfile = open(filename, O_RDWR | O_DSYNC, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); } gettimeofday(&elapse_t, NULL); close(tmpfile); printf("\topen o_dsync, write "); 
print_elapse(start_t, elapse_t); #else printf("\t(o_dsync unavailable) "); #endif printf("\n"); #ifdef OPEN_SYNC_FLAG /* open_fsync, write */ if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); } gettimeofday(&elapse_t, NULL); close(tmpfile); printf("\topen o_sync, write "); print_elapse(start_t, elapse_t); printf("\n"); #endif #ifdef HAVE_FDATASYNC /* write, fdatasync */ if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); fdatasync(tmpfile); } gettimeofday(&elapse_t, NULL); close(tmpfile); printf("\twrite, fdatasync "); print_elapse(start_t, elapse_t); #else printf("\t(fdatasync unavailable)"); #endif printf("\n"); /* write, fsync, close */ if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); if (write(tmpfile, buf, WRITE_SIZE / 2) != WRITE_SIZE / 2) die("write failed"); if (fsync(tmpfile) != 0) die("fsync failed"); } gettimeofday(&elapse_t, NULL); close(tmpfile); printf("\twrite, fsync, "); print_elapse(start_t, elapse_t); printf("\n"); free(full_buf); unlink(filename); return 0; }
/*
 * Convert a tuple into a byte-sequence, and store it directly
 * into a chunklist for transmission.
 *
 * This code is based on the printtup_internal_20() function in printtup.c.
 *
 * gtuple   - tuple to serialize; either a memtuple or a heap tuple
 * pSerInfo - serialization info: tuple descriptor, per-attribute info,
 *            scratch values/nulls arrays and the chunk cache
 * tcList   - chunk list to fill; it is reset first, any previous contents
 *            are not freed here
 *
 * A descriptor with zero attributes yields a single TC_EMPTY chunk.  A
 * memtuple is sent as its raw image plus padding.  A heap tuple is sent as a
 * TupSerHeader followed either by its raw null bitmap and data (no external
 * attributes) or by the nulls array and each non-null attribute in turn.
 */
void
SerializeTupleIntoChunks(GenericTuple gtuple, SerTupInfo *pSerInfo, TupleChunkList tcList)
{
	TupleChunkListItem headChunk = NULL;
	MemoryContext savedCtxt;
	TupleDesc	desc;
	int			attno;
	int			attrCount;

	AssertArg(tcList != NULL);
	AssertArg(gtuple != NULL);
	AssertArg(pSerInfo != NULL);

	desc = pSerInfo->tupdesc;
	attrCount = desc->natts;

	/* start from an empty chunk list */
	tcList->p_first = NULL;
	tcList->p_last = NULL;
	tcList->num_chunks = 0;
	tcList->serialized_data_length = 0;
	tcList->max_chunk_length = Gp_max_tuple_chunk_size;

	/* every result has at least one chunk, so grab it up front */
	headChunk = getChunkFromCache(&pSerInfo->chunkCache);
	if (headChunk == NULL)
	{
		ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY),
						errmsg("Could not allocate space for first chunk item in new chunk list.")));
	}

	if (attrCount == 0)
	{
		/* no attributes: a lone TC_EMPTY chunk says it all */
		SetChunkType(headChunk->chunk_data, TC_EMPTY);
		headChunk->chunk_length = TUPLE_CHUNK_HEADER_SIZE;
		appendChunkToTCList(tcList, headChunk);

		return;
	}

	/* optimistically mark it as a whole tuple; fixed up at the end if not */
	SetChunkType(headChunk->chunk_data, TC_WHOLE);
	headChunk->chunk_length = TUPLE_CHUNK_HEADER_SIZE;
	appendChunkToTCList(tcList, headChunk);

	AssertState(s_tupSerMemCtxt != NULL);

	if (is_memtuple(gtuple))
	{
		MemTuple	mtup = (MemTuple) gtuple;

		addByteStringToChunkList(tcList, (char *) mtup, memtuple_get_size(mtup), &pSerInfo->chunkCache);
		addPadding(tcList, &pSerInfo->chunkCache, memtuple_get_size(mtup));
	}
	else
	{
		HeapTuple	htup = (HeapTuple) gtuple;
		HeapTupleHeader hdr = htup->t_data;
		TupSerHeader serHdr;
		unsigned int dataBytes;
		unsigned int bitmapBytes;

		dataBytes = htup->t_len - hdr->t_hoff;
		bitmapBytes = 0;
		if (HeapTupleHasNulls(htup))
			bitmapBytes = BITMAPLEN(HeapTupleHeaderGetNatts(hdr));

		serHdr.tuplen = sizeof(TupSerHeader) + TYPEALIGN(TUPLE_CHUNK_ALIGN, bitmapBytes) + dataBytes;
		serHdr.natts = HeapTupleHeaderGetNatts(hdr);
		serHdr.infomask = hdr->t_infomask;

		addByteStringToChunkList(tcList, (char *) &serHdr, sizeof(TupSerHeader), &pSerInfo->chunkCache);

		if ((serHdr.infomask & HEAP_HASEXTERNAL) != 0)
		{
			/*
			 * Toasted tuples need more care: the tuple is taken apart and
			 * each attribute is sent individually.  Sending the untoasted
			 * attributes in a rawer form would make rebuilding harder.
			 */
			savedCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);

			/* deconstruct the tuple (faster than a heap_getattr loop) */
			heap_deform_tuple(htup, desc, pSerInfo->values, pSerInfo->nulls);

			MemoryContextSwitchTo(savedCtxt);

			/* the nulls array goes first */
			addByteStringToChunkList(tcList, pSerInfo->nulls, attrCount, &pSerInfo->chunkCache);
			addPadding(tcList, &pSerInfo->chunkCache, attrCount);

			/*
			 * Now the attributes.  Anything that may allocate temporary
			 * space (such as detoasting) runs with s_tupSerMemCtxt as the
			 * current memory context.
			 */
			for (attno = 0; attno < attrCount; attno++)
			{
				SerAttrInfo *info = &pSerInfo->myinfo[attno];
				Datum		value = pSerInfo->values[attno];

				/* nulls were already conveyed by the nulls array */
				if (pSerInfo->nulls[attno])
					continue;

				if (info->typlen == -1)
				{
					Datum		detoasted;
					int32		len;
					char	   *bytes;

					/*
					 * Force any detoast allocation to happen in the
					 * reset-able serialization context so it cannot leak.
					 */
					savedCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);
					detoasted = PointerGetDatum(PG_DETOAST_DATUM_PACKED(value));
					MemoryContextSwitchTo(savedCtxt);

					len = VARSIZE_ANY_EXHDR(detoasted);
					bytes = VARDATA_ANY(detoasted);

					/* length, then payload, then padding */
					addInt32ToChunkList(tcList, len, &pSerInfo->chunkCache);
					addByteStringToChunkList(tcList, bytes, len, &pSerInfo->chunkCache);
					addPadding(tcList, &pSerInfo->chunkCache, len);
				}
				else if (info->typlen == -2)
				{
					int32		len;
					char	   *bytes;

					/* CString: the terminating '\0' is sent as well */
					bytes = DatumGetCString(value);
					len = strlen(bytes) + 1;

					/* length, then payload, then padding */
					addInt32ToChunkList(tcList, len, &pSerInfo->chunkCache);
					addByteStringToChunkList(tcList, bytes, len, &pSerInfo->chunkCache);
					addPadding(tcList, &pSerInfo->chunkCache, len);
				}
				else if (info->typbyval)
				{
					/*
					 * Pass-by-value types are always sent as a full-width
					 * Datum, whatever their actual size.
					 */
					addByteStringToChunkList(tcList, (char *) &value, sizeof(Datum), &pSerInfo->chunkCache);
					addPadding(tcList, &pSerInfo->chunkCache, sizeof(Datum));
				}
				else
				{
					/* fixed-length pass-by-reference: send typlen bytes */
					addByteStringToChunkList(tcList, DatumGetPointer(value), info->typlen, &pSerInfo->chunkCache);
					addPadding(tcList, &pSerInfo->chunkCache, info->typlen);
				}
			}

			MemoryContextReset(s_tupSerMemCtxt);
		}
		else
		{
			/* nothing toasted: the raw null bitmap and data can be sent */
			if (bitmapBytes)
			{
				addByteStringToChunkList(tcList, (char *) hdr->t_bits, bitmapBytes, &pSerInfo->chunkCache);
				addPadding(tcList, &pSerInfo->chunkCache, bitmapBytes);
			}

			addByteStringToChunkList(tcList, (char *) hdr + hdr->t_hoff, dataBytes, &pSerInfo->chunkCache);
			addPadding(tcList, &pSerInfo->chunkCache, dataBytes);
		}
	}

	/*
	 * With more than one chunk the first and last must be retagged; chunks in
	 * between are already TC_PARTIAL_MID from allocation.
	 */
	if (tcList->num_chunks > 1)
	{
		TupleChunkListItem head = tcList->p_first;
		TupleChunkListItem tail = tcList->p_last;

		Assert(head != NULL);
		Assert(head != tail);
		Assert(tail != NULL);

		SetChunkType(head->chunk_data, TC_PARTIAL_START);
		SetChunkType(tail->chunk_data, TC_PARTIAL_END);
	}

	return;
}
int main(int argc, char *argv[]) { struct timeval start_t; struct timeval stop_t; int tmpfile, i; char *full_buf = (char *) malloc(XLOG_SEG_SIZE), *buf; char *filename = FSYNC_FILENAME; if (argc > 2 && strcmp(argv[1], "-f") == 0) { filename = argv[2]; argv += 2; argc -= 2; } if (argc > 1) loops = atoi(argv[1]); for (i = 0; i < XLOG_SEG_SIZE; i++) full_buf[i] = random(); if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR, 0)) == -1) die("Cannot open output file."); if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE) die("write failed"); /* fsync now so later fsync's don't have to do it */ if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); buf = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, full_buf); printf("Loops = %d\n\n", loops); /* * Simple write */ printf("Simple write:\n"); printf(LABEL_FORMAT, "8k write"); fflush(stdout); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); close(tmpfile); } gettimeofday(&stop_t, NULL); print_elapse(start_t, stop_t); /* * Compare file sync methods with one 8k write */ printf("\nCompare file sync methods using one write:\n"); #ifdef OPEN_DATASYNC_FLAG printf(LABEL_FORMAT, "open_datasync 8k write"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR | O_DSYNC, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); #else printf("\t(unavailable: open_datasync)\n"); #endif #ifdef OPEN_SYNC_FLAG printf(LABEL_FORMAT, "open_sync 8k write"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 
0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); #else printf("\t(unavailable: open_sync)\n"); #endif #ifdef HAVE_FDATASYNC printf(LABEL_FORMAT, "8k write, fdatasync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); fdatasync(tmpfile); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); #else printf("\t(unavailable: fdatasync)\n"); #endif printf(LABEL_FORMAT, "8k write, fsync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (fsync(tmpfile) != 0) die("fsync failed"); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); /* * Compare file sync methods with two 8k write */ printf("\nCompare file sync methods using two writes:\n"); #ifdef OPEN_DATASYNC_FLAG printf(LABEL_FORMAT, "2 open_datasync 8k writes"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR | O_DSYNC, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); #else printf("\t(unavailable: open_datasync)\n"); #endif #ifdef OPEN_SYNC_FLAG printf(LABEL_FORMAT, "2 open_sync 8k 
writes"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); #endif #ifdef HAVE_FDATASYNC printf(LABEL_FORMAT, "8k write, 8k write, fdatasync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); fdatasync(tmpfile); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); #else printf("\t(unavailable: fdatasync)\n"); #endif printf(LABEL_FORMAT, "8k write, 8k write, fsync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (fsync(tmpfile) != 0) die("fsync failed"); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); /* * Compare 1 to 2 writes */ printf("\nCompare open_sync with different sizes:\n"); #ifdef OPEN_SYNC_FLAG printf(LABEL_FORMAT, "open_sync 16k write"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE * 2) != WRITE_SIZE * 2) die("write 
failed"); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); printf(LABEL_FORMAT, "2 open_sync 8k writes"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG, 0)) == -1) die("Cannot open output file."); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); #else printf("\t(unavailable: open_sync)\n"); #endif /* * Fsync another file descriptor? */ printf("\nTest if fsync on non-write file descriptor is honored:\n"); printf("(If the times are similar, fsync() can sync data written\n"); printf("on a different descriptor.)\n"); printf(LABEL_FORMAT, "8k write, fsync, close"); fflush(stdout); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); /* do nothing but the open/close the tests are consistent. 
*/ close(tmpfile); } gettimeofday(&stop_t, NULL); print_elapse(start_t, stop_t); printf(LABEL_FORMAT, "8k write, close, fsync"); fflush(stdout); gettimeofday(&start_t, NULL); for (i = 0; i < loops; i++) { if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) die("write failed"); close(tmpfile); /* reopen file */ if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); } gettimeofday(&stop_t, NULL); print_elapse(start_t, stop_t); /* cleanup */ free(full_buf); unlink(filename); return 0; }