/*
** Flush the sorter's in-memory record list to temp file pTemp1 as a new
** PMA. Returns SQLITE_OK on success, or an SQLite error code otherwise.
**
** On-disk layout of a PMA:
**
**     * A varint holding the total number of bytes of content in the PMA
**       (the varint itself is not counted).
**
**     * One or more records, packed back-to-back in ascending key order.
**       Each record is a varint giving the size in bytes of the key blob,
**       followed immediately by the key blob itself.
**
** Every record that is visited is freed, whether or not its write
** succeeded. If a write fails part way through, the records that were not
** yet visited are left on pSorter->pRecord.
*/
static int vdbeSorterListToPMA(sqlite3 *db, VdbeCursor *pCsr){
  static const char aZeroPad[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
  VdbeSorter *pSorter = pCsr->pSorter;
  SorterRecord *pRec;             /* Record currently being written */
  i64 iPos;                       /* Running write offset within pTemp1 */
  int rc;                         /* Return code */

  /* With nothing buffered in memory there is no PMA to write. */
  if( pSorter->nInMemory==0 ){
    assert( pSorter->pRecord==0 );
    return SQLITE_OK;
  }

  rc = vdbeSorterSort(pCsr);

  /* Open the first temporary PMA file the first time it is needed. */
  if( rc==SQLITE_OK && pSorter->pTemp1==0 ){
    rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
    assert( rc!=SQLITE_OK || pSorter->pTemp1 );
    assert( pSorter->iWriteOff==0 );
    assert( pSorter->nPMA==0 );
  }
  if( rc!=SQLITE_OK ) return rc;

  /* Header: total content size, written at the current end of pTemp1. */
  iPos = pSorter->iWriteOff;
  pSorter->nPMA++;
  rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nInMemory, &iPos);

  /* Body: one (size varint, key blob) pair per record. Save the link to
  ** the following record before the current one is freed. */
  pRec = pSorter->pRecord;
  while( rc==SQLITE_OK && pRec ){
    SorterRecord *pFollow = pRec->pNext;
    rc = vdbeSorterWriteVarint(pSorter->pTemp1, pRec->nVal, &iPos);
    if( rc==SQLITE_OK ){
      rc = sqlite3OsWrite(pSorter->pTemp1, pRec->pVal, pRec->nVal, iPos);
      iPos += pRec->nVal;
    }
    sqlite3DbFree(db, pRec);
    pRec = pFollow;
  }

  /* Unless an error has occurred, the number of bytes written after the
  ** leading size varint must match the expected size stored in
  ** pSorter->nInMemory. */
  assert( rc!=SQLITE_OK || pSorter->nInMemory==(
        iPos-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nInMemory)
  ));

  pSorter->iWriteOff = iPos;
  if( rc==SQLITE_OK ){
    /* Terminate the file with 8 extra bytes so that 9 bytes can always be
    ** read from any offset in the file without a SHORT_READ error. The
    ** padding is written at iWriteOff, which is not advanced past it. */
    rc = sqlite3OsWrite(pSorter->pTemp1, aZeroPad, 8, iPos);
  }

  /* Whatever was not consumed (nothing, unless an error occurred) remains
  ** the head of the in-memory list. */
  pSorter->pRecord = pRec;
  return rc;
}
/*
** Flush the sorter's in-memory record list to temp file pTemp1 as a new
** PMA, using a FileWriter. Returns SQLITE_OK on success, or an SQLite
** error code otherwise.
**
** On-disk layout of a PMA:
**
**     * A varint holding the total number of bytes of content in the PMA
**       (the varint itself is not counted).
**
**     * One or more records, packed back-to-back in ascending key order.
**       Each record is a varint giving the size in bytes of the key blob,
**       followed immediately by the key blob itself.
**
** Once writing begins, every record in the list is freed and
** pSorter->pRecord is cleared. The outcome of the writes is whatever
** fileWriterFinish() reports.
*/
static int vdbeSorterListToPMA(sqlite3 *db, const VdbeCursor *pCsr){
  VdbeSorter *pSorter = pCsr->pSorter;
  FileWriter writer;              /* Writer positioned at iWriteOff */
  SorterRecord *pRec;             /* Record currently being written */
  int rc;                         /* Return code */

  memset(&writer, 0, sizeof(FileWriter));

  /* With nothing buffered in memory there is no PMA to write. */
  if( pSorter->nInMemory==0 ){
    assert( pSorter->pRecord==0 );
    return SQLITE_OK;
  }

  rc = vdbeSorterSort(pCsr);

  /* Open the first temporary PMA file the first time it is needed. */
  if( rc==SQLITE_OK && pSorter->pTemp1==0 ){
    rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
    assert( rc!=SQLITE_OK || pSorter->pTemp1 );
    assert( pSorter->iWriteOff==0 );
    assert( pSorter->nPMA==0 );
  }
  if( rc!=SQLITE_OK ) return rc;

  /* Header: total content size, starting at the current write offset. */
  fileWriterInit(db, pSorter->pTemp1, &writer, pSorter->iWriteOff);
  pSorter->nPMA++;
  fileWriterWriteVarint(&writer, pSorter->nInMemory);

  /* Body: one (size varint, key blob) pair per record. Save the link to
  ** the following record before the current one is freed. */
  pRec = pSorter->pRecord;
  while( pRec ){
    SorterRecord *pFollow = pRec->pNext;
    fileWriterWriteVarint(&writer, pRec->nVal);
    fileWriterWrite(&writer, pRec->pVal, pRec->nVal);
    sqlite3DbFree(db, pRec);
    pRec = pFollow;
  }

  /* The loop only ends with pRec==0, so this empties the list. */
  pSorter->pRecord = pRec;

  /* Finishing the writer yields the return code and the new iWriteOff. */
  rc = fileWriterFinish(db, &writer, &pSorter->iWriteOff);
  return rc;
}
/*
** Called once the sorter has been fully populated, to get it ready for
** iterating through its contents in sorted order.
**
** If no PMA has been written to disk, the in-memory record list is sorted
** and *pbEof is set to true if that list is empty. Otherwise the remaining
** in-memory records are flushed to a PMA and the PMAs in pTemp1 are merged
** in groups of at most SORTER_MAX_MERGE_COUNT until no more than that many
** remain. In that case *pbEof is set to true if the iterator selected by
** aTree[1] has no file.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the aIter/aTree allocation
** fails, or the error code reported by a helper.
*/
int sqlite3VdbeSorterRewind(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
  VdbeSorter *pSorter = pCsr->pSorter;
  sqlite3_file *pOut = 0;         /* Second temp file; receives merged PMAs */
  i64 iOutOff = 0;                /* Write offset within pOut */
  int nSlot = 2;                  /* Power of 2 >= nIter */
  int nIter;                      /* Number of iterators used */
  int nByte;                      /* Bytes of space required for aIter/aTree */
  int rc;                         /* Return code */

  assert( pSorter );

  /* Nothing has been written to disk, so do not start now. Just sort the
  ** VdbeSorter.pRecord list; the vdbe layer reads directly from it. */
  if( pSorter->nPMA==0 ){
    *pbEof = !pSorter->pRecord;
    assert( pSorter->aTree==0 );
    return vdbeSorterSort(pCsr);
  }

  /* Flush the records still held in the in-memory list to a PMA. */
  rc = vdbeSorterListToPMA(db, pCsr);
  if( rc!=SQLITE_OK ) return rc;

  /* Allocate aIter[] and aTree[] as one block: nSlot iterators followed by
  ** nSlot ints, where nSlot is the smallest power of two (minimum 2) that
  ** is >= the number of iterators used. */
  nIter = (pSorter->nPMA>SORTER_MAX_MERGE_COUNT)
              ? SORTER_MAX_MERGE_COUNT : pSorter->nPMA;
  assert( nIter>0 );
  for(; nSlot<nIter; nSlot += nSlot){}
  nByte = nSlot * (sizeof(int) + sizeof(VdbeSorterIter));
  pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte);
  if( !pSorter->aIter ) return SQLITE_NOMEM;
  pSorter->aTree = (int *)&pSorter->aIter[nSlot];
  pSorter->nTree = nSlot;

  for(;;){
    sqlite3_file *pSwap;          /* Used to exchange pTemp1 and pOut */
    int nMerged;                  /* Index of new, merged, PMA */

    for(nMerged=0;
        rc==SQLITE_OK && nMerged*SORTER_MAX_MERGE_COUNT<pSorter->nPMA;
        nMerged++
    ){
      i64 nPmaByte;               /* Number of bytes in new PMA */
      int bDone = 0;              /* Set once the current merge is drained */

      /* If pTemp1 holds SORTER_MAX_MERGE_COUNT or fewer PMAs, set up an
      ** iterator for each of them and leave the loop. Those iterators are
      ** merged incrementally as the VDBE layer calls
      ** sqlite3VdbeSorterNext().
      **
      ** Otherwise, set up iterators for SORTER_MAX_MERGE_COUNT of the PMAs
      ** and merge them into a single PMA written to the second temp file. */
      rc = vdbeSorterInitMerge(db, pCsr, &nPmaByte);
      assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile );
      if( rc!=SQLITE_OK ) break;
      if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ) break;

      /* Open the second temp file, if it is not already open. */
      if( pOut==0 ){
        assert( iOutOff==0 );
        rc = vdbeSorterOpenTempFile(db, &pOut);
      }

      /* The merged PMA starts with its total content size. */
      if( rc==SQLITE_OK ){
        rc = vdbeSorterWriteVarint(pOut, nPmaByte, &iOutOff);
      }

      /* Copy the record of the iterator selected by aTree[1], size varint
      ** included, from its aAlloc buffer and then advance the merge. Stop
      ** at EOF or on the first error. */
      while( rc==SQLITE_OK && bDone==0 ){
        VdbeSorterIter *pTop = &pSorter->aIter[ pSorter->aTree[1] ];
        int nRec;
        assert( pTop->pFile );
        nRec = pTop->nKey + sqlite3VarintLen(pTop->nKey);
        rc = sqlite3OsWrite(pOut, pTop->aAlloc, nRec, iOutOff);
        iOutOff += nRec;
        if( rc==SQLITE_OK ){
          rc = sqlite3VdbeSorterNext(db, pCsr, &bDone);
        }
      }
    }

    /* Few enough PMAs remain to be merged on the fly: finished. */
    if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ) break;

    /* Exchange the two temp files so that the file just written becomes
    ** the input of the next pass, and reset the read and write offsets. */
    pSwap = pSorter->pTemp1;
    pSorter->nPMA = nMerged;
    pSorter->pTemp1 = pOut;
    pOut = pSwap;
    pSorter->iWriteOff = iOutOff;
    pSorter->iReadOff = 0;
    iOutOff = 0;

    if( rc!=SQLITE_OK ) break;
  }

  /* Close whichever file ended up as the second temp file, if any. */
  if( pOut ){
    sqlite3OsCloseFree(pOut);
  }
  *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
  return rc;
}