Example #1
0
void insert_bigfile(chidb *db, int i)
{
  int rc;
  int datalen = ((bigfile_pkeys[i] % 3) + 1) * 64;
  uint8_t buf[192];
  
  for (int j=0; j<48; j++)
    put4byte(buf + (4*j), bigfile_ikeys[i]);
  
  rc = chidb_Btree_insertInTable(db->bt, 1, bigfile_pkeys[i], buf, datalen);
  CU_ASSERT(rc == CHIDB_OK);
  
}
Example #2
0
void test_bigfile(chidb *db)
{
  int rc;
  
  for (int i=0; i<bigfile_nvalues; i++) {
    uint8_t* buf;
    uint16_t size;
    uint8_t data[192];
    int datalen = ((bigfile_pkeys[i] % 3) + 1) * 64;
    
    for (int j=0; j<48; j++)
      put4byte(data + (4*j), bigfile_ikeys[i]);
    
    rc = chidb_Btree_find(db->bt, 1, bigfile_pkeys[i], &buf, &size);
    CU_ASSERT(rc == CHIDB_OK);
    CU_ASSERT(size == datalen);
    CU_ASSERT(!memcmp(buf, data, datalen));
  }
}
Example #3
0
void test_index_bigfile(chidb *db, npage_t index_nroot)
{
  int rc;
  for (int i=0; i<bigfile_nvalues; i++) {
    uint8_t* buf;
    uint16_t size;
    uint8_t data[192];
    key_t pkey;
    
    rc = chidb_Btree_findInIndex(db->bt, index_nroot, bigfile_ikeys[i], &pkey);
    CU_ASSERT(rc == CHIDB_OK);
    
    int datalen = ((pkey % 3) + 1) * 64;
    
    for(int j=0; j<48; j++)
      put4byte(data + (4*j), bigfile_ikeys[i]);
    
    rc = chidb_Btree_find(db->bt, 1, pkey, &buf, &size);
    CU_ASSERT(rc == CHIDB_OK);
    CU_ASSERT(size == datalen);
    CU_ASSERT(!memcmp(buf, data, datalen));
  }
}
Example #4
0
// return number of bytes written
static int wal_iterate(Wal *pWal, iterate_resource *iter, u8 *buf, int bufsize, u8 *hdr, u32 *done)
{
	// db_thread* const thr = enif_tsd_get(g_tsd_thread);
	#if ATOMIC
	db_thread *thr 		 = g_tsd_thread;
	#else
	db_thread* thr = enif_tsd_get(g_tsd_thread);
	#endif
	mdbinf* const mdb 	 = &thr->mdb;
	u32 mxPage;
	u64 readSafeEvnum, readSafeTerm;
	
	#ifndef _TESTAPP_
	enif_mutex_lock(pWal->mtx);
	#endif
	readSafeEvnum = pWal->lastCompleteEvnum;
	readSafeTerm = pWal->lastCompleteTerm;
	mxPage = pWal->mxPage;
	#ifndef _TESTAPP_
	enif_mutex_unlock(pWal->mtx);
	#endif

	if (!iter->started)
	{
		if (iter->evnum + iter->evterm == 0)
		{
			// If any writes come after iterator started, we must ignore those pages.
			iter->evnum = readSafeEvnum;
			iter->evterm = readSafeTerm;
			iter->pgnoPos = 1;
			iter->entiredb = 1;
			iter->mxPage = mxPage;
			if (pWal->mxPage == 0)
			{
				DBG("ERROR: Iterate on empty DB %llu",pWal->lastCompleteEvnum);
				*done = 1;
				return 0;
			}
		}
		else
		{
			// set mxPage to highest pgno we find.
			iter->pgnoPos = iter->mxPage = 0;
			DBG("Iterate rsterm=%llu rsevnum=%llu",readSafeTerm,readSafeEvnum);
		}
		iter->started = 1;
	}

	// send entire db (without history)
	if (iter->entiredb)
	{
		u32 iRead = 0;
		findframe(thr, pWal, iter->pgnoPos, &iRead, iter->evterm, iter->evnum, NULL, NULL);

		if (!iRead)
		{
			DBG("Iterate did not find page");
			*done = iter->mxPage;
			return 0;
		}
		DBG("Iter pos=%u, mx=%u, safemx=%u",iter->pgnoPos, iter->mxPage, mxPage);
		if (iter->pgnoPos == iter->mxPage)
			*done = iter->mxPage;
		put8byte(hdr,                           iter->evterm);
		put8byte(hdr+sizeof(u64),               iter->evnum);
		put4byte(hdr+sizeof(u64)*2,             iter->pgnoPos);
		put4byte(hdr+sizeof(u64)*2+sizeof(u32), *done);
		iter->pgnoPos++;
		return fillbuff(thr, pWal, iter, buf, bufsize);
	}
	else
	{
		MDB_val logKey, logVal;
		int logop;
		u8 logKeyBuf[sizeof(u64)*3];
		int rc;
		// ** - Log DB: {<<ActorIndex:64, Evterm:64, Evnum:64>>, <<Pgno:32/unsigned>>}

		memcpy(logKeyBuf,                 &pWal->index,  sizeof(u64));
		memcpy(logKeyBuf + sizeof(u64),   &iter->evterm, sizeof(u64));
		memcpy(logKeyBuf + sizeof(u64)*2, &iter->evnum, sizeof(u64));
		logKey.mv_data = logKeyBuf;
		logKey.mv_size = sizeof(logKeyBuf);
		DBG("iterate looking for, matchterm=%llu matchevnum=%llu",iter->evterm,iter->evnum);
		if (mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_SET) != MDB_SUCCESS)
		{
			// Evterm/evnum combination not found. Check if evnum is there.
			// If so return evterm. It will mean a node is in conflict.
			DBG("Key not found in log");
			if (readSafeEvnum == iter->evnum)
			{
				iter->evterm = readSafeTerm;
				iter->termMismatch = 1;
			}
			else
			{
				memcpy(logKeyBuf,                 &pWal->index,  sizeof(u64));
				memcpy(logKeyBuf + sizeof(u64),   &readSafeTerm, sizeof(u64));
				memcpy(logKeyBuf + sizeof(u64)*2, &readSafeEvnum,sizeof(u64));
				if (mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_SET) != MDB_SUCCESS)
				{
					DBG("Key not found in log for undo");
					*done = 1;
					return 0;
				}
				while (mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,MDB_PREV_NODUP) == MDB_SUCCESS)
				{
					u64 aindex, term, evnum;

					mdb_cursor_get(mdb->cursorLog,&logKey, &logVal, MDB_GET_CURRENT);
					memcpy(&aindex, logKey.mv_data,              sizeof(u64));
					memcpy(&term,   (u8*)logKey.mv_data+sizeof(u64),  sizeof(u64));
					memcpy(&evnum,  (u8*)logKey.mv_data+sizeof(u64)*2,sizeof(u64));

					DBG("Iterate on term=%llu, evnum=%llu, looking for=%llu",term,evnum,iter->evnum);

					if (aindex != pWal->index)
						break;
					if (iter->evnum == evnum)
					{
						iter->evterm = term;
						iter->termMismatch = 1;
						break;
					}
				}
			}

			*done = 1;
			return 0;
		}

		// We start iterate from next evnum not current. Input evterm/evnum is match_index and match_term.
		// It needs next.
		if (iter->started == 1 &&
			(rc = mdb_cursor_get(mdb->cursorLog,&logKey, &logVal, MDB_NEXT_NODUP)) != MDB_SUCCESS)
		{
			*done = 1;
			return 0;
		}
		else
		{
			u64 aindex;

			rc = mdb_cursor_get(mdb->cursorLog,&logKey, &logVal, MDB_GET_CURRENT);
			if (rc != MDB_SUCCESS)
			{
				*done = 1;
				return 0;
			}
			memcpy(&aindex,       (u8*)logKey.mv_data,              sizeof(u64));
			memcpy(&iter->evterm, (u8*)logKey.mv_data+sizeof(u64),  sizeof(u64));
			memcpy(&iter->evnum,  (u8*)logKey.mv_data+sizeof(u64)*2,sizeof(u64));
			if (aindex != pWal->index)
			{
				*done = 1;
				return 0;
			}
			// To keep from moving iter->evterm/iter->evnum forward more than once.
			iter->started = 2;
		}

		logop = MDB_FIRST_DUP;
		while ((rc = mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,logop)) == MDB_SUCCESS)
		{
			u64 evnum,evterm;
			u32 pgno;
			u32 iRead;

			logop = MDB_NEXT_DUP;

			mdb_cursor_get(mdb->cursorLog,&logKey, &logVal, MDB_GET_CURRENT);
			memcpy(&pgno,logVal.mv_data,sizeof(u32));

			DBG("iterate at pgno=%u, pgnopos=%u",pgno,iter->pgnoPos);

			if (pgno <= iter->pgnoPos)
				continue;

			findframe(thr, pWal, pgno, &iRead, iter->evterm, iter->evnum, &evterm, &evnum);

			if (iRead == 0)
			{
				DBG("ERROR: Did not find frame for pgno=%u, evterm=%llu, evnum=%llu",
					pgno, iter->evterm, iter->evnum);
				*done = 1;
				return 0;
			}

			if (evterm != iter->evterm || evnum != iter->evnum)
			{
				DBG("ERROR: Evterm/evnum does not match,looking for: evterm=%llu, evnum=%llu, "
				"got: evterm=%llu, evnum=%llu", iter->evterm, iter->evnum, evterm, evnum);
				*done = 1;
				return 0;
			}

			iter->pgnoPos = pgno;
			if ((rc = mdb_cursor_get(mdb->cursorLog,&logKey,&logVal,logop)) == MDB_SUCCESS)
				*done = 0;
			else
				*done = iter->pgnoPos;
			DBG( "logcursor get next %d, done=%u",rc,*done);
			put8byte(hdr,                           iter->evterm);
			put8byte(hdr+sizeof(u64),               iter->evnum);
			put4byte(hdr+sizeof(u64)*2,             iter->pgnoPos);
			put4byte(hdr+sizeof(u64)*2+sizeof(u32), *done);
			return fillbuff(thr, pWal, iter, buf, bufsize);
		}
		*done = 1;
		return 0;
	}
}
Example #5
0
/* Write a frame or frames to the log. */
int sqlite3WalFrames(Wal *pWal, int szPage, PgHdr *pList, Pgno nTruncate, int isCommit, int sync_flags)
{
	PgHdr *p;
	MDB_val key, data;
	int rc;
	mdbinf* mdb;
	MDB_txn* txn;
	#if ATOMIC
	db_thread *thr      = g_tsd_thread;
	db_connection* pCon	= g_tsd_conn;
	#else
	db_thread* thr = enif_tsd_get(g_tsd_thread);
	db_connection* pCon = enif_tsd_get(g_tsd_conn);
	#endif

	#if ATOMIC
	if (!g_tsd_wmdb)
		lock_wtxn(thr->nEnv);
	mdb = g_tsd_wmdb;
	#else
	mdb = enif_tsd_get(g_tsd_wmdb);
	if (!mdb)
		lock_wtxn(thr->nEnv);
	mdb = enif_tsd_get(g_tsd_wmdb);
	#endif
	txn = mdb->txn;

	if (!mdb)
		return SQLITE_ERROR;

	key.mv_size = sizeof(u64);
	key.mv_data = (void*)&pWal->index;

	// Term/evnum must always be increasing
	if ((pWal->inProgressTerm > 0 && pWal->inProgressTerm < pWal->lastCompleteTerm) ||
		(pWal->inProgressEvnum > 0 && pWal->inProgressEvnum < pWal->lastCompleteEvnum))
		return SQLITE_ERROR;

	track_time(2,thr);
	// ** - Pages DB: {<<ActorIndex:64, Pgno:32/unsigned>>, <<Evterm:64,Evnum:64,Fragment,CompressedPage/binary>>}
	for(p=pList; p; p=p->pDirty)
	{
		u8 pagesKeyBuf[sizeof(u64)+sizeof(u32)];
		u8 pagesBuf[PAGE_BUFF_SIZE];
		int full_size = 0;
		int page_size = LZ4_compress_default((char*)p->pData,(char*)pagesBuf+sizeof(u64)*2+1,szPage,sizeof(pagesBuf));
		char fragment_index = 0;
		int skipped = 0;
		track_time(3,thr);

		DBG("Insert frame, actor=%lld, pgno=%u, "
			"term=%lld, evnum=%lld, commit=%d, truncate=%d, compressedsize=%d",
		pWal->index,p->pgno,pWal->inProgressTerm,pWal->inProgressEvnum,
		isCommit,nTruncate,page_size);

		if (pCon->doReplicate)
		{
			u8 hdr[sizeof(u64)*2+sizeof(u32)*2];
			put8byte(hdr,               pWal->inProgressTerm);
			put8byte(hdr+sizeof(u64),   pWal->inProgressEvnum);
			put4byte(hdr+sizeof(u64)*2, p->pgno);
			if (p->pDirty)
				put4byte(hdr+sizeof(u64)*2+sizeof(u32), 0);
			else
				put4byte(hdr+sizeof(u64)*2+sizeof(u32), nTruncate);
			#ifndef _TESTAPP_
			wal_page_hook(thr,pagesBuf+sizeof(u64)*2+1, page_size, hdr, sizeof(hdr));
			#endif
		}

		memcpy(pagesKeyBuf,               &pWal->index,sizeof(u64));
		memcpy(pagesKeyBuf + sizeof(u64), &p->pgno,    sizeof(u32));
		key.mv_size = sizeof(pagesKeyBuf);
		key.mv_data = pagesKeyBuf;
		

		// Check if there are pages with the same or higher evnum/evterm. If there are, delete them.
		// This can happen if sqlite flushed some page to disk before commiting, because there were
		// so many pages that they could not be held in memory. Or it could happen if pages need to be
		// overwritten because there was a write that did not pass raft consensus.
		rc = mdb_cursor_get(mdb->cursorPages,&key,&data,MDB_SET_KEY);
		if (rc == MDB_SUCCESS)
		{
			size_t ndupl;
			mdb_cursor_count(mdb->cursorPages,&ndupl);

			rc = mdb_cursor_get(mdb->cursorPages,&key,&data,MDB_LAST_DUP);
			if (rc == MDB_SUCCESS)
			{
				MDB_val pgDelKey = {0,NULL}, pgDelVal = {0,NULL};
				u64 evnum, evterm;
				u8 frag = *((u8*)data.mv_data+sizeof(u64)*2);
				memcpy(&evterm, data.mv_data,               sizeof(u64));
				memcpy(&evnum,  (u8*)data.mv_data + sizeof(u64), sizeof(u64));

				while ((evterm > pWal->inProgressTerm || evnum >= pWal->inProgressEvnum))
						//(pWal->inProgressTerm + pWal->inProgressEvnum) > 0)
				{
					DBG("Deleting pages higher or equal to current. "
					"Evterm=%llu, evnum=%llu, curterm=%llu, curevn=%llu, dupl=%ld",
					evterm,evnum,pWal->inProgressTerm,pWal->inProgressEvnum,ndupl);

					if (pgDelKey.mv_data != NULL)
					{
						if ((rc = mdb_del(txn,mdb->pagesdb,&pgDelKey,&pgDelVal)) != MDB_SUCCESS)
						{
							DBG("Unable to cleanup page from pagedb %d",rc);
							break;
						}
						pgDelKey.mv_data = NULL;
					}
					mdb_cursor_get(mdb->cursorPages,&pgDelKey,&pgDelVal,MDB_GET_CURRENT);

					// if (mdb_cursor_del(mdb->cursorPages,0) != MDB_SUCCESS)
					// {
					// 	DBG("Cant delete!");
					// 	break;
					// }

					if (frag == 0)
						pWal->allPages--;
					ndupl--;
					if (!ndupl)
						break;
					rc = mdb_cursor_get(mdb->cursorPages,&key,&data,MDB_PREV_DUP);
					if (rc != MDB_SUCCESS)
						break;
					memcpy(&evterm, data.mv_data,               sizeof(u64));
					memcpy(&evnum,  (u8*)data.mv_data + sizeof(u64), sizeof(u64));
					frag = *((u8*)data.mv_data+sizeof(u64)*2);
				}
				if (pgDelKey.mv_data != NULL)
				{
					if ((rc = mdb_del(txn,mdb->pagesdb,&pgDelKey,&pgDelVal)) != MDB_SUCCESS)
					{
						DBG("Unable to cleanup page from pagedb %d",rc);
						break;
					}
					pgDelKey.mv_data = NULL;
				}
			}
			memcpy(pagesKeyBuf,               &pWal->index,sizeof(u64));
			memcpy(pagesKeyBuf + sizeof(u64), &p->pgno,    sizeof(u32));
			key.mv_size = sizeof(pagesKeyBuf);
			key.mv_data = pagesKeyBuf;
		}
		track_time(4,thr);

		memcpy(pagesBuf,               &pWal->inProgressTerm,  sizeof(u64));
		memcpy(pagesBuf + sizeof(u64), &pWal->inProgressEvnum, sizeof(u64));

		full_size = page_size + sizeof(u64)*2 + 1;
		if (full_size < thr->maxvalsize)
			fragment_index = 0;
		else
		{
			full_size = page_size;
			skipped = thr->maxvalsize - sizeof(u64)*2 - 1;
			full_size -= skipped;
			while(full_size > 0)
			{
				full_size -= (thr->maxvalsize - sizeof(u64)*2 - 1);
				fragment_index++;
			}
			full_size = page_size + sizeof(u64)*2 +1;
		}

		pagesBuf[sizeof(u64)*2] = fragment_index;
		data.mv_size = fragment_index == 0 ? full_size : thr->maxvalsize;
		data.mv_data = pagesBuf;

		// fragment_index == 0 ? MDB_APPENDDUP : 0
		if ((rc = mdb_cursor_put(mdb->cursorPages,&key,&data,0)) != MDB_SUCCESS)
		{
			// printf("Cursor put failed to pages %d",rc);
			DBG("ERROR: cursor put failed=%d, datasize=%d",rc,full_size);
			return SQLITE_ERROR;
		}

		fragment_index--;
		skipped = data.mv_size;
		while (fragment_index >= 0)
		{
			DBG("Insert fragment %d",(int)fragment_index);
			if (fragment_index == 0)
				data.mv_size = full_size - skipped + sizeof(u64)*2 + 1;
			else
				data.mv_size = thr->maxvalsize;
			data.mv_data = pagesBuf + skipped - (sizeof(u64)*2+1);
			memcpy(pagesBuf + skipped - (sizeof(u64)*2+1), &pWal->inProgressTerm,  sizeof(u64));
			memcpy(pagesBuf + skipped - (sizeof(u64)+1),   &pWal->inProgressEvnum, sizeof(u64));
			pagesBuf[skipped-1] = fragment_index;

			if ((rc = mdb_cursor_put(mdb->cursorPages,&key,&data,0)) != MDB_SUCCESS)
			{
				DBG("ERROR: cursor secondary put failed: err=%d, datasize=%d, skipped=%d, frag=%d",
				rc,full_size, skipped, (int)fragment_index);
				return SQLITE_ERROR;
			}
			skipped += data.mv_size - sizeof(u64)*2 - 1;
			fragment_index--;
		}

		thr->pagesChanged++;
	}
	// printf("");
	// ** - Log DB: {<<ActorIndex:64, Evterm:64, Evnum:64>>, <<Pgno:32/unsigned>>}
	if (pWal->inProgressTerm > 0)
	{
		for(p=pList; p; p=p->pDirty)
		{
			u8 logKeyBuf[sizeof(u64)*3];

			DBG("Inserting to log");

			memcpy(logKeyBuf,                 &pWal->index,           sizeof(u64));
			memcpy(logKeyBuf + sizeof(u64),   &pWal->inProgressTerm,  sizeof(u64));
			memcpy(logKeyBuf + sizeof(u64)*2, &pWal->inProgressEvnum, sizeof(u64));
			key.mv_size = sizeof(logKeyBuf);
			key.mv_data = logKeyBuf;

			data.mv_size = sizeof(u32);
			data.mv_data = &p->pgno;

			if (mdb_cursor_put(mdb->cursorLog,&key,&data,0) != MDB_SUCCESS)
			{
				// printf("Cursor put failed to log");
				DBG("ERROR: cursor put to log failed: %d",rc);
				return SQLITE_ERROR;
			}

			pWal->allPages++;
		}
	}
	else
	{
		DBG("Skipping log");
		for(p=pList; p; p=p->pDirty)
			pWal->allPages++;
	}
  /** - Info DB: {<<ActorIndex:64>>, <<V,FirstCompleteTerm:64,FirstCompleteEvnum:64,
										LastCompleteTerm:64,LastCompleteEvnum:64,
										InprogressTerm:64,InProgressEvnum:64>>} */
	{
		if (isCommit)
		{
			DBG("Commit actor=%llu fct=%llu, fcev=%llu, lct=%llu, lcev=%llu, int=%llu, inev=%llu",
				pWal->index,
				pWal->firstCompleteTerm, pWal->firstCompleteEvnum, pWal->lastCompleteTerm,
				pWal->lastCompleteEvnum, pWal->inProgressTerm,pWal->inProgressEvnum);

			#ifndef _TESTAPP_
			enif_mutex_lock(pWal->mtx);
			#endif
			pWal->lastCompleteTerm = pWal->inProgressTerm > 0 ? 
				pWal->inProgressTerm : pWal->lastCompleteTerm;
			pWal->lastCompleteEvnum = pWal->inProgressEvnum > 0 ? 
				pWal->inProgressEvnum : pWal->lastCompleteEvnum;
			if (pWal->firstCompleteTerm == 0)
			{
				pWal->firstCompleteTerm = pWal->inProgressTerm;
				pWal->firstCompleteEvnum = pWal->inProgressEvnum;
			}
			pWal->inProgressTerm = pWal->inProgressEvnum = 0;
			pWal->mxPage =  pWal->mxPage > nTruncate ? pWal->mxPage : nTruncate;
			// pWal->changed = 0;
			thr->forceCommit = 1;
			pCon->dirty = 0;
			#ifndef _TESTAPP_
			enif_mutex_unlock(pWal->mtx);
			#endif
			DBG("cur mxpage=%u",pWal->mxPage);
		}
		else
		{
			// pWal->changed = 1;
			pCon->dirty = 1;
		}
		thr->pagesChanged++;

		rc = storeinfo(pWal,0,0,NULL);
		if (rc != SQLITE_OK)
			return rc;

		track_time(5,thr);
	}

	return SQLITE_OK;
}