//-----------------------------------------------------------------------
void PagingLandScapePageManager::processUnloadQueues()
{
  // Check for pages that need to be unloaded.
  // If a page is untouched, it has not been touched by any camera
  // for several frames and thus needs to be unloaded.

  // LIST CHECKS
  for (PagingLandScapePageList::iterator itl = mLoadedPages.begin(); itl != mLoadedPages.end();) {
    if ((*itl)->unloadUntouched()) {
      releasePage(*itl);
      itl = mLoadedPages.erase(itl);
    } else {
      ++itl;
    }
  }

  // QUEUES CHECKS
  // check the queues for pages that need to be excluded from them
  PagingLandScapePage* p = 0;
  for (PagingLandScapeQueue<PagingLandScapePage>::MsgQueType::iterator itq = mPageLoadQueue.begin(); itq != mPageLoadQueue.end();) {
    assert(!(*itq)->isLoaded());
    assert((*itq)->isInLoadQueue());
    if ((*itq)->unloadUntouched()) {
      p = *itq;
      // remove from queue
      p->setInQueue(PagingLandScapePage::QUEUE_NONE);
      itq = mPageLoadQueue.erase(itq);
      // remove from active pages (must be removed from the queue first)
      releasePage(p);
    } else {
      ++itq;
    }
  }
}
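The unloadUntouched() call above is where the idle-timeout policy lives; its implementation is not part of this listing. A minimal sketch of the idea, assuming the page simply counts frames since a camera last touched it (the struct and member names below are hypothetical, not the real PLSM2 types), might be:

// Hypothetical sketch, not the actual PagingLandScape code: a page that asks
// to be unloaded once it has gone a fixed number of frames without a camera
// touching it.
struct IdleTimeoutPage {
    unsigned framesUntouched;
    unsigned maxFramesUntouched;

    IdleTimeoutPage() : framesUntouched(0), maxFramesUntouched(30) {}

    // Called whenever a camera references the page.
    void touch() { framesUntouched = 0; }

    // Called once per frame by the manager; true means "release me".
    bool unloadUntouched() { return ++framesUntouched > maxFramesUntouched; }
};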
Example #2
size_t dataPage::write_bytes(const byte * buf, ssize_t remaining, Page ** latch_p) {
    if(latch_p) {
        *latch_p  = NULL;
    }
    recordid chunk = calc_chunk_from_offset(write_offset_);
    if(chunk.size > remaining) {
        chunk.size = remaining;
    }
    if(chunk.page >= first_page_ + page_count_) {
        chunk.size = 0; // no space (should not happen)
    } else {
        Page *p = alloc_ ? alloc_->load_page(xid_, chunk.page) : loadPage(xid_, chunk.page);
        assert(chunk.size);
        memcpy(data_at_offset_ptr(p, chunk.slot), buf, chunk.size);
        stasis_page_lsn_write(xid_, p, alloc_->get_lsn(xid_));
        if(latch_p && !*latch_p) {
            writelock(p->rwlatch,0);
            *latch_p = p;
        } else {
            releasePage(p);
        }
        write_offset_ += chunk.size;
    }
    return chunk.size;
}
Example #3
void dataPage::initialize_page(pageid_t pageid) {
    //load the first page
    Page *p;
#ifdef CHECK_FOR_SCRIBBLING
    p = alloc_ ? alloc_->load_page(xid_, pageid) : loadPage(xid_, pageid);
    if(*stasis_page_type_ptr(p) == DATA_PAGE) {
        printf("Collision on page %lld\n", (long long)pageid);
        fflush(stdout);
        assert(*stasis_page_type_ptr(p) != DATA_PAGE);
    }
#else
    p = loadUninitializedPage(xid_, pageid);
#endif

    DEBUG("\t\t\t\t\t\t->%lld\n", pageid);

    //initialize header
    p->pageType = DATA_PAGE;

    //clear page (arranges for null-padding)  XXX null pad more carefully and use sentinel value instead?
    memset(p->memAddr, 0, PAGE_SIZE);

    //we're the last page for now.
    *is_another_page_ptr(p) = 0;

    //write 0 to first data size
    *length_at_offset_ptr(p, calc_chunk_from_offset(write_offset_).slot) = 0;

    //set the page dirty
    stasis_page_lsn_write(xid_, p, alloc_->get_lsn(xid_));

    releasePage(p);
}
Example #4
int main(int argc, char * argv[]) {
  if(argc != 3) { printf(usage, argv[0]); abort(); }
  char * endptr;
  numthreads = strtoul(argv[1], &endptr, 10);
  if(*endptr != 0) { printf(usage, argv[0]); abort(); }
  numops= strtoul(argv[2], &endptr, 10) / numthreads;
  if(*endptr != 0) { printf(usage, argv[0]); abort(); }

  pthread_t workers[numthreads];

  Page * p;
  Tinit();

  dpt = stasis_runtime_dirty_page_table();

  p = loadPage(-1,0);

  for(int i = 0; i < numthreads; i++) {
    pthread_create(&workers[i], 0, worker, p);
  }
  for(int i = 0; i < numthreads; i++) {
    pthread_join(workers[i], 0);
  }

  releasePage(p);

  Tdeinit();
}
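The worker routine passed to pthread_create() is not shown in this excerpt. Purely as an illustration (this is not the original benchmark code), a worker of this shape could hammer the shared pinned page through the dirty page table, using the dpt and numops globals set up in main():

// Hypothetical worker body -- not the original benchmark's implementation.
// Each thread repeatedly write-latches the page main() pinned and marks it
// dirty, exercising the dirty page table under contention.
static void * worker(void * arg) {
  Page * p = (Page*)arg;
  for(unsigned long i = 0; i < numops; i++) {
    writelock(p->rwlatch, 0);
    stasis_dirty_page_table_set_dirty((stasis_dirty_page_table_t*)dpt, p);
    unlock(p->rwlatch);
  }
  return 0;
}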
Example #5
static void stasis_alloc_register_old_regions(stasis_alloc_t* alloc) {
  pageid_t boundary = REGION_FIRST_TAG;
  boundary_tag t;
  DEBUG("registering old regions\n");
  int succ = TregionReadBoundaryTag(-1, boundary, &t);
  if(succ) {
    do {
      DEBUG("boundary tag %lld type %d\n", boundary, t.allocation_manager);
      if(t.allocation_manager == STORAGE_MANAGER_TALLOC) {
        for(pageid_t i = 0; i < t.size; i++) {
          Page * p = loadPage(-1, boundary + i);
          readlock(p->rwlatch,0);
          if(p->pageType == SLOTTED_PAGE) {
            stasis_allocation_policy_register_new_page(alloc->allocPolicy, p->id, stasis_record_freespace(-1, p));
            DEBUG("registered page %lld\n", boundary+i);
          } else {
            abort();
          }
          unlock(p->rwlatch);
          releasePage(p);
        }
      }
    } while(TregionNextBoundaryTag(-1, &boundary, &t, 0));  //STORAGE_MANAGER_TALLOC)) {
  }
}
Example #6
int TrecordType(int xid, recordid rid) {
  Page * p;
  p = loadPage(xid, rid.page);
  readlock(p->rwlatch,0);
  int ret;
  ret = stasis_record_type_read(xid, p, rid);
  unlock(p->rwlatch);
  releasePage(p);
  return ret;
}
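Nearly every Stasis example in this listing repeats the same choreography: loadPage(), take the latch, do the work, unlock(), releasePage(). A small RAII guard makes the pairing explicit; the PinnedPage class below is a hypothetical illustration, not part of the Stasis API:

// Hypothetical C++ guard for the Stasis pin/latch protocol; PinnedPage is
// illustrative only and does not exist in Stasis itself.
class PinnedPage {
public:
    PinnedPage(int xid, pageid_t pid) : p_(loadPage(xid, pid)) {
        readlock(p_->rwlatch, 0);   // latch while pinned
    }
    ~PinnedPage() {
        unlock(p_->rwlatch);        // unlatch first ...
        releasePage(p_);            // ... then unpin, matching the examples above
    }
    Page * get() const { return p_; }
private:
    PinnedPage(const PinnedPage&);  // non-copyable: one pin, one release
    Page * p_;
};

// TrecordType() rewritten against the guard would reduce to:
//   PinnedPage pp(xid, rid.page);
//   return stasis_record_type_read(xid, pp.get(), rid);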
Example #7
dataTuple* dataPage::iterator::getnext() {
    len_t len;
    bool succ;
    if(dp == NULL) {
        return NULL;
    }
    // XXX hack: read latch the page that the record will live on.
    // This should be handled by a read_data_in_latch function, or something...
    Page * p = loadPage(dp->xid_, dp->calc_chunk_from_offset(read_offset_).page);
    readlock(p->rwlatch, 0);
    succ = dp->read_data((byte*)&len, read_offset_, sizeof(len));
    if((!succ) || (len == 0)) {
        unlock(p->rwlatch);
        releasePage(p);
        return NULL;
    }
    read_offset_ += sizeof(len);

    byte * buf = (byte*)malloc(len);
    succ = dp->read_data(buf, read_offset_, len);

    // release hacky latch
    unlock(p->rwlatch);
    releasePage(p);

    if(!succ) {
        read_offset_ -= sizeof(len);
        free(buf);
        return NULL;
    }

    read_offset_ += len;

    dataTuple *ret = dataTuple::from_bytes(buf);

    free(buf);

    return ret;
}
Example #8
 void freeIndex(NativeNaturalType index, PageRefType pageRef) {
     assert(getSize(index) > 0);
     if(isFull()) {
         assert(superPage->fullBlobBuckets.erase<Key>(pageRef));
         assert(superPage->freeBlobBuckets[header.type].insert(pageRef));
     }
     --header.count;
     if(isEmpty()) {
         assert(superPage->freeBlobBuckets[header.type].erase<Key>(pageRef));
         releasePage(pageRef);
     } else {
         setSize(index, 0);
         setSymbol(index, header.freeIndex);
         header.freeIndex = index;
     }
 }
Example #9
int TrecordSize(int xid, recordid rid) {
  int ret;
  Page * p;
  p = loadPage(xid, rid.page);
  readlock(p->rwlatch,0);
  rid.size = stasis_record_length_read(xid, p, rid);
  if(stasis_record_type_read(xid,p,rid) == BLOB_SLOT) {
    blob_record_t r;
    stasis_record_read(xid,p,rid,(byte*)&r);
    ret = r.size;
  } else {
    ret = rid.size;
  }
  unlock(p->rwlatch);
  releasePage(p);
  return ret;
}
Example #10
dataPage::dataPage(int xid, regionAllocator * alloc, pageid_t pid):  // XXX Hack!! The read-only constructor signature is too close to the other's
    xid_(xid),
    page_count_(1), // will be opportunistically incremented as we scan the datapage.
    initial_page_count_(-1), // used by append.
    alloc_(alloc),  // read-only, and we don't free data pages one at a time.
    first_page_(pid),
    write_offset_(-1)
{
    assert(pid!=0);
    Page *p = alloc_ ? alloc_->load_page(xid, first_page_) : loadPage(xid, first_page_);
    if(!(*is_another_page_ptr(p) == 0 || *is_another_page_ptr(p) == 2)) {
        printf("Page %lld is not the start of a datapage\n", first_page_);
        fflush(stdout);
        abort();
    }
    assert(*is_another_page_ptr(p) == 0 || *is_another_page_ptr(p) == 2); // would be 1 for page in the middle of a datapage
    releasePage(p);
}
Example #11
recordid TallocFromPage(int xid, pageid_t page, unsigned long size) {
  stasis_alloc_t* alloc = stasis_runtime_alloc_state();
  short type;
  if(size >= BLOB_THRESHOLD_SIZE) {
    type = BLOB_SLOT;
  } else {
    assert(size > 0);
    type = size;
  }

  pthread_mutex_lock(&alloc->mut);
  if(!stasis_allocation_policy_can_xid_alloc_from_page(alloc->allocPolicy, xid, page)) {
    pthread_mutex_unlock(&alloc->mut);
    return NULLRID;
  }
  Page * p = loadPage(xid, page);
  writelock(p->rwlatch,0);
  recordid rid = stasis_record_alloc_begin(xid, p, type);


  if(rid.size != INVALID_SLOT) {
    stasis_record_alloc_done(xid,p,rid);
    stasis_allocation_policy_alloced_from_page(alloc->allocPolicy, xid, page);
    unlock(p->rwlatch);

    alloc_arg a = { rid.slot, type };

    Tupdate(xid, rid.page, &a, sizeof(a), OPERATION_ALLOC);

    if(type == BLOB_SLOT) {
      rid.size = size;
      stasis_blob_alloc(xid,rid);
    }
  } else {
    unlock(p->rwlatch);
  }

  releasePage(p);
  pthread_mutex_unlock(&alloc->mut);

  stasis_transaction_table_set_argument(alloc->xact_table, xid, alloc->callback_id,
					AT_COMMIT, alloc);
  return rid;
}
Example #12
static void stasis_alloc_reserve_new_region(stasis_alloc_t* alloc, int xid) {
     void* nta = TbeginNestedTopAction(xid, OPERATION_NOOP, 0,0);

     pageid_t firstPage = TregionAlloc(xid, TALLOC_REGION_SIZE, STORAGE_MANAGER_TALLOC);
     int initialFreespace = -1;

     for(pageid_t i = 0; i < TALLOC_REGION_SIZE; i++) {
       TinitializeSlottedPage(xid, firstPage + i);
       if(initialFreespace == -1) {
         Page * p = loadPage(xid, firstPage);
         readlock(p->rwlatch,0);
         initialFreespace = stasis_record_freespace(xid, p);
         unlock(p->rwlatch);
         releasePage(p);
       }
       stasis_allocation_policy_register_new_page(alloc->allocPolicy, firstPage + i, initialFreespace);
     }

     TendNestedTopAction(xid, nta);
}
Example #13
size_t dataPage::read_bytes(byte * buf, off_t offset, ssize_t remaining) {
    recordid chunk = calc_chunk_from_offset(offset);
    if(chunk.size > remaining) {
        chunk.size = remaining;
    }
    if(chunk.page >= first_page_ + page_count_) {
        chunk.size = 0; // eof
    } else {
        Page *p = alloc_ ? alloc_->load_page(xid_, chunk.page) : loadPage(xid_, chunk.page);
        if(p->pageType != DATA_PAGE) {
            fprintf(stderr, "Page type %d, id %lld lsn %lld\n", (int)p->pageType, (long long)p->id, (long long)p->LSN);
            assert(p->pageType == DATA_PAGE);
        }
        if((chunk.page + 1 == page_count_ + first_page_)
                && (*is_another_page_ptr(p))) {
            page_count_++;
        }
        memcpy(buf, data_at_offset_ptr(p, chunk.slot), chunk.size);
        releasePage(p);
    }
    return chunk.size;
}
Example #14
Page * dataPage::write_data_and_latch(const byte * buf, size_t len, bool init_next, bool latch) {
    bool first = true;
    Page * p = 0;
    while(1) {
        assert(len > 0);
        size_t written;
        if(latch && first ) {
            written = write_bytes(buf, len, &p);
        } else {
            written = write_bytes(buf, len);
        }
        if(written == 0) {
            assert(!p);
            return 0; // fail
        }
        if(written == len) {
            if(latch) {
                return p;
            } else {
                return (Page*)1;
            }
        }
        if(len > PAGE_SIZE && ! first) {
            assert(written > 4000);
        }
        buf += written;
        len -= written;
        if(init_next) {
            if(!initialize_next_page()) {
                if(p) {
                    unlock(p->rwlatch);
                    releasePage(p);
                }
                return 0; // fail
            }
        }
        first = false;
    }
}
Example #15
size_t PageCache::releaseFromStart(size_t maxBytes) {
    size_t bytesReleased = 0;

    while (maxBytes > 0 && !mActivePages.empty()) {
        List<Page *>::iterator it = mActivePages.begin();

        Page *page = *it;

        if (maxBytes < page->mSize) {
            break;
        }

        mActivePages.erase(it);

        maxBytes -= page->mSize;
        bytesReleased += page->mSize;

        releasePage(page);
    }

    mTotalSize -= bytesReleased;
    return bytesReleased;
}
Example #16
bool dataPage::initialize_next_page() {
    recordid rid = calc_chunk_from_offset(write_offset_);
    assert(rid.slot == 0);
    DEBUG("\t\t%lld\n", (long long)rid.page);

    if(rid.page >= first_page_ + page_count_) {
        assert(rid.page == first_page_ + page_count_);
        if(alloc_->grow_extent(1)) {
            page_count_++;
        } else {
            return false; // The region is full
        }
    } else {
        abort();
    }

    Page *p = alloc_ ? alloc_->load_page(xid_, rid.page-1) : loadPage(xid_, rid.page-1);
    *is_another_page_ptr(p) = (rid.page-1 == first_page_) ? 2 : 1;
    stasis_page_lsn_write(xid_, p, alloc_->get_lsn(xid_));
    releasePage(p);

    initialize_page(rid.page);
    return true;
}
Example #17
int main (int argc, char * argv[]) {
	double MB = 1024 * 1024;
	uint64_t mb = 20000; // size of run, in megabytes.

	enum run_type mode = ALL;

	const uint64_t num_pages = mb * (MB / PAGE_SIZE);

	stasis_buffer_manager_size = (512 * MB) / PAGE_SIZE;

//	stasis_buffer_manager_hint_writes_are_sequential = 1;
//	stasis_dirty_page_table_flush_quantum = (8 * MB) / PAGE_SIZE; // XXX if set to high-> segfault
//	stasis_dirty_page_count_hard_limit = (16 * MB) / PAGE_SIZE;
//	stasis_dirty_page_count_soft_limit = (10 * MB) / PAGE_SIZE;
//	stasis_dirty_page_low_water_mark = (8 * MB) / PAGE_SIZE;

	// Hard disk preferred.
	/*	stasis_dirty_page_table_flush_quantum = (4 * MB) / PAGE_SIZE; // XXX if set to high-> segfault
	stasis_dirty_page_count_hard_limit = (12 * MB) / PAGE_SIZE;
	stasis_dirty_page_count_soft_limit = (8 * MB) / PAGE_SIZE;
	stasis_dirty_page_low_water_mark = (4 * MB) / PAGE_SIZE;*/

	// SSD preferred.
	stasis_dirty_page_table_flush_quantum = (4 * MB) / PAGE_SIZE; // XXX if set too high -> segfault
	stasis_dirty_page_count_hard_limit = (40 * MB) / PAGE_SIZE;
	stasis_dirty_page_count_soft_limit = (32 * MB) / PAGE_SIZE;
	stasis_dirty_page_low_water_mark   = (16 * MB) / PAGE_SIZE;

	stasis_dirty_page_table_flush_quantum = (4 * MB) / PAGE_SIZE; // XXX if set too high -> segfault
	stasis_dirty_page_count_hard_limit = (48 * MB) / PAGE_SIZE;
	stasis_dirty_page_count_soft_limit = (40 * MB) / PAGE_SIZE;
	stasis_dirty_page_low_water_mark   = (32 * MB) / PAGE_SIZE;

	printf("stasis_buffer_manager_size=%lld\n", (long long)stasis_buffer_manager_size * PAGE_SIZE);
	printf("Hard limit=%lld\n", (long long)((stasis_dirty_page_count_hard_limit*PAGE_SIZE)/MB));
	printf("Hard limit is %f pct.\n", 100.0 * ((double)stasis_dirty_page_count_hard_limit)/((double)stasis_buffer_manager_size));

	bLSM::init_stasis();

	regionAllocator * readableAlloc = NULL;
	if(!mode) {
		int xid = Tbegin();
		regionAllocator * alloc = new regionAllocator(xid, num_pages);
		printf("Starting first write of %lld mb\n", (long long)mb);
		struct timeval start, start_sync, stop; double elapsed;
		gettimeofday(&start, 0);
		pageid_t extent = alloc->alloc_extent(xid, num_pages);
		for(uint64_t i = 0; i < num_pages; i++) {
			Page * p = loadUninitializedPage(xid, i+extent);
			stasis_dirty_page_table_set_dirty((stasis_dirty_page_table_t*)stasis_runtime_dirty_page_table(), p);
			releasePage(p);
		}
		gettimeofday(&start_sync,0);
		alloc->force_regions(xid);
		readableAlloc = alloc;
		Tcommit(xid);
//		alloc = new RegionAllocator(xid, num_pages);
		gettimeofday(&stop, 0);
		elapsed = stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
		printf("Write took %f seconds (%f mb/sec)\n", elapsed, ((double)mb)/elapsed);
		printf("Sync took %f seconds.\n", stasis_timeval_to_double(stasis_subtract_timeval(stop, start_sync)));

	}

	if(!mode) {
		int xid = Tbegin();
		regionAllocator * alloc = new regionAllocator(xid, num_pages);
		printf("Starting write with parallel read of %lld mb\n", (long long)mb);
		struct timeval start, start_sync, stop; double elapsed;
		gettimeofday(&start, 0);

		pageid_t region_length;
		pageid_t region_count;
		pageid_t * old_extents = readableAlloc->list_regions(xid, &region_length, &region_count);
		pageid_t extent = alloc->alloc_extent(xid, num_pages);
		assert(region_count == 1);
		for(uint64_t i = 0; i < num_pages/2; i++) {
			Page * p = loadUninitializedPage(xid, i+extent);
			stasis_dirty_page_table_set_dirty((stasis_dirty_page_table_t*)stasis_runtime_dirty_page_table(), p);
			releasePage(p);
			p = loadPage(xid, i+old_extents[0]);
			releasePage(p);
		}
		gettimeofday(&start_sync,0);
		alloc->force_regions(xid);
		delete alloc;
		Tcommit(xid);
//		alloc = new RegionAllocator(xid, num_pages);
		gettimeofday(&stop, 0);
		elapsed = stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
		printf("Write took %f seconds (%f mb/sec)\n", elapsed, ((double)mb)/elapsed);
		printf("Sync took %f seconds.\n", stasis_timeval_to_double(stasis_subtract_timeval(stop, start_sync)));

	}

	if(!mode) {
		int xid = Tbegin();
		struct timeval start, start_sync, stop; double elapsed;
		printf("Starting write of giant datapage\n");
		gettimeofday(&start, 0);
		regionAllocator * alloc = new regionAllocator(xid, num_pages);
		dataPage * dp = new DataPage(xid, num_pages-1, alloc);
		byte * key = (byte*)calloc(100, 1);
		byte * val = (byte*)calloc(900, 1);
		dataTuple * tup = dataTuple::create(key, 100, val, 900);
		free(key);
		free(val);
		while(1) {
			if(!dp->append(tup)) {
				break;
			}
		}
		gettimeofday(&start_sync,0);
		alloc->force_regions(xid);

		gettimeofday(&stop, 0);
		Tcommit(xid);
		elapsed = stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
		printf("Write took %f seconds (%f mb/sec)\n", elapsed, ((double)mb)/elapsed);
		printf("Sync took %f seconds.\n", stasis_timeval_to_double(stasis_subtract_timeval(stop, start_sync)));
	}
	if(!mode) {
		int xid = Tbegin();
		struct timeval start, start_sync, stop; double elapsed;
		printf("Starting write of many small datapages\n");
		gettimeofday(&start, 0);
		regionAllocator * alloc = new regionAllocator(xid, num_pages);
		byte * key = (byte*)calloc(100, 1);
		byte * val = (byte*)calloc(900, 1);
		dataTuple * tup = dataTuple::create(key, 100, val, 900);
		free(key);
		free(val);
		dataPage * dp = 0;
		uint64_t this_count = 0;
		uint64_t count  = 0;
		uint64_t dp_count = 0;
		while((count * 1000) < (mb * 1024*1024)) {
			if((!dp) || !dp->append(tup)) {
				dp = new DataPage(xid, 2, alloc);
				dp_count++;
			}
			count++;
			this_count++;
//			if(((this_count * 1000) > (1024 * 1024 * 16))) {
//				alloc->force_regions(xid);
//				this_count = 0;
//				gettimeofday(&stop, 0);
//				elapsed = stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
//				printf("Write took %f seconds (%f mb/sec)\n", elapsed, ((double)(count*1000))/(1024*1024*elapsed));
//			}
		}
		gettimeofday(&start_sync,0);
		alloc->force_regions(xid);
		gettimeofday(&stop, 0);
		Tcommit(xid);
		elapsed = stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
		printf("Write took %f seconds (%f mb/sec)\n", elapsed, ((double)(count*1000))/(elapsed*1024*1024));
		printf("Sync took %f seconds.\n", stasis_timeval_to_double(stasis_subtract_timeval(stop, start_sync)));
	}

	if(!mode) {
		int xid = Tbegin();
		struct timeval start, start_sync, stop; double elapsed;
		printf("Starting two parallel writes of many small datapages\n");
		gettimeofday(&start, 0);
		regionAllocator * alloc = new regionAllocator(xid, num_pages/2);
		regionAllocator * alloc2 = new regionAllocator(xid, num_pages/2);
		byte * key = (byte*)calloc(100, 1);
		byte * val = (byte*)calloc(900, 1);
		dataTuple * tup = dataTuple::create(key, 100, val, 900);
		free(key);
		free(val);
		dataPage * dp = 0;
		dataPage * dp2 = 0;
		uint64_t this_count = 0;
		uint64_t count  = 0;
		uint64_t dp_count = 0;
		while((count * 1000) < (mb * 1024*1024)) {
			if((!dp) || !dp->append(tup)) {
				dp = new DataPage(xid, 2, alloc);
				dp_count++;
			}
			if((!dp2) || !dp2->append(tup)) {
				dp2 = new DataPage(xid, 2, alloc2);
				//dp_count++;
			}
			count += 2;
			this_count++;
//			if(((this_count * 1000) > (1024 * 1024 * 16))) {
//				alloc->force_regions(xid);
//				this_count = 0;
//				gettimeofday(&stop, 0);
//				elapsed = stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
//				printf("Write took %f seconds (%f mb/sec)\n", elapsed, ((double)(count*1000))/(1024*1024*elapsed));
//			}
		}
		gettimeofday(&start_sync,0);
		alloc->force_regions(xid);
		alloc2->force_regions(xid);
		gettimeofday(&stop, 0);
		Tcommit(xid);
		elapsed = stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
		printf("Write took %f seconds (%f mb/sec)\n", elapsed, ((double)(count*1000))/(elapsed*1024*1024));
		printf("Sync took %f seconds.\n", stasis_timeval_to_double(stasis_subtract_timeval(stop, start_sync)));

	}

	regionAllocator * read_alloc = NULL;
	regionAllocator * read_alloc2 = NULL;
	regionAllocator * read_alloc3 = NULL;
	regionAllocator * read_alloc4 = NULL;

	if(!mode) {
		int xid = Tbegin();
		struct timeval start, start_sync, stop; double elapsed;
		printf("Starting four parallel writes of many small datapages\n");
		gettimeofday(&start, 0);
		regionAllocator * alloc = new regionAllocator(xid, num_pages/4);
		regionAllocator * alloc2 = new regionAllocator(xid, num_pages/4);
		regionAllocator * alloc3 = new regionAllocator(xid, num_pages/4);
		regionAllocator * alloc4 = new regionAllocator(xid, num_pages/4);
		byte * key = (byte*)calloc(100, 1);
		byte * val = (byte*)calloc(900, 1);
		dataTuple * tup = dataTuple::create(key, 100, val, 900);
		free(key);
		free(val);
		dataPage * dp = 0;
		dataPage * dp2 = 0;
		dataPage * dp3 = 0;
		dataPage * dp4 = 0;
		uint64_t this_count = 0;
		uint64_t count  = 0;
		uint64_t dp_count = 0;

		while((count * 1000) < (mb * 1024*1024)) {
			if((!dp) || !dp->append(tup)) {
				dp = new DataPage(xid, 2, alloc);
				dp_count++;
			}
			if((!dp2) || !dp2->append(tup)) {
				dp2 = new DataPage(xid, 2, alloc2);
				//dp_count++;
			}
			if((!dp3) || !dp3->append(tup)) {
				dp3 = new DataPage(xid, 2, alloc3);
				//dp_count++;
			}
			if((!dp4) || !dp4->append(tup)) {
				dp4 = new DataPage(xid, 2, alloc4);
				//dp_count++;
			}
			count += 4;
			this_count++;
//			if(((this_count * 1000) > (1024 * 1024 * 16))) {
//				alloc->force_regions(xid);
//				this_count = 0;
//				gettimeofday(&stop, 0);
//				elapsed = stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
//				printf("Write took %f seconds (%f mb/sec)\n", elapsed, ((double)(count*1000))/(1024*1024*elapsed));
//			}
		}
		gettimeofday(&start_sync,0);
		alloc->force_regions(xid);
		alloc2->force_regions(xid);
		alloc3->force_regions(xid);
		alloc4->force_regions(xid);
		gettimeofday(&stop, 0);
		Tcommit(xid);
		elapsed = stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
		printf("Write took %f seconds (%f mb/sec)\n", elapsed, ((double)(count*1000))/(elapsed*1024*1024));
		printf("Sync took %f seconds.\n", stasis_timeval_to_double(stasis_subtract_timeval(stop, start_sync)));
		read_alloc = alloc;
		read_alloc2 = alloc2;
		read_alloc3 = alloc3;
		read_alloc4 = alloc4;

	}

	if(!mode) {
		int xid = Tbegin();
		struct timeval start, start_sync, stop; double elapsed;
		printf("Starting four parallel writes of many small datapages\n");
		gettimeofday(&start, 0);
		regionAllocator * alloc = new regionAllocator(xid, num_pages/4);
		regionAllocator * alloc2 = new regionAllocator(xid, num_pages/4);
		regionAllocator * alloc3 = new regionAllocator(xid, num_pages/4);
		regionAllocator * alloc4 = new regionAllocator(xid, num_pages/4);
		byte * key = (byte*)calloc(100, 1);
		byte * val = (byte*)calloc(900, 1);
		dataTuple * tup = dataTuple::create(key, 100, val, 900);
		free(key);
		free(val);
		dataPage * dp = 0;
		dataPage * dp2 = 0;
		dataPage * dp3 = 0;
		dataPage * dp4 = 0;
		uint64_t this_count = 0;
		uint64_t count  = 0;
		uint64_t dp_count = 0;

		pageid_t n1, n2, n3, n4;
		pageid_t l1, l2, l3, l4;
		pageid_t * regions1, * regions2, * regions3, * regions4;
 
		regions1 = read_alloc->list_regions(xid, &l1, &n1);
		regions2 = read_alloc2->list_regions(xid, &l2, &n2);
		regions3 = read_alloc3->list_regions(xid, &l3, &n3);
		regions4 = read_alloc4->list_regions(xid, &l4, &n4);

		pageid_t i1 = regions1[0];
		pageid_t i2 = regions2[0];
		pageid_t i3 = regions3[0];
		pageid_t i4 = regions4[0];

		dataPage * rdp  = new DataPage(xid, 0, i1);
		dataPage * rdp2 = new DataPage(xid, 0, i2);
		dataPage * rdp3 = new DataPage(xid, 0, i3);
		dataPage * rdp4 = new DataPage(xid, 0, i4);

		dataPage::iterator it1 = rdp->begin();
		dataPage::iterator it2 = rdp2->begin();
		dataPage::iterator it3 = rdp3->begin();
		dataPage::iterator it4 = rdp4->begin();

		while((count * 1000) < (mb * 1024*1024)) {
			if((!dp) || !dp->append(tup)) {
				dp = new DataPage(xid, 2, alloc);
				dp_count++;
			}
			if((!dp2) || !dp2->append(tup)) {
				dp2 = new DataPage(xid, 2, alloc2);
				//dp_count++;
			}
			if((!dp3) || !dp3->append(tup)) {
				dp3 = new DataPage(xid, 2, alloc3);
				//dp_count++;
			}
			if((!dp4) || !dp4->append(tup)) {
				dp4 = new DataPage(xid, 2, alloc4);
				//dp_count++;
			}
			dataTuple * t;
			if((!rdp) || !(t = it1.getnext())) {
			  i1+= rdp->get_page_count();
			  if(rdp) delete rdp;
			  rdp = new DataPage(xid, 0, i1);
			  //			  i1++;
			  it1 = rdp->begin();
			  t = it1.getnext();
			}
			if(t) dataTuple::freetuple(t);
			if((!rdp2) || !(t = it2.getnext())) {
			  i2+= rdp2->get_page_count();
			  if(rdp2) delete rdp2;
			  rdp2 = new DataPage(xid, 0, i2);
			  //			  i2++;
			  it2 = rdp2->begin();
			  t = it2.getnext();
			}
			if(t) dataTuple::freetuple(t);
			if((!rdp3) || !(t = it3.getnext())) {
			  i3+= rdp3->get_page_count();
			  if(rdp3) delete rdp3;
			  rdp3 = new DataPage(xid, 0, i3);
			  //			  i3++;
			  it3 = rdp3->begin();
			  t = it3.getnext();
			}
			if(t) dataTuple::freetuple(t);
			if((!rdp4) || !(t = it4.getnext())) {
			  i4+= rdp4->get_page_count();
			  if(rdp4) delete rdp4;
			  rdp4 = new DataPage(xid, 0, i4);
			  //			  i4++;
			  it4 = rdp4->begin();
			  t = it4.getnext();
			}
			if(t) dataTuple::freetuple(t);

			count += 8;
			this_count++;
//			if(((this_count * 1000) > (1024 * 1024 * 16))) {
//				alloc->force_regions(xid);
//				this_count = 0;
//				gettimeofday(&stop, 0);
//				elapsed = stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
//				printf("Write took %f seconds (%f mb/sec)\n", elapsed, ((double)(count*1000))/(1024*1024*elapsed));
//			}
		}
		gettimeofday(&start_sync,0);
		alloc->force_regions(xid);
		alloc2->force_regions(xid);
		alloc3->force_regions(xid);
		alloc4->force_regions(xid);
		gettimeofday(&stop, 0);
		Tcommit(xid);
		elapsed = stasis_timeval_to_double(stasis_subtract_timeval(stop, start));
		printf("Write took %f seconds (%f mb/sec)\n", elapsed, ((double)(count*1000))/(elapsed*1024*1024));
		printf("Sync took %f seconds.\n", stasis_timeval_to_double(stasis_subtract_timeval(stop, start_sync)));
		read_alloc = alloc;
		read_alloc2 = alloc2;
		read_alloc3 = alloc3;
		read_alloc4 = alloc4;

	}


	bLSM::deinit_stasis();
}
Example #18
recordid Talloc(int xid, unsigned long size) {
  stasis_alloc_t* alloc = stasis_runtime_alloc_state();
  short type;
  if(size >= BLOB_THRESHOLD_SIZE) {
    type = BLOB_SLOT;
  } else {
    assert(size > 0);
    type = size;
  }

  recordid rid;

  pthread_mutex_lock(&alloc->mut);

  pageid_t pageid =
      stasis_allocation_policy_pick_suitable_page(alloc->allocPolicy, xid,
                               stasis_record_type_to_size(type));

  if(pageid == INVALID_PAGE) {
    stasis_alloc_reserve_new_region(alloc, xid);
    pageid = stasis_allocation_policy_pick_suitable_page(alloc->allocPolicy, xid,
                                    stasis_record_type_to_size(type));
  }
  alloc->lastFreepage = pageid;

  Page * p = loadPage(xid, alloc->lastFreepage);

  writelock(p->rwlatch, 0);
  int rec_size = stasis_record_type_to_size(type);
  if(rec_size < 4) { rec_size = 4; }
  while(stasis_record_freespace(xid, p) < rec_size) {
    stasis_record_compact(p);
    int newFreespace = stasis_record_freespace(xid, p);

    if(newFreespace >= rec_size) {
      break;
    }

    unlock(p->rwlatch);
    stasis_allocation_policy_update_freespace(alloc->allocPolicy, pageid, newFreespace);
    releasePage(p);

    pageid = stasis_allocation_policy_pick_suitable_page(alloc->allocPolicy, xid,
                                    rec_size);

    if(pageid == INVALID_PAGE) {
      stasis_alloc_reserve_new_region(alloc, xid);
      pageid = stasis_allocation_policy_pick_suitable_page(alloc->allocPolicy, xid,
                                                       rec_size);
    }

    alloc->lastFreepage = pageid;

    p = loadPage(xid, alloc->lastFreepage);
    writelock(p->rwlatch, 0);
  }

  rid = stasis_record_alloc_begin(xid, p, type);

  assert(rid.size != INVALID_SLOT);

  stasis_record_alloc_done(xid, p, rid);
  int newFreespace = stasis_record_freespace(xid, p);
  stasis_allocation_policy_alloced_from_page(alloc->allocPolicy, xid, pageid);
  stasis_allocation_policy_update_freespace(alloc->allocPolicy, pageid, newFreespace);
  unlock(p->rwlatch);

  alloc_arg a = { rid.slot, type };

  Tupdate(xid, rid.page, &a, sizeof(a), OPERATION_ALLOC);

  if(type == BLOB_SLOT) {
    rid.size = size;
    stasis_blob_alloc(xid, rid);
  }

  releasePage(p);
  pthread_mutex_unlock(&alloc->mut);

  stasis_transaction_table_set_argument(alloc->xact_table, xid, alloc->callback_id,
					AT_COMMIT, alloc);

  return rid;  // TODO return NULLRID on error
}
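A minimal caller-side round trip for Talloc(), assuming the standard Stasis transaction calls (Tbegin, Tset, Tread, Tcommit; Tdealloc appears in the next example) and omitting error handling, might look like this sketch:

// Sketch of a Talloc() round trip; assumes Tinit() has already been called
// and that the usual Stasis Tbegin/Tset/Tread/Tcommit entry points are available.
void talloc_roundtrip_example(void) {
  int xid = Tbegin();
  int value = 42;
  recordid rid = Talloc(xid, sizeof(value));   // picks a page via the allocation policy
  Tset(xid, rid, &value);                      // logs and applies the write
  int copy = 0;
  Tread(xid, rid, &copy);                      // reads back through the buffer manager
  Tdealloc(xid, rid);                          // frees the slot again (see the next example)
  Tcommit(xid);
}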
Example #19
void Tdealloc(int xid, recordid rid) {
  stasis_alloc_t* alloc = stasis_runtime_alloc_state();

  // @todo this needs to garbage collect empty storage regions.

  pthread_mutex_lock(&alloc->mut);
  Page * p = loadPage(xid, rid.page);

  readlock(p->rwlatch,0);

  recordid newrid = stasis_record_dereference(xid, p, rid);
  stasis_allocation_policy_dealloced_from_page(alloc->allocPolicy, xid, newrid.page);

  int64_t size = stasis_record_length_read(xid,p,rid);
  int64_t type = stasis_record_type_read(xid,p,rid);

  if(type == NORMAL_SLOT) { type = size; }

  byte * preimage = malloc(sizeof(alloc_arg)+size);

  ((alloc_arg*)preimage)->slot = rid.slot;
  ((alloc_arg*)preimage)->type = type;

  // stasis_record_read() wants rid to have its raw size to prevent
  // code that doesn't know about record types from introducing memory
  // bugs.
  rid.size = size;
  stasis_record_read(xid, p, rid, preimage+sizeof(alloc_arg));
  // restore rid to valid state.
  rid.size = type;

  // Ok to release latch; page is still pinned (so no WAL problems).
  // allocationPolicy protects us from running out of space due to concurrent
  // xacts.

  // Also, there can be no reordering of allocations / deallocations,
  // since we're holding alloc->mut.  However, we might reorder a Tset()
  // with a Tdealloc() or Talloc() on the same page.  If this happens,
  // it's an unsafe race in the application, and not technically our problem.

  // @todo  Tupdate forces allocation to release a latch, leading to potentially nasty application bugs.  Perhaps this is the wrong API!

  // @todo application-level allocation races can lead to unrecoverable logs.
  unlock(p->rwlatch);

  Tupdate(xid, rid.page, preimage,
          sizeof(alloc_arg)+size, OPERATION_DEALLOC);

  releasePage(p);

  pthread_mutex_unlock(&alloc->mut);

  if(type==BLOB_SLOT) {
    stasis_blob_dealloc(xid,(blob_record_t*)(preimage+sizeof(alloc_arg)));
  }

  free(preimage);

  stasis_transaction_table_set_argument(alloc->xact_table, xid, alloc->callback_id,
					AT_COMMIT, alloc);

}
Example #20
bool dataPage::append(dataTuple const * dat)
{
    // First, decide if we should append to this datapage, based on whether
    // appending will waste more or less space than starting a new datapage

    bool accept_tuple;
    len_t tup_len = dat->byte_length();
    // Decision tree
    if(write_offset_ > (initial_page_count_ * PAGE_SIZE)) {
        // we already exceeded the page budget
        if(write_offset_ > (2 * initial_page_count_ * PAGE_SIZE)) {
            // ... by a lot.  Reject regardless.  This prevents small tuples from
            //     being stuck behind giant ones without sacrificing much space
            //     (as a percentage of the whole index), because this path only
            //     can happen once per giant object.
            accept_tuple = false;
        } else {
            // ... by a little bit.
            accept_tuple = true;
            //Accept tuple if it fits on this page, or if it's big..
            //accept_tuple = (((write_offset_-1) & ~(PAGE_SIZE-1)) == (((write_offset_ + tup_len)-1) & ~(PAGE_SIZE-1)));
        }
    } else {
        if(write_offset_ + tup_len < (initial_page_count_ * PAGE_SIZE)) {
            // tuple fits.  contractually obligated to accept it.
            accept_tuple = true;
        } else if(write_offset_ == 0) {
            // datapage is empty.  contractually obligated to accept tuple.
            accept_tuple = true;
        } else {
            if(tup_len > initial_page_count_ * PAGE_SIZE) {
                // this is a "big tuple"
                len_t reject_padding = PAGE_SIZE - (write_offset_ & (PAGE_SIZE-1));
                len_t accept_padding = PAGE_SIZE - ((write_offset_ + tup_len) & (PAGE_SIZE-1));
                accept_tuple = accept_padding < reject_padding;
            } else {
                // this is a "small tuple"; only exceed budget if doing so leads to < 33% overhead for this data.
                len_t accept_padding = PAGE_SIZE - (write_offset_ & (PAGE_SIZE-1));
                accept_tuple = (3*accept_padding) < tup_len;
            }
        }
    }

    if(!accept_tuple) {
        DEBUG("offset %lld closing datapage\n", write_offset_);
        return false;
    }

    DEBUG("offset %lld continuing datapage\n", write_offset_);

    // TODO could be more efficient; this does a malloc and memcpy.
    // The alternative couples us more strongly to datatuple, but simplifies
    // datapage.
    byte * buf = dat->to_bytes();
    len_t dat_len = dat->byte_length();

    Page * p = write_data_and_latch((const byte*)&dat_len, sizeof(dat_len));
    bool succ = false;
    if(p) {
        succ = write_data(buf, dat_len);
        unlock(p->rwlatch);
        releasePage(p);
    }

    free(buf);

    return succ;
}
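To make the padding arithmetic in the decision tree at the top of append() concrete, here is a worked trace with assumed numbers (a 4096-byte PAGE_SIZE and a two-page budget); the values are illustrative only:

// Worked example of the "small tuple" branch, with assumed numbers:
//   PAGE_SIZE = 4096, initial_page_count_ = 2  (budget = 8192 bytes)
//   write_offset_ = 7900, tup_len = 500
// The budget is not yet exceeded (7900 <= 8192), the tuple does not fit
// (7900 + 500 >= 8192), the page is not empty, and 500 <= 8192, so we land
// in the small-tuple case:
//   accept_padding = PAGE_SIZE - (write_offset_ & (PAGE_SIZE-1))
//                  = 4096 - (7900 & 4095) = 4096 - 3804 = 292
//   accept_tuple   = (3 * accept_padding) < tup_len  ->  876 < 500  ->  false
// so the datapage is closed and the caller starts a new one.  A 1000-byte
// tuple at the same offset would give 876 < 1000 -> true: exceeding the page
// budget then wastes less than about a third of the tuple's own size.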