/**
 * Append `len` bytes starting at `p_addr` to the file described by
 * `p_file_dir`, then write the updated directory entry back under `dir_no`.
 *
 * The unused tail of the file's current last page is filled first. Whatever
 * is left is written into newly allocated pages, one page at a time; those
 * pages are chained to each other and finally linked behind the old last
 * page, after which `first_child_lastpage` is moved to the newest page.
 *
 * @param dir_no      directory node number passed to
 *                    _write_directory_node_value()
 * @param p_file_dir  in-memory directory entry of the file; its
 *                    `parent_filesize` and `first_child_lastpage` fields are
 *                    updated as data is appended
 * @param p_addr      source buffer
 * @param len         number of bytes to append
 *
 * @return `len` on success; 0 when `len <= 0`; E_LACK_SPACE when the super
 *         page reports fewer idle pages than PAGES_OCCUPIED(len); E_WT when
 *         writing into the existing last page fails; otherwise the negative
 *         code returned by the failing page/directory helper.
 *
 * NOTE(review): on a failure inside the allocation loop only the page
 * allocated in the current iteration is released. Pages from earlier
 * iterations and the already-advanced in-memory `parent_filesize` are left
 * as they are, exactly like the previous implementation - confirm whether
 * callers discard `p_file_dir` on error.
 */
int32 _append_file (int32 dir_no, directory *p_file_dir, const void *p_addr, int32 len)
{
    /* Byte cursor: arithmetic on `void *` is a compiler extension. */
    const char *p_src = (const char *) p_addr;
    int32 i_first_page_alloc = PAGE_NULL;   /* head of the newly built chain */
    int32 i_pre_page_no = PAGE_NULL;        /* most recently completed new page */
    int32 i_new_page = PAGE_NULL;           /* page allocated in this iteration */
    int32 i_page_offset;
    int32 i_room;
    int32 i_chunk;
    int32 i_ret;
    int32 _len = len;

    if (len <= 0)
        return 0;

    INIT_SUPER_PAGE ();
    if (g_p_super_page->idle_pages_num < PAGES_OCCUPIED (len))
        return E_LACK_SPACE;

    /* Offset of the end of file inside its last page (0 => last page is full). */
    i_page_offset = p_file_dir->parent_filesize & (PAGE_SIZE - 1);
    if (i_page_offset) {
        i_room = PAGE_SIZE - i_page_offset;
        i_chunk = MIN (len, i_room);
        if (_write_page_offset (p_file_dir->first_child_lastpage, i_page_offset,
                                p_src, i_chunk) < 0) {
            return E_WT;
        }
        p_src += i_chunk;
        p_file_dir->parent_filesize += i_chunk;
        len -= i_chunk;
    }

    while (len > 0) {
        i_chunk = MIN (len, PAGE_SIZE);

        /* Reset first so the error path never frees a stale page number.
         * NOTE(review): assumes _free_page() tolerates PAGE_NULL when
         * _alloc_page() fails without storing a page - confirm. */
        i_new_page = PAGE_NULL;
        if ((i_ret = _alloc_page (1, &i_new_page)) < 0)
            goto error;
        if ((i_ret = _write_page (i_new_page, p_src, i_chunk)) < 0)
            goto error;
        /* Terminate the chain at the new page. */
        if ((i_ret = _mark_page (i_new_page, PAGE_NULL)) < 0)
            goto error;

        /* Remember only the FIRST page we allocate; it is linked to the
         * file's old last page once all data has been written. */
        if (PAGE_NULL == i_first_page_alloc)
            i_first_page_alloc = i_new_page;

        /* Chain the previous new page to this one. */
        if (PAGE_NULL != i_pre_page_no) {
            if ((i_ret = _mark_page (i_pre_page_no, i_new_page)) < 0)
                goto error;
        }

        p_src += i_chunk;
        /* Count the bytes actually written, not a whole page. */
        p_file_dir->parent_filesize += i_chunk;
        len -= i_chunk;
        i_pre_page_no = i_new_page;
    }

    if (PAGE_NULL != i_first_page_alloc) {
        /* Hook the new chain behind the old last page, then advance the
         * last-page pointer to the newest page. */
        if ((i_ret = _mark_page (p_file_dir->first_child_lastpage,
                                 i_first_page_alloc)) < 0)
            return i_ret;
        p_file_dir->first_child_lastpage = i_pre_page_no;
    }

    if ((i_ret = _write_directory_node_value (dir_no, p_file_dir)) < 0)
        return i_ret;

    return _len;

error:
    _free_page (i_new_page);
    return i_ret;
}
/** * @brief Use a timeout_ms value of 0 to wait until a page appears or * the tuple_fifo is closed (normal behavior). Use a negative * timeout_ms value to avoid waiting. If the tuple_fifo contains no * pages, we return immediately with a value of 0. Use a positive * timeout_ms value to wait for a max length of time. * * @return 1 if we got a page. -1 if the tuple_fifo has been * closed. If 'timeout_ms' is negative and this method returns 0, it * means the tuple_fifo is empty. If the timeout_ms value is positive * and we return 0, it means we timed out. */ int tuple_fifo::_get_read_page(int timeout_ms) { // * * * BEGIN CRITICAL SECTION * * * critical_section_t cs(_lock); _termination_check(); /* Free the page so the writer can use it. */ if (is_in_memory() && (_read_page != SENTINEL_PAGE)) { /* We are still maintaining an in-memory page list from which we are pulling pages. We release them to _free_pages as we are done with them. */ _read_page->clear(); _free_pages.push_back(_read_page.release()); _set_read_page(SENTINEL_PAGE); } /* If 'wait_on_empty' and the buffer is currently empty, we must wait for space to open up. Once we start waiting we continue waiting until either space for '_threshold' pages is available OR the writer has invoked send_eof() or terminate(). */ for(size_t t=1; (timeout_ms >= 0) && !is_done_writing() && (_available_fifo_reads() < t); t = _threshold) { /* We are to either wait for a page or wait for timeout_ms. */ if(!wait_for_writer(timeout_ms)) /* Timed out! */ break; _termination_check(); } TRACE(TRACE_ALWAYS&TRACE_MASK_DISK, "available reads = %d\n", (int)_available_fifo_reads()); if(_available_fifo_reads() == 0) { /* If we are here, we exited the loop above because one of the other conditions failed. We either noticed that the tuple_fifo has been closed or we've timed out. 
*/ if(is_done_writing()) { /* notify caller that the tuple_fifo is closed */ TRACE(TRACE_ALWAYS&TRACE_MASK_DISK, "Returning -1\n"); return -1; } if(timeout_ms != 0) /* notify caller that we timed out */ return 0; unreachable(); } switch(_state.current()) { case tuple_fifo_state_t::IN_MEMORY: case tuple_fifo_state_t::IN_MEMORY_DONE_WRITING: { /* pull the page from page_list */ assert(!_pages.empty()); _set_read_page(_pages.front()); _pages.pop_front(); assert(_pages_in_memory > 0); _pages_in_memory--; break; } case tuple_fifo_state_t::ON_DISK: case tuple_fifo_state_t::ON_DISK_DONE_WRITING: { /* We are on disk. We should not be releasing _read_page after iterating over its entries. However, we still need to be prepared against code which extracts pages from the tuple_fifo using get_page(). get_page() sets _read_page to the SENTINEL_PAGE. */ if (_read_page == SENTINEL_PAGE) _set_read_page(_alloc_page()); else { /* We are reusing the same read page... do a reset */ _read_page->clear(); _set_read_page(_read_page.release()); } /* Make sure that at this point, we are not dealing with the SENTINAL_PAGE. 
*/ assert(_read_page != SENTINEL_PAGE); assert(_read_page->page_size() == malloc_page_pool::instance()->page_size()); /* read page from disk file */ _read_page->clear(); TRACE(TRACE_ALWAYS&TRACE_MASK_DISK, "_next_page = %d\n", (int)_next_page); TRACE(TRACE_ALWAYS&TRACE_MASK_DISK, "_file_head_page = %d\n", (int)_file_head_page); unsigned long seek_pos = (_next_page - _file_head_page) * get_default_page_size(); TRACE(TRACE_ALWAYS&TRACE_MASK_DISK, "fseek to %lu\n", seek_pos); int fseek_ret = fseek(_page_file, seek_pos, SEEK_SET); assert(!fseek_ret); if (fseek_ret) THROW2(FileException, "fseek to %lu", seek_pos); int fread_ret = _read_page->fread_full_page(_page_file); assert(fread_ret); _set_read_page(_read_page.release()); size_t page_size = _read_page->page_size(); if (TRACE_ALWAYS&TRACE_MASK_DISK) { page* pg = _read_page.release(); unsigned char* pg_bytes = (unsigned char*)pg; for (size_t i = 0; i < page_size; i++) { printf("%02x", pg_bytes[i]); if (i % 2 == 0) printf("\t"); if (i % 16 == 0) printf("\n"); } _set_read_page(pg); } TRACE(TRACE_ALWAYS&TRACE_MASK_DISK, "Read %d %d-byte tuples\n", (int)_read_page->tuple_count(), (int)_read_page->tuple_size()); break; } default: unreachable(); } /* endof switch statement */ assert(_pages_in_fifo > 0); _pages_in_fifo--; _next_page++; /* wake the writer if necessary */ if(!FLUSH_TO_DISK_ON_FULL && (_available_in_memory_writes() >= _threshold) && !is_done_writing()) ensure_writer_running(); // * * * END CRITICAL SECTION * * * return 1; }
/** * @brief Get a page from the tuple_fifo. * * @return NULL if the tuple_fifo has been closed. A page otherwise. */ void tuple_fifo::_flush_write_page(bool done_writing) { // after the call to send_eof() the write page is NULL assert(!is_done_writing()); // * * * BEGIN CRITICAL SECTION * * * critical_section_t cs(_lock); _termination_check(); switch(_state.current()) { case tuple_fifo_state_t::IN_MEMORY: { /* Wait for space to free up if we are using a "no flush" policy. */ if (!FLUSH_TO_DISK_ON_FULL) { /* tuple_fifo stays in memory */ /* If the buffer is currently full, we must wait for space to open up. Once we start waiting we continue waiting until space for '_threshold' pages is available. */ for(size_t threshold=1; _available_in_memory_writes() < threshold; threshold = _threshold) { /* wait until something important changes */ wait_for_reader(); _termination_check(); } } /* At this point, we don't have to wait for space anymore. If we still don't have enough space, it must be because we are using a disk flush policy. Check whether we can proceed without flushing to disk. */ if (_available_in_memory_writes() >= 1) { /* Add _write_page to other tuple_fifo pages unless empty. */ if(!_write_page->empty()) { _pages.push_back(_write_page.release()); _pages_in_memory++; _pages_in_fifo++; } /* Allocate a new _write_page if necessary. */ if(done_writing) { /* Allocation of a new _write_page is not necessary (because we are done writing). Just do state transition. */ _state.transition(tuple_fifo_state_t::IN_MEMORY_DONE_WRITING); _write_page.done(); } else _write_page = _alloc_page(); /* wake the reader if necessary */ if(_available_in_memory_reads() >= _threshold || is_done_writing()) ensure_reader_running(); break; } /* If we are here, we need to flush to disk. */ /* Create on disk file. 
*/ c_str filepath = tuple_fifo_directory_t::generate_filepath(_fifo_id); _page_file = fopen(filepath.data(), "w+"); assert(_page_file != NULL); if (_page_file == NULL) THROW2(FileException, "fopen(%s) failed", filepath.data()); TRACE(TRACE_ALWAYS, "Created tuple_fifo file %s\n", filepath.data()); /* Append this page to _pages and flush the entire page_list to disk. */ if(!_write_page->empty()) { _pages.push_back(_write_page.release()); _pages_in_memory++; _pages_in_fifo++; } for (page_list::iterator it = _pages.begin(); it != _pages.end(); ) { qpipe::page* p = *it; p->fwrite_full_page(_page_file); /* done with page */ p->clear(); _free_pages.push_back(p); it = _pages.erase(it); assert(_pages_in_memory > 0); _pages_in_memory--; } fflush(_page_file); /* update _file_head_page */ assert(_file_head_page == 0); _file_head_page = _next_page; _state.transition(tuple_fifo_state_t::ON_DISK); if (done_writing) { /* transition again! */ _state.transition(tuple_fifo_state_t::ON_DISK_DONE_WRITING); _write_page.done(); } else { /* allocate from free list */ assert(!_free_pages.empty()); _write_page = _alloc_page(); /* TODO It's clear whether we want to replace the SENTINAL_PAGE here. On the one hand, if we can replace it, we can free the rest of the pages in the free list. On the other, we still need to check for the SENTINAL_PAGE in _get_read_page since pages may be removed using get_page (instead of tuples removed with get_tuple). */ if (_read_page == SENTINEL_PAGE) { _set_read_page(_alloc_page()); /* After this point, we should not release either of these pages. 
*/ } } /* wake the reader if necessary */ if(_available_fifo_reads() >= _threshold || is_done_writing()) ensure_reader_running(); break; } /* endof case: IN_MEMORY */ case tuple_fifo_state_t::ON_DISK: { int fseek_ret = fseek(_page_file, 0, SEEK_END); assert(!fseek_ret); if (fseek_ret) THROW1(FileException, "fseek to EOF"); _write_page->fwrite_full_page(_page_file); fflush(_page_file); _pages_in_fifo++; if (done_writing) { _state.transition(tuple_fifo_state_t::ON_DISK_DONE_WRITING); _write_page.done(); } else { /* simply reuse write page */ _write_page->clear(); } /* wake the reader if necessary */ if(_available_fifo_reads() >= _threshold || is_done_writing()) ensure_reader_running(); break; } default: unreachable(); } /* endof switch statement */ // * * * END CRITICAL SECTION * * * }