////////////////////////////////////////////////////////////////////////////////
/// @brief remove an empty datafile from the collection's list of datafiles
/// and dispose of its statistics record
///
/// returns TRI_ERROR_NO_ERROR on success, TRI_ERROR_INTERNAL if the datafile
/// cannot be found in the collection's datafiles vector
////////////////////////////////////////////////////////////////////////////////

static int RemoveDatafile (TRI_document_collection_t* document, TRI_datafile_t* df) {
  TRI_primary_collection_t* primary = &document->base;
  TRI_doc_datafile_info_t* info;
  size_t position;

  LOG_TRACE("removing empty datafile '%s'", df->getName(df));

  // the datafiles vector is shared state, so acquire the write-lock
  // before modifying it
  TRI_WRITE_LOCK_DATAFILES_DOC_COLLECTION(primary);

  if (! LocateDatafile(&primary->base._datafiles, df->_fid, &position)) {
    // datafile not present in the vector: release the lock and bail out
    TRI_WRITE_UNLOCK_DATAFILES_DOC_COLLECTION(primary);

    LOG_ERROR("logic error: could not locate datafile");

    return TRI_ERROR_INTERNAL;
  }

  // drop the datafile from the list of datafiles
  TRI_RemoveVectorPointer(&primary->base._datafiles, position);

  // look up the statistics record for this fid (false: do not create one)
  // and free it if present
  info = TRI_FindDatafileInfoPrimaryCollection(primary, df->_fid, false);

  if (info != NULL) {
    TRI_RemoveDatafileInfoPrimaryCollection(primary, df->_fid);
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, info);
  }

  TRI_WRITE_UNLOCK_DATAFILES_DOC_COLLECTION(primary);

  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief print debug statistics for a single datafile
///
/// NOTE(review): this looks like a stale duplicate of the other
/// DebugDatafileInfoDatafile defined further below (two static definitions
/// with the same name cannot coexist in one translation unit) — confirm and
/// remove one of the two.
////////////////////////////////////////////////////////////////////////////////

static void DebugDatafileInfoDatafile (TRI_primary_collection_t* primary, TRI_datafile_t* datafile) {
  TRI_doc_datafile_info_t* dfi;

  printf("FILE '%s'\n", datafile->getName(datafile));

  // look up the statistics record for this datafile's fid.
  // fixed: pass the required third argument (false = do not create a new
  // record as a side-effect of a debug print), as every other call site
  // of TRI_FindDatafileInfoPrimaryCollection in this file does
  dfi = TRI_FindDatafileInfoPrimaryCollection(primary, datafile->_fid, false);

  if (dfi == NULL) {
    printf(" no info\n\n");
    return;
  }

  printf(" number alive: %ld\n", (long) dfi->_numberAlive);
  printf(" size alive: %ld\n", (long) dfi->_sizeAlive);
  printf(" number dead: %ld\n", (long) dfi->_numberDead);
  printf(" size dead: %ld\n", (long) dfi->_sizeDead);
  printf(" deletion: %ld\n\n", (long) dfi->_numberDeletion);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief dump the statistics record of one datafile to stdout
////////////////////////////////////////////////////////////////////////////////

static void DebugDatafileInfoDatafile (TRI_primary_collection_t* primary, TRI_datafile_t* datafile) {
  TRI_doc_datafile_info_t* info;

  printf("FILE '%s'\n", datafile->getName(datafile));

  // fetch the statistics record for this fid (false: do not create one)
  info = TRI_FindDatafileInfoPrimaryCollection(primary, datafile->_fid, false);

  if (info == NULL) {
    printf(" no info\n\n");
    return;
  }

  printf(" number alive: %llu\n", (unsigned long long) info->_numberAlive);
  printf(" size alive: %llu\n", (unsigned long long) info->_sizeAlive);
  printf(" number dead: %llu\n", (unsigned long long) info->_numberDead);
  printf(" size dead: %llu\n", (unsigned long long) info->_sizeDead);
  printf(" number shapes: %llu\n", (unsigned long long) info->_numberShapes);
  printf(" size shapes: %llu\n", (unsigned long long) info->_sizeShapes);
  printf(" number attributes: %llu\n", (unsigned long long) info->_numberAttributes);
  printf(" size attributes: %llu\n", (unsigned long long) info->_sizeAttributes);
  printf(" numberdeletion: %llu\n", (unsigned long long) info->_numberDeletion);
  printf("\n");
}
////////////////////////////////////////////////////////////////////////////////
/// @brief select the datafiles of a collection that are worth compacting and
/// hand them over to CompactifyDatafiles
///
/// returns true if a compaction run was performed, false otherwise (lock not
/// available, a compaction already in progress, no datafiles, or no datafile
/// eligible). selection is limited to COMPACTOR_MAX_FILES per call.
////////////////////////////////////////////////////////////////////////////////

static bool CompactifyDocumentCollection (TRI_document_collection_t* document) {
  TRI_primary_collection_t* primary;
  TRI_vector_t vector;
  int64_t numAlive;
  size_t i, n;
  bool compactNext;

  compactNext = false;
  primary = &document->base;

  // if we cannot acquire the read lock instantly, we will exit directly.
  // otherwise we'll risk a multi-thread deadlock between synchroniser,
  // compactor and data-modification threads (e.g. POST /_api/document)
  if (! TRI_TRY_READ_LOCK_DATAFILES_DOC_COLLECTION(primary)) {
    return false;
  }

  n = primary->base._datafiles._length;

  if (primary->base._compactors._length > 0 || n == 0) {
    // we already have created a compactor file in progress.
    // if this happens, then a previous compaction attempt for this collection failed
    // additionally, if there are no datafiles, then there's no need to compact
    TRI_READ_UNLOCK_DATAFILES_DOC_COLLECTION(primary);
    return false;
  }

  // copy datafile information into a local vector so we can release the
  // lock before the actual (long-running) compaction starts
  TRI_InitVector(&vector, TRI_UNKNOWN_MEM_ZONE, sizeof(compaction_info_t));
  numAlive = 0;

  for (i = 0; i < n; ++i) {
    TRI_datafile_t* df;
    TRI_doc_datafile_info_t* dfi;
    compaction_info_t compaction;
    bool shouldCompact;

    df = primary->base._datafiles._buffer[i];

    assert(df != NULL);

    // true: create the statistics record if it does not exist yet
    dfi = TRI_FindDatafileInfoPrimaryCollection(primary, df->_fid, true);

    if (dfi == NULL) {
      continue;
    }

    shouldCompact = false;

    if (! compactNext && df->_maximalSize < COMPACTOR_MIN_SIZE && i < n - 1) {
      // very small datafile. let's compact it so it's merged with others
      shouldCompact = true;
      compactNext = true;
    }
    else if (numAlive == 0 && dfi->_numberDeletion > 0) {
      // compact first datafile already if it has got some deletions
      shouldCompact = true;
      compactNext = true;
    }
    else {
      // in all other cases, only check the number and size of "dead" objects
      if (dfi->_sizeDead >= (int64_t) COMPACTOR_DEAD_SIZE_THRESHOLD) {
        // the size of dead objects is above the absolute threshold
        shouldCompact = true;
        compactNext = true;
      }
      else if (dfi->_sizeDead > 0) {
        // check the relative share of dead objects in the datafile
        double share = (double) dfi->_sizeDead / ((double) dfi->_sizeDead + (double) dfi->_sizeAlive);

        if (share >= COMPACTOR_DEAD_SIZE_SHARE) {
          // the size of dead objects is above some share
          shouldCompact = true;
          compactNext = true;
        }
      }
    }

    if (! shouldCompact) {
      // only use those datafiles that contain dead objects.
      // NOTE: if compactNext is already set, a non-eligible datafile is still
      // picked up here (no continue) so adjacent files get merged together
      if (! compactNext) {
        numAlive += (int64_t) dfi->_numberAlive;
        continue;
      }
    }

    LOG_TRACE("found datafile eligible for compaction. fid: %llu, size: %llu "
              "numberDead: %llu, numberAlive: %llu, numberTransaction: %llu, numberDeletion: %llu, "
              "sizeDead: %llu, sizeAlive: %llu, sizeTransaction: %llu",
              (unsigned long long) df->_fid,
              (unsigned long long) df->_maximalSize,
              (unsigned long long) dfi->_numberDead,
              (unsigned long long) dfi->_numberAlive,
              (unsigned long long) dfi->_numberTransaction,
              (unsigned long long) dfi->_numberDeletion,
              (unsigned long long) dfi->_sizeDead,
              (unsigned long long) dfi->_sizeAlive,
              (unsigned long long) dfi->_sizeTransaction);

    compaction._datafile = df;
    // deletion markers must be kept if any alive documents were seen in
    // earlier datafiles, because they may still apply to those documents
    compaction._keepDeletions = (numAlive > 0 && i > 0);

    TRI_PushBackVector(&vector, &compaction);

    // we stop at the first few datafiles.
    // this is better than going over all datafiles in a collection in one go
    // because the compactor is single-threaded, and collecting all datafiles
    // might take a long time (it might even be that there is a request to
    // delete the collection in the middle of compaction, but the compactor
    // will not pick this up as it is read-locking the collection status)
    if (TRI_LengthVector(&vector) >= COMPACTOR_MAX_FILES) {
      // found enough to compact
      break;
    }

    numAlive += (int64_t) dfi->_numberAlive;
  }

  // can now continue without the lock
  TRI_READ_UNLOCK_DATAFILES_DOC_COLLECTION(primary);

  if (vector._length == 0) {
    // cleanup local variables
    TRI_DestroyVector(&vector);
    return false;
  }

  // handle datafiles with dead objects
  n = vector._length;
  assert(n >= 1);

  CompactifyDatafiles(document, &vector);

  // cleanup local variables
  TRI_DestroyVector(&vector);

  return true;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief datafile iteration callback that copies still-relevant markers from
/// a datafile into the current compactor file
///
/// updates the statistics accumulated in context->_dfi for the compactor file
/// and repoints master pointers of live documents to their new location.
/// always returns true so the iteration continues.
////////////////////////////////////////////////////////////////////////////////

static bool Compactifier (TRI_df_marker_t const* marker, void* data, TRI_datafile_t* datafile, bool journal) {
  TRI_df_marker_t* result;
  TRI_doc_mptr_t const* found;
  TRI_document_collection_t* document;
  TRI_primary_collection_t* primary;
  compaction_context_t* context;
  int res;

  context = data;
  document = context->_document;
  primary = &document->base;

  // new or updated document
  if (marker->_type == TRI_DOC_MARKER_KEY_DOCUMENT ||
      marker->_type == TRI_DOC_MARKER_KEY_EDGE) {
    TRI_doc_document_key_marker_t const* d;
    TRI_doc_mptr_t* found2;
    TRI_voc_key_t key;
    bool deleted;

    d = (TRI_doc_document_key_marker_t const*) marker;
    // the key is stored inline in the marker at _offsetKey
    key = (char*) d + d->_offsetKey;

    // check if the document is still active.
    // a marker is stale if the key is gone from the primary index or if a
    // newer revision (higher _rid) has superseded it
    TRI_READ_LOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
    found = TRI_LookupByKeyAssociativePointer(&primary->_primaryIndex, key);
    deleted = (found == NULL || found->_rid > d->_rid);
    TRI_READ_UNLOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);

    if (deleted) {
      LOG_TRACE("found a stale document: %s", key);
      return true;
    }

    // a live document was copied, so subsequent deletion markers must be kept
    context->_keepDeletions = true;

    // write to compactor files
    res = CopyMarker(document, context->_compactor, marker, &result);

    if (res != TRI_ERROR_NO_ERROR) {
      LOG_FATAL_AND_EXIT("cannot write compactor file: %s", TRI_last_error());
    }

    // re-check under the write-lock: the document may have been removed
    // between the copy above and now
    TRI_WRITE_LOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
    found = TRI_LookupByKeyAssociativePointer(&primary->_primaryIndex, key);
    deleted = found == NULL;

    if (deleted) {
      // the already-copied marker is accounted as dead in the compactor file
      context->_dfi._numberDead += 1;
      context->_dfi._sizeDead += (int64_t) marker->_size;
      TRI_WRITE_UNLOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);

      LOG_DEBUG("found a stale document after copying: %s", key);
      return true;
    }

    found2 = CONST_CAST(found);
    assert(found2->_data != NULL);
    assert(((TRI_df_marker_t*) found2->_data)->_size > 0);

    // the fid might change
    if (found->_fid != context->_compactor->_fid) {
      // update old datafile's info: the old copy of the marker is now dead
      // (false: do not create a statistics record if none exists)
      TRI_doc_datafile_info_t* dfi = TRI_FindDatafileInfoPrimaryCollection(primary, found->_fid, false);

      if (dfi != NULL) {
        dfi->_numberDead += 1;
        dfi->_sizeDead += (int64_t) marker->_size;
      }

      found2->_fid = context->_compactor->_fid;
    }

    // let marker point to the new position
    found2->_data = result;

    // let _key point to the new key position
    found2->_key = ((char*) result) + (((TRI_doc_document_key_marker_t*) result)->_offsetKey);

    // update datafile info
    context->_dfi._numberAlive += 1;
    context->_dfi._sizeAlive += (int64_t) marker->_size;

    TRI_WRITE_UNLOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
  }

  // deletion marker — only copied while _keepDeletions is set
  else if (marker->_type == TRI_DOC_MARKER_KEY_DELETION &&
           context->_keepDeletions) {
    // write to compactor files
    res = CopyMarker(document, context->_compactor, marker, &result);

    if (res != TRI_ERROR_NO_ERROR) {
      LOG_FATAL_AND_EXIT("cannot write compactor file: %s", TRI_last_error());
    }

    // update datafile info
    context->_dfi._numberDeletion++;
  }

  // transaction markers are always copied
  else if (marker->_type == TRI_DOC_MARKER_BEGIN_TRANSACTION ||
           marker->_type == TRI_DOC_MARKER_COMMIT_TRANSACTION ||
           marker->_type == TRI_DOC_MARKER_ABORT_TRANSACTION ||
           marker->_type == TRI_DOC_MARKER_PREPARE_TRANSACTION) {
    // write to compactor files
    res = CopyMarker(document, context->_compactor, marker, &result);

    if (res != TRI_ERROR_NO_ERROR) {
      LOG_FATAL_AND_EXIT("cannot write compactor file: %s", TRI_last_error());
    }

    context->_dfi._numberTransaction++;
    context->_dfi._sizeTransaction += (int64_t) marker->_size;
  }

  return true;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief callback invoked after compaction: swap the compactor file into the
/// place of the original datafile
///
/// physically renames the original datafile out of the way, renames the
/// compactor to the datafile's name, replaces the entry in the datafiles
/// vector, installs the new statistics, and schedules the old datafile for
/// dropping. frees the compaction context in all cases.
////////////////////////////////////////////////////////////////////////////////

static void RenameDatafileCallback (TRI_datafile_t* datafile, void* data) {
  compaction_context_t* context;
  TRI_datafile_t* compactor;
  TRI_primary_collection_t* primary;
  bool ok;

  context = data;
  compactor = context->_compactor;
  primary = &context->_document->base;

  ok = false;
  // compactor was created for exactly this datafile's fid
  assert(datafile->_fid == compactor->_fid);

  if (datafile->isPhysical(datafile)) {
    char* number;
    char* jname;
    char* tempFilename;
    char* realName;

    // remember the datafile's real name before renaming it away
    realName = TRI_DuplicateString(datafile->_filename);

    // construct a suitable tempname ("temp-<fid>.db" in the collection dir)
    number = TRI_StringUInt64(datafile->_fid);
    jname = TRI_Concatenate3String("temp-", number, ".db");
    tempFilename = TRI_Concatenate2File(primary->base._directory, jname);

    TRI_FreeString(TRI_CORE_MEM_ZONE, number);
    TRI_FreeString(TRI_CORE_MEM_ZONE, jname);

    // two-step swap: datafile -> temp name, then compactor -> real name.
    // NOTE(review): if either rename fails, ok stays false and the in-memory
    // structures are left untouched — the on-disk state may then be
    // inconsistent with memory; presumably recovered on next startup — confirm
    if (! TRI_RenameDatafile(datafile, tempFilename)) {
      LOG_ERROR("unable to rename datafile '%s' to '%s'", datafile->getName(datafile), tempFilename);
    }
    else {
      if (! TRI_RenameDatafile(compactor, realName)) {
        LOG_ERROR("unable to rename compaction file '%s' to '%s'", compactor->getName(compactor), realName);
      }
      else {
        ok = true;
      }
    }

    TRI_FreeString(TRI_CORE_MEM_ZONE, tempFilename);
    TRI_FreeString(TRI_CORE_MEM_ZONE, realName);
  }
  else {
    // non-physical (in-memory) datafile: nothing to rename on disk
    ok = true;
  }

  if (ok) {
    TRI_doc_datafile_info_t* dfi;
    size_t i;

    // must acquire a write-lock as we're about to change the datafiles vector
    TRI_WRITE_LOCK_DATAFILES_DOC_COLLECTION(primary);

    if (! LocateDatafile(&primary->base._datafiles, datafile->_fid, &i)) {
      TRI_WRITE_UNLOCK_DATAFILES_DOC_COLLECTION(primary);

      LOG_ERROR("logic error: could not locate datafile");

      return;
    }

    // put the compactor in place of the datafile
    primary->base._datafiles._buffer[i] = compactor;

    // update dfi: overwrite the old statistics with the values accumulated
    // for the compactor file (false: do not create a new record)
    dfi = TRI_FindDatafileInfoPrimaryCollection(primary, compactor->_fid, false);

    if (dfi != NULL) {
      memcpy(dfi, &context->_dfi, sizeof(TRI_doc_datafile_info_t));
    }
    else {
      LOG_ERROR("logic error: could not find compactor file information");
    }

    if (! LocateDatafile(&primary->base._compactors, compactor->_fid, &i)) {
      TRI_WRITE_UNLOCK_DATAFILES_DOC_COLLECTION(primary);

      LOG_ERROR("logic error: could not locate compactor");

      return;
    }

    // remove the compactor from the list of compactors
    TRI_RemoveVectorPointer(&primary->base._compactors, i);

    TRI_WRITE_UNLOCK_DATAFILES_DOC_COLLECTION(primary);

    // the old datafile is no longer referenced; schedule it for dropping
    DropDatafileCallback(datafile, primary);
  }

  TRI_Free(TRI_CORE_MEM_ZONE, context);
}