void analyseLwo(const std::string& filename, bool modify) { std::cout << "--- " << filename << " ---" << std::endl; std::ifstream istream(filename.c_str(), std::ios::binary); std::vector<char> vectorBuffer((std::istreambuf_iterator<char>(istream)), std::istreambuf_iterator<char>()); Chunk form; form.id = "FORM"; form.size = vectorBuffer.size(); form.chunkSizeBytes = 4; parseFromStream(vectorBuffer, 0, form); if (!form.subChunks.empty()) { dumpChunks(form.subChunks[0]); } istream.close(); if (!modify) return; // Write modified LWO std::string modFile = filename.substr(0, filename.length() - 4) + "_modified.lwo"; std::ofstream output(modFile.c_str(), std::ios::binary); if (!form.subChunks.empty()) { filterFile(form.subChunks[0]); writeFile(output, form.subChunks[0]); } }
/**
 * Prints the given chunk and, recursively, all of its sub-chunks to stdout.
 *
 * Each chunk occupies one line of the form "<id> [<size>]", prefixed by
 * two spaces per nesting level.
 *
 * @param chunk The chunk to print.
 * @param level Nesting depth of the chunk, 0 for the outermost one.
 */
void dumpChunks(Chunk& chunk, int level = 0)
{
    // Two spaces of padding for every level of nesting
    const std::string padding(level * 2, ' ');

    std::cout << padding;
    std::cout << chunk.id;
    std::cout << " [" << chunk.size << "]";
    std::cout << std::endl;

    const int childLevel = level + 1;

    for (auto& child : chunk.subChunks)
    {
        dumpChunks(child, childLevel);
    }
}
bool run(OperationContext* txn, const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result) { const std::string ns = parseNs(dbname, jsobj); md5digest d; md5_state_t st; md5_init(&st); int n = 0; bool partialOk = jsobj["partialOk"].trueValue(); if (partialOk) { // WARNING: This code depends on the binary layout of md5_state. It will not be // compatible with different md5 libraries or work correctly in an environment with // mongod's of different endians. It is ok for mongos to be a different endian since // it just passes the buffer through to another mongod. BSONElement stateElem = jsobj["md5state"]; if (!stateElem.eoo()) { int len; const char* data = stateElem.binDataClean(len); massert(16247, "md5 state not correct size", len == sizeof(st)); memcpy(&st, data, sizeof(st)); } n = jsobj["startAt"].numberInt(); } BSONObj query = BSON("files_id" << jsobj["filemd5"] << "n" << GTE << n); BSONObj sort = BSON("files_id" << 1 << "n" << 1); MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { CanonicalQuery* cq; if (!CanonicalQuery::canonicalize(ns, query, sort, BSONObj(), &cq).isOK()) { uasserted(17240, "Can't canonicalize query " + query.toString()); return 0; } // Check shard version at startup. 
// This will throw before we've done any work if shard version is outdated // We drop and re-acquire these locks every document because md5'ing is expensive unique_ptr<AutoGetCollectionForRead> ctx(new AutoGetCollectionForRead(txn, ns)); Collection* coll = ctx->getCollection(); PlanExecutor* rawExec; if (!getExecutor(txn, coll, cq, PlanExecutor::YIELD_MANUAL, &rawExec, QueryPlannerParams::NO_TABLE_SCAN).isOK()) { uasserted(17241, "Can't get executor for query " + query.toString()); return 0; } unique_ptr<PlanExecutor> exec(rawExec); // Process notifications when the lock is released/reacquired in the loop below exec->registerExec(); BSONObj obj; PlanExecutor::ExecState state; while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { BSONElement ne = obj["n"]; verify(ne.isNumber()); int myn = ne.numberInt(); if (n != myn) { if (partialOk) { break; // skipped chunk is probably on another shard } log() << "should have chunk: " << n << " have:" << myn << endl; dumpChunks(txn, ns, query, sort); uassert(10040, "chunks out of order", n == myn); } // make a copy of obj since we access data in it while yielding locks BSONObj owned = obj.getOwned(); exec->saveState(); // UNLOCKED ctx.reset(); int len; const char* data = owned["data"].binDataClean(len); // This is potentially an expensive operation, so do it out of the lock md5_append(&st, (const md5_byte_t*)(data), len); n++; try { // RELOCKED ctx.reset(new AutoGetCollectionForRead(txn, ns)); } catch (const SendStaleConfigException& ex) { LOG(1) << "chunk metadata changed during filemd5, will retarget and continue"; break; } // Have the lock again. See if we were killed. 
if (!exec->restoreState(txn)) { if (!partialOk) { uasserted(13281, "File deleted during filemd5 command"); } } } if (partialOk) result.appendBinData("md5state", sizeof(st), BinDataGeneral, &st); // This must be *after* the capture of md5state since it mutates st md5_finish(&st, d); result.append("numChunks", n); result.append("md5", digestToString(d)); } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "filemd5", dbname); return true; }