Example #1
    /**
     * analyzeDiskStorage helper which processes a single record.
     */
    void processDeletedRecord(const DiskLoc& dl, const DeletedRecord* dr, const Extent* ex,
                              const AnalyzeParams& params, int bucketNum,
                              vector<DiskStorageData>& sliceData,
                              BSONArrayBuilder* deletedRecordsArrayBuilder) {

        killCurrentOp.checkForInterrupt();

        int extentOfs = ex->myLoc.getOfs();

        if (! (dl.a() == ex->myLoc.a() &&
               dl.getOfs() + dr->lengthWithHeaders() > extentOfs &&
               dl.getOfs() < extentOfs + ex->length) ) {

            return;
        }

        RecPos pos = RecPos::from(dl.getOfs(), dr->lengthWithHeaders(), extentOfs, params);
        bool spansRequestedArea = false;
        for (RecPos::SliceIterator it = pos.iterateSlices(); !it.end(); ++it) {

            DiskStorageData& slice = sliceData[it->sliceNum];
            slice.freeRecords[bucketNum] += it->ratioHere;
            spansRequestedArea = true;
        }

        if (deletedRecordsArrayBuilder != NULL && spansRequestedArea) {
            BSONObjBuilder(deletedRecordsArrayBuilder->subobjStart())
                .append("ofs", dl.getOfs() - extentOfs)
                .append("recBytes", dr->lengthWithHeaders());
        }
    }
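The detail worth calling out in Example #1 is the temporary BSONObjBuilder constructed over deletedRecordsArrayBuilder->subobjStart(): the nested document is written directly into the array's buffer and closed by the temporary's destructor. A minimal standalone sketch of that idiom follows; the function name, field values, and header path are placeholders, not part of the example's codebase.

#include "mongo/bson/bsonobjbuilder.h"

mongo::BSONObj buildDeletedRecordsReport() {
    mongo::BSONObjBuilder report;
    mongo::BSONArrayBuilder deletedRecords(report.subarrayStart("deletedRecords"));
    for (int i = 0; i < 2; ++i) {
        // Same idiom as above: the temporary builder writes one nested document into the
        // array, and its destructor closes that document.
        mongo::BSONObjBuilder(deletedRecords.subobjStart())
            .append("ofs", i * 4096)   // placeholder offset
            .append("recBytes", 128);  // placeholder record length
    }
    deletedRecords.done();  // close the array before finishing the outer object
    return report.obj();    // { deletedRecords: [ { ofs: 0, recBytes: 128 }, { ofs: 4096, recBytes: 128 } ] }
}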
Example #2
BSONObj Sync::getMissingDoc(const BSONObj& o) {
    OplogReader missingObjReader;
    const char *ns = o.getStringField("ns");

    // capped collections
    NamespaceDetails *nsd = nsdetails(ns);
    if ( nsd && nsd->isCapped() ) {
        log() << "replication missing doc, but this is okay for a capped collection (" << ns << ")" << endl;
        return BSONObj();
    }

    uassert(15916, str::stream() << "Can no longer connect to initial sync source: " << hn, missingObjReader.connect(hn));

    // might be more than just _id in the update criteria
    BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj();
    BSONObj missingObj;
    try {
        missingObj = missingObjReader.findOne(ns, query);
    } catch(DBException& e) {
        log() << "replication assertion fetching missing object: " << e.what() << endl;
        throw;
    }

    return missingObj;
}
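Example #2 builds its query with BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj(), relying on the fact that appending an existing BSONElement copies both its field name and value. A hedged standalone sketch, with a made-up oplog entry and mongo::fromjson used purely for illustration:

#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/json.h"

mongo::BSONObj buildIdQueryExample() {
    // A sample "u" (update) oplog entry; the criteria in "o2" may hold more than _id.
    mongo::BSONObj oplogEntry = mongo::fromjson(
        "{ op: 'u', ns: 'test.coll', o2: { _id: 42, shardKey: 'a' } }");
    // append(BSONElement) copies the element's name and value, so the result is { _id: 42 }
    // and the extra shardKey criterion is deliberately dropped.
    return mongo::BSONObjBuilder().append(oplogEntry.getObjectField("o2")["_id"]).obj();
}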
Example #3
BSONObj SyncTail::getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o) {
    OplogReader missingObjReader;  // why are we using OplogReader to run a non-oplog query?
    const char* ns = o.getStringField("ns");

    // capped collections
    Collection* collection = db->getCollection(ns);
    if (collection && collection->isCapped()) {
        log() << "missing doc, but this is okay for a capped collection (" << ns << ")";
        return BSONObj();
    }

    const int retryMax = 3;
    for (int retryCount = 1; retryCount <= retryMax; ++retryCount) {
        if (retryCount != 1) {
            // if we are retrying, sleep a bit to let the network possibly recover
            sleepsecs(retryCount * retryCount);
        }
        try {
            bool ok = missingObjReader.connect(HostAndPort(_hostname));
            if (!ok) {
                warning() << "network problem detected while connecting to the "
                          << "sync source, attempt " << retryCount << " of " << retryMax << endl;
                continue;  // try again
            }
        } catch (const SocketException&) {
            warning() << "network problem detected while connecting to the "
                      << "sync source, attempt " << retryCount << " of " << retryMax << endl;
            continue;  // try again
        }

        // get _id from oplog entry to create query to fetch document.
        const BSONElement opElem = o.getField("op");
        const bool isUpdate = !opElem.eoo() && opElem.str() == "u";
        const BSONElement idElem = o.getObjectField(isUpdate ? "o2" : "o")["_id"];

        if (idElem.eoo()) {
            severe() << "cannot fetch missing document without _id field: " << o.toString();
            fassertFailedNoTrace(28742);
        }

        BSONObj query = BSONObjBuilder().append(idElem).obj();
        BSONObj missingObj;
        try {
            missingObj = missingObjReader.findOne(ns, query);
        } catch (const SocketException&) {
            warning() << "network problem detected while fetching a missing document from the "
                      << "sync source, attempt " << retryCount << " of " << retryMax << endl;
            continue;  // try again
        } catch (DBException& e) {
            error() << "assertion fetching missing object: " << e.what() << endl;
            throw;
        }

        // success!
        return missingObj;
    }
    // retry count exceeded
    msgasserted(15916,
                str::stream() << "Can no longer connect to initial sync source: " << _hostname);
}
Example #4
BSONObjBuilder OpMsgBuilder::beginBody() {
    invariant(_state == kEmpty || _state == kDocSequence);
    _state = kBody;
    _buf.appendStruct(Section::kBody);
    invariant(_bodyStart == 0);
    _bodyStart = _buf.len();  // Cannot be 0.
    return BSONObjBuilder(_buf);
}
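Example #4's beginBody() hands back a BSONObjBuilder that appends directly into the OpMsgBuilder's buffer. A hedged usage sketch, assuming the usual OP_MSG builder flow in which finish() serializes the accumulated sections into a Message; the command fields are placeholders:

#include "mongo/rpc/op_msg.h"

mongo::Message buildPingMessage() {
    mongo::OpMsgBuilder msgBuilder;
    {
        auto body = msgBuilder.beginBody();
        body.append("ping", 1);
        body.append("$db", "admin");
    }  // the body builder's destructor completes the body section in msgBuilder's buffer
    return msgBuilder.finish();
}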
Example #5
File: sync.cpp Project: wjin/mongo
    BSONObj Sync::getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o) {
        OplogReader missingObjReader; // why are we using OplogReader to run a non-oplog query?
        const char *ns = o.getStringField("ns");

        // capped collections
        Collection* collection = db->getCollection(ns);
        if ( collection && collection->isCapped() ) {
            log() << "replication missing doc, but this is okay for a capped collection (" << ns << ")" << endl;
            return BSONObj();
        }

        const int retryMax = 3;
        for (int retryCount = 1; retryCount <= retryMax; ++retryCount) {
            if (retryCount != 1) {
                // if we are retrying, sleep a bit to let the network possibly recover
                sleepsecs(retryCount * retryCount);
            }
            try {
                bool ok = missingObjReader.connect(HostAndPort(hn));
                if (!ok) {
                    warning() << "network problem detected while connecting to the "
                              << "sync source, attempt " << retryCount << " of "
                              << retryMax << endl;
                    continue;  // try again
                }
            } 
            catch (const SocketException&) {
                warning() << "network problem detected while connecting to the "
                          << "sync source, attempt " << retryCount << " of "
                          << retryMax << endl;
                continue; // try again
            }

            // might be more than just _id in the update criteria
            BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj();
            BSONObj missingObj;
            try {
                missingObj = missingObjReader.findOne(ns, query);
            } 
            catch (const SocketException&) {
                warning() << "network problem detected while fetching a missing document from the "
                          << "sync source, attempt " << retryCount << " of "
                          << retryMax << endl;
                continue; // try again
            } 
            catch (DBException& e) {
                log() << "replication assertion fetching missing object: " << e.what() << endl;
                throw;
            }

            // success!
            return missingObj;
        }
        // retry count exceeded
        msgasserted(15916, 
                    str::stream() << "Can no longer connect to initial sync source: " << hn);
    }
Example #6
BSONObj TransactionRouter::Participant::attachTxnFieldsIfNeeded(
    BSONObj cmd, bool isFirstStatementInThisParticipant) const {
    bool hasStartTxn = false;
    bool hasAutoCommit = false;
    bool hasTxnNum = false;

    BSONObjIterator iter(cmd);
    while (iter.more()) {
        auto elem = iter.next();

        if (OperationSessionInfoFromClient::kStartTransactionFieldName ==
            elem.fieldNameStringData()) {
            hasStartTxn = true;
        } else if (OperationSessionInfoFromClient::kAutocommitFieldName ==
                   elem.fieldNameStringData()) {
            hasAutoCommit = true;
        } else if (OperationSessionInfo::kTxnNumberFieldName == elem.fieldNameStringData()) {
            hasTxnNum = true;
        }
    }

    // TODO: SERVER-37045 assert when attaching startTransaction to killCursors command.

    // The first command sent to a participant must start a transaction, unless it is a
    // transaction command, which doesn't support the options that start transactions, i.e.
    // startTransaction and readConcern. Otherwise, the command must not have a read concern.
    bool mustStartTransaction = isFirstStatementInThisParticipant && !isTransactionCommand(cmd);

    if (!mustStartTransaction) {
        dassert(!cmd.hasField(repl::ReadConcernArgs::kReadConcernFieldName));
    }

    BSONObjBuilder newCmd = mustStartTransaction
        ? appendFieldsForStartTransaction(std::move(cmd),
                                          sharedOptions.readConcernArgs,
                                          sharedOptions.atClusterTime,
                                          !hasStartTxn)
        : BSONObjBuilder(std::move(cmd));

    if (isCoordinator) {
        newCmd.append(kCoordinatorField, true);
    }

    if (!hasAutoCommit) {
        newCmd.append(OperationSessionInfoFromClient::kAutocommitFieldName, false);
    }

    if (!hasTxnNum) {
        newCmd.append(OperationSessionInfo::kTxnNumberFieldName, sharedOptions.txnNumber);
    } else {
        auto osi =
            OperationSessionInfoFromClient::parse("OperationSessionInfo"_sd, newCmd.asTempObj());
        invariant(sharedOptions.txnNumber == *osi.getTxnNumber());
    }

    return newCmd.obj();
}
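The fallback branch in Example #6 that builds BSONObjBuilder(std::move(cmd)) uses the prefix constructor: the builder starts out seeded with the fields of an existing BSONObj, so new fields can be appended without reassembling the command by hand. A short illustrative sketch with placeholder command and field names:

#include <utility>

#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/json.h"

mongo::BSONObj attachExtraFieldsExample() {
    mongo::BSONObj cmd = mongo::fromjson("{ find: 'coll', filter: { x: 1 } }");
    mongo::BSONObjBuilder newCmd(std::move(cmd));  // seeded with the original command's fields
    newCmd.append("coordinator", true);
    newCmd.append("autocommit", false);
    newCmd.append("txnNumber", 5LL);
    return newCmd.obj();
    // => { find: 'coll', filter: { x: 1 }, coordinator: true, autocommit: false, txnNumber: 5 }
}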
Example #7
OpMsgRequest upconvertRequest(StringData db, BSONObj cmdObj, int queryFlags) {
    cmdObj = cmdObj.getOwned();  // Usually this is a no-op since it is already owned.

    auto readPrefContainer = BSONObj();
    const StringData firstFieldName = cmdObj.firstElementFieldName();
    if (firstFieldName == "$query" || firstFieldName == "query") {
        // Commands sent over OP_QUERY specify read preference by putting it at the top level and
        // putting the command in a nested field called either query or $query.

        // Check if legacyCommand has an invalid $maxTimeMS option.
        uassert(ErrorCodes::InvalidOptions,
                "cannot use $maxTimeMS query option with commands; use maxTimeMS command option "
                "instead",
                !cmdObj.hasField("$maxTimeMS"));

        if (auto readPref = cmdObj["$readPreference"])
            readPrefContainer = readPref.wrap();

        cmdObj = cmdObj.firstElement().Obj().shareOwnershipWith(cmdObj);
    } else if (auto queryOptions = cmdObj["$queryOptions"]) {
        // Mongos rewrites commands with $readPreference to put it in a field nested inside of
        // $queryOptions. Its command implementations often forward commands in that format to
        // shards. This function is responsible for rewriting it to a format that the shards
        // understand.
        readPrefContainer = queryOptions.Obj().shareOwnershipWith(cmdObj);
        cmdObj = cmdObj.removeField("$queryOptions");
    }

    if (!readPrefContainer.isEmpty()) {
        cmdObj = BSONObjBuilder(std::move(cmdObj)).appendElements(readPrefContainer).obj();
    } else if (!cmdObj.hasField("$readPreference") && (queryFlags & QueryOption_SlaveOk)) {
        BSONObjBuilder bodyBuilder(std::move(cmdObj));
        ReadPreferenceSetting(ReadPreference::SecondaryPreferred).toContainingBSON(&bodyBuilder);
        cmdObj = bodyBuilder.obj();
    }

    // Try to move supported array fields into document sequences.
    auto docSequenceIt = docSequenceFieldsForCommands.find(cmdObj.firstElementFieldName());
    auto docSequenceElem = docSequenceIt == docSequenceFieldsForCommands.end()
        ? BSONElement()
        : cmdObj[docSequenceIt->second];
    if (!isArrayOfObjects(docSequenceElem))
        return OpMsgRequest::fromDBAndBody(db, std::move(cmdObj));

    auto docSequenceName = docSequenceElem.fieldNameStringData();

    // Note: removing field before adding "$db" to avoid the need to copy the potentially large
    // array.
    auto out = OpMsgRequest::fromDBAndBody(db, cmdObj.removeField(docSequenceName));
    out.sequences.push_back({docSequenceName.toString()});
    for (auto elem : docSequenceElem.Obj()) {
        out.sequences[0].objs.push_back(elem.Obj().shareOwnershipWith(cmdObj));
    }
    return out;
}
Example #8
void MongoListModel::mongoQueryDown(int position, QString value) const
{
    BSONObj query;
    if (!value.isEmpty())
        query = BSONObjBuilder().appendRegex("data", value.toStdString()).obj();

    auto_ptr<DBClientCursor> cursor =
        mongoDatabase->query("mongolab_database.random_data", query, bufferSize, position);
    while (cursor->more()) {
        BSONObj object = cursor->next();
        MongoItems* it = new MongoItems(object);
        mongoDataList.append(it);
    }
}
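Example #8's filter comes from appendRegex, which stores a BSON regular-expression value rather than a plain string, so the server performs a regex match on the data field. A minimal sketch of that call in isolation; the field name, option flag, and header path are illustrative:

#include <string>

#include "mongo/bson/bsonobjbuilder.h"

mongo::BSONObj buildRegexQuery(const std::string& pattern) {
    // Produces { data: /<pattern>/i }; drop the "i" option for a case-sensitive match.
    return mongo::BSONObjBuilder().appendRegex("data", pattern, "i").obj();
}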
Example #9
BSONObjBuilder OpMsgBuilder::resumeBody() {
    invariant(_state == kBody);
    invariant(_bodyStart != 0);
    return BSONObjBuilder(BSONObjBuilder::ResumeBuildingTag(), _buf, _bodyStart);
}
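Example #9's resumeBody() reopens the body that beginBody() previously finished, via the ResumeBuildingTag constructor, so additional fields can be appended before the message is serialized. A hedged sketch of how the two calls might be combined; finish() and the field names are assumptions for illustration:

#include "mongo/rpc/op_msg.h"

mongo::Message buildCommandInTwoSteps() {
    mongo::OpMsgBuilder msgBuilder;
    msgBuilder.beginBody().append("find", "coll");  // body started and closed by the temporary
    msgBuilder.resumeBody().append("$db", "test");  // reopen the same body and append one more field
    return msgBuilder.finish();
}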
Example #10
    bool ReplSetImpl::_initialSyncOplogApplication(OplogReader& r, const Member *source,
        const OpTime& applyGTE, const OpTime& minValid) {

        const string hn = source->fullName();
        OplogReader missingObjReader;
        try {
            r.tailingQueryGTE( rsoplog, applyGTE );
            if ( !r.haveCursor() ) {
                log() << "replSet initial sync oplog query error" << rsLog;
                return false;
            }

            {
                if( !r.more() ) {
                    sethbmsg("replSet initial sync error reading remote oplog");
                    log() << "replSet initial sync error remote oplog (" << rsoplog << ") on host " << hn << " is empty?" << rsLog;
                    return false;
                }
                bo op = r.next();
                OpTime t = op["ts"]._opTime();
                r.putBack(op);

                if( op.firstElementFieldName() == string("$err") ) {
                    log() << "replSet initial sync error querying " << rsoplog << " on " << hn << " : " << op.toString() << rsLog;
                    return false;
                }

                uassert( 13508 , str::stream() << "no 'ts' in first op in oplog: " << op , !t.isNull() );
                if( t > applyGTE ) {
                    sethbmsg(str::stream() << "error " << hn << " oplog wrapped during initial sync");
                    log() << "replSet initial sync expected first optime of " << applyGTE << rsLog;
                    log() << "replSet initial sync but received a first optime of " << t << " from " << hn << rsLog;
                    return false;
                }

                sethbmsg(str::stream() << "initial oplog application from " << hn << " starting at "
                         << t.toStringPretty() << " to " << minValid.toStringPretty());
            }
        }
        catch(DBException& e) {
            log() << "replSet initial sync failing: " << e.toString() << rsLog;
            return false;
        }

        /* we lock outside the loop to avoid the overhead of locking on every operation. */
        writelock lk("");

        // todo : use exhaust
        OpTime ts;
        time_t start = time(0);
        unsigned long long n = 0;
        while( 1 ) {
            try {
                if( !r.more() )
                    break;
                BSONObj o = r.nextSafe(); /* note we might get "not master" at some point */
                ts = o["ts"]._opTime();

                {
                    if( (source->state() != MemberState::RS_PRIMARY &&
                            source->state() != MemberState::RS_SECONDARY) ||
                            replSetForceInitialSyncFailure ) {

                        int f = replSetForceInitialSyncFailure;
                        if( f > 0 ) {
                            replSetForceInitialSyncFailure = f-1;
                            log() << "replSet test code invoked, replSetForceInitialSyncFailure" << rsLog;
                            throw DBException("forced error",0);
                        }
                        log() << "replSet we are now primary" << rsLog;
                        throw DBException("primary changed",0);
                    }

                    if( ts >= applyGTE ) { // optimes before we started copying need not be applied.
                        bool failedUpdate = syncApply(o);
                        if( failedUpdate ) {
                            // we don't have the object yet, which is possible on initial sync.  get it.
                            log() << "replSet info adding missing object" << endl; // rare enough we can log
                            if( !missingObjReader.connect(hn) ) { // ok to call more than once
                                log() << "replSet initial sync fails, couldn't connect to " << hn << endl;
                                return false;
                            }
                            const char *ns = o.getStringField("ns");
                            BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj(); // might be more than just _id in the update criteria
                            BSONObj missingObj;
                            try {
                                missingObj = missingObjReader.findOne(
                                    ns, 
                                    query );
                            } catch(...) { 
                                log() << "replSet assertion fetching missing object" << endl;
                                throw;
                            }
                            if( missingObj.isEmpty() ) { 
                                log() << "replSet missing object not found on source. presumably deleted later in oplog" << endl;
                                log() << "replSet o2: " << o.getObjectField("o2").toString() << endl;
                                log() << "replSet o firstfield: " << o.getObjectField("o").firstElementFieldName() << endl;
                            }
                            else {
                                Client::Context ctx(ns);
                                try {
                                    DiskLoc d = theDataFileMgr.insert(ns, (void*) missingObj.objdata(), missingObj.objsize());
                                    assert( !d.isNull() );
                                } catch(...) { 
                                    log() << "replSet assertion during insert of missing object" << endl;
                                    throw;
                                }
                                // now reapply the update from above
                                bool failed = syncApply(o);
                                if( failed ) {
                                    log() << "replSet update still fails after adding missing object " << ns << endl;
                                    assert(false);
                                }
                            }
                        }
                    }
                    _logOpObjRS(o);   /* with repl sets we write the ops to our oplog too */
                }

                if ( ++n % 1000 == 0 ) {
                    time_t now = time(0);
                    if (now - start > 10) {
                        // simple progress metering
                        log() << "replSet initialSyncOplogApplication applied " << n << " operations, synced to "
                              << ts.toStringPretty() << rsLog;
                        start = now;
                    }
                }

                if ( ts > minValid ) {
                    break;
                }

                getDur().commitIfNeeded();
            }
            catch (DBException& e) {
                // skip duplicate key exceptions
                if( e.getCode() == 11000 || e.getCode() == 11001 ) {
                    continue;
                }
                
                // handle cursor not found (just requery)
                if( e.getCode() == 13127 ) {
                    log() << "replSet requerying oplog after cursor not found condition, ts: " << ts.toStringPretty() << endl;
                    r.resetCursor();
                    r.tailingQueryGTE(rsoplog, ts);
                    if( r.haveCursor() ) {
                        continue;
                    }
                }

                // TODO: handle server restart

                if( ts <= minValid ) {
                    // didn't make it far enough
                    log() << "replSet initial sync failing, error applying oplog : " << e.toString() << rsLog;
                    return false;
                }

                // otherwise, whatever
                break;
            }
        }
        return true;
    }
Example #11
namespace engine
{

   #define IXM_MAX_PREALLOCATED_UNDEFKEY        ( 10 )
   /*
      IXM Tool functions
   */
   BSONObj ixmGetUndefineKeyObj( INT32 fieldNum )
   {
      static BSONObj s_undefineKeys[ IXM_MAX_PREALLOCATED_UNDEFKEY ] ;
      static BOOLEAN s_init = FALSE ;
      static ossSpinXLatch s_latch ;

      if ( FALSE == s_init )
      {
         s_latch.get() ;
         if ( FALSE == s_init )
         {
            for ( SINT32 i = 0; i < IXM_MAX_PREALLOCATED_UNDEFKEY ; ++i )
            {
               BSONObjBuilder b ;
               for ( SINT32 j = 0; j <= i; ++j )
               {
                  b.appendUndefined("") ;
               }
               s_undefineKeys[i] = b.obj() ;
            }
            s_init = TRUE ;
         }
         s_latch.release() ;
      }

      if ( fieldNum > 0 && fieldNum <= IXM_MAX_PREALLOCATED_UNDEFKEY )
      {
         return s_undefineKeys[ fieldNum - 1 ] ;
      }
      else
      {
         BSONObjBuilder b ;
         for ( INT32 i = 0; i < fieldNum; ++i )
         {
            b.appendUndefined("") ;
         }
         return b.obj() ;
      }
   }

   /*
      IXM Global opt var
   */
   const static BSONObj gUndefinedObj =
          BSONObjBuilder().appendUndefined("").obj() ;
   const static BSONElement gUndefinedElt = gUndefinedObj.firstElement() ;

   class _ixmKeyGenerator
   {
   protected:
      const _ixmIndexKeyGen *_keygen ;
      mutable vector<BSONObj *> _objs ;
   public:
      _ixmKeyGenerator ( const _ixmIndexKeyGen *keygen )
      {
         _keygen = keygen ;
      }
      ~_ixmKeyGenerator()
      {
         vector<BSONObj *>::iterator itr = _objs.begin() ;
         for ( ; itr != _objs.end(); itr++ )
         {
            SDB_OSS_DEL *itr ;
         }
      }
      // PD_TRACE_DECLARE_FUNCTION ( SDB__IXMKEYGEN_GETKEYS, "_ixmKeyGenerator::getKeys" )
      INT32 getKeys ( const BSONObj &obj, BSONObjSet &keys,
                      BSONElement *pArrEle ) const
      {
         INT32 rc = SDB_OK ;
         PD_TRACE_ENTRY ( SDB__IXMKEYGEN_GETKEYS );
         SDB_ASSERT( _keygen, "spec can't be NULL" ) ;
         SDB_ASSERT( !_keygen->_fieldNames.empty(), "can not be empty" ) ;
         vector<const CHAR*> fieldNames ( _keygen->_fieldNames ) ;
         BSONElement arrEle ;
         try
         {
            rc = _getKeys( fieldNames, obj, keys, &arrEle ) ;
         }
         catch ( std::exception &e )
         {
            PD_LOG( PDERROR, "unexpected err:%s", e.what() ) ;
            rc = SDB_INVALIDARG ;
            goto error ;
         }

         if ( SDB_OK != rc )
         {
            PD_LOG ( PDERROR, "Failed to generate key from object: %s",
                     obj.toString().c_str() ) ;
            goto error ;
         }

         if ( keys.empty() )
         {
            keys.insert ( _keygen->_undefinedKey ) ;
         }

         if ( NULL != pArrEle && !arrEle.eoo() )
         {
            *pArrEle = arrEle ;
         }
      done :
         PD_TRACE_EXITRC ( SDB__IXMKEYGEN_GETKEYS, rc );
         return rc ;
      error :
         goto done ;
      }
   protected:
      // PD_TRACE_DECLARE_FUNCTION ( SDB__IXMKEYGEN__GETKEYS, "_ixmKeyGenerator::_getKeys" )
      INT32 _getKeys( vector<const CHAR *> &fieldNames,
                      const BSONObj &obj,
                      BSONObjSet &keys,
                      BSONElement *arrEle ) const
      {
         INT32 rc = SDB_OK ;
         PD_TRACE_ENTRY ( SDB__IXMKEYGEN__GETKEYS );
#define IXM_DEFAULT_FIELD_NUM 3
         BSONElement eleOnStack[IXM_DEFAULT_FIELD_NUM] ;
         BSONElement *keyEles = NULL ;
         const CHAR *arrEleName = NULL ;
         UINT32 arrElePos = 0 ;
         UINT32 eooNum = 0 ;

         if ( IXM_DEFAULT_FIELD_NUM < fieldNames.size() )
         {
            keyEles = new(std::nothrow) BSONElement[fieldNames.size()] ;
            if ( NULL == keyEles )
            {
               PD_LOG( PDERROR, "failed to allocalte mem." ) ;
               rc = SDB_OOM ;
               goto error ;
            }
         }
         else
         {
            keyEles = ( BSONElement* )eleOnStack ;
         }

         for ( UINT32 i = 0; i < fieldNames.size(); i++ )
         {
            const CHAR *name = fieldNames.at( i ) ;
            SDB_ASSERT( '\0' != name[0], "can not be empty" ) ;
            BSONElement &e = keyEles[i] ;
            e = obj.getFieldDottedOrArray( name ) ;
            if ( e.eoo() )
            {
               ++eooNum ;
               continue ;
            }
            else if ( Array == e.type() )
            {
               if ( !arrEle->eoo() )
               {
                  PD_LOG( PDERROR, "At most one array can be in the key:",
                          arrEle->fieldName(), e.fieldName() ) ;
                  rc = SDB_IXM_MULTIPLE_ARRAY ;
                  goto error ;
               }
               else
               {
                  *arrEle = e ;
                  arrEleName = name ;
                  arrElePos = i ;
               }
            }
            else
            {
               continue ;
            }
         }

         if ( fieldNames.size() == eooNum )
         {
            rc = SDB_OK ;
            goto done ;
         }
         else if ( !arrEle->eoo() )
         {
            rc = _genKeyWithArrayEle( keyEles, fieldNames.size(),
                                      arrEle,
                                      arrEleName, arrElePos,
                                      keys ) ;
            if ( SDB_OK != rc )
            {
               PD_LOG( PDERROR, "failed to gen keys with array element:%d", rc ) ;
               goto error ;
            }
         }
         else
         {
            rc = _genKeyWithNormalEle( keyEles, fieldNames.size(), keys ) ;
            if ( SDB_OK != rc )
            {
               PD_LOG( PDERROR, "failed to gen keys with normal element:%d", rc ) ;
               goto error ;
            }
         }
      done:
         if ( IXM_DEFAULT_FIELD_NUM < fieldNames.size() &&
              NULL != keyEles )
         {
            delete []keyEles ;
         }
         PD_TRACE_EXITRC ( SDB__IXMKEYGEN__GETKEYS, rc );
         return rc ;
      error:
         goto done ;
      }

      // PD_TRACE_DECLARE_FUNCTION ( SDB__IXMKEYGEN__GENKEYSWITHARRELE, "_ixmKeyGenerator::_genKeyWithArrayEle" )
      INT32 _genKeyWithArrayEle( BSONElement *keyEles,
                                 UINT32 eleNum,
                                 const BSONElement *arrElement,
                                 const CHAR *arrEleName,
                                 UINT32 arrElePos,
                                 BSONObjSet &keys ) const
      {
         PD_TRACE_ENTRY ( SDB__IXMKEYGEN__GENKEYSWITHARRELE );
         INT32 rc = SDB_OK ;
         BSONObj arrObj = arrElement->embeddedObject() ;
         if ( arrObj.firstElement().eoo() )
         {
            keyEles[arrElePos] = *arrElement ;
            rc = _genKeyWithNormalEle( keyEles, eleNum, keys ) ;
            if ( SDB_OK != rc )
            {
               goto error ;
            }
         }

         if ( '\0' == *arrEleName )
         {
            BSONObjIterator itr( arrObj ) ;
            BSONElement &e = keyEles[arrElePos] ;
            while ( itr.more() )
            {
               e = itr.next() ;
               rc = _genKeyWithNormalEle( keyEles, eleNum, keys ) ;
               if ( SDB_OK != rc )
               {
                  goto error ;
               }
            }
         }
         else
         {
            BSONObjIterator itr( arrObj ) ;
            while ( itr.more() )
            {
               const CHAR *dottedName = arrEleName ;
               BSONElement next = itr.next() ;
               if ( Object == next.type() )
               {
                  BSONElement e =
                     next.embeddedObject()
                     .getFieldDottedOrArray( dottedName ) ;
                  if ( Array == e.type() )
                  {
                     rc = _genKeyWithArrayEle(keyEles, eleNum,
                                              &e,
                                              dottedName, arrElePos,
                                              keys) ;
                     if ( SDB_OK != rc )
                     {
                        goto error ;
                     }
                     else
                     {
                        continue ;
                     }
                  }
                  else
                  {
                     keyEles[arrElePos] = e ;
                  }
               }
               else
               {
                  keyEles[arrElePos] = BSONElement() ;
               }

               rc = _genKeyWithNormalEle( keyEles, eleNum, keys ) ;
               if ( SDB_OK != rc )
               {
                  goto error ;
               }
            } 
         }
      done:
         PD_TRACE_EXITRC( SDB__IXMKEYGEN__GENKEYSWITHARRELE, rc ) ;
         return rc ;
      error:
         goto done ;
      }

      // PD_TRACE_DECLARE_FUNCTION ( SDB__IXMKEYGEN__GENKEYSWITHNORMALELE, "_ixmKeyGenerator::_genKeyWithNormalEle" )
      INT32 _genKeyWithNormalEle( BSONElement *keyELes,
                                  UINT32 eleNum,
                                  BSONObjSet &keys ) const
      {
         PD_TRACE_ENTRY ( SDB__IXMKEYGEN__GENKEYSWITHNORMALELE );
         INT32 rc = SDB_OK ;
         BSONObjBuilder builder ;
         for ( UINT32 i = 0; i < eleNum; i++ )
         {
            BSONElement &e = keyELes[i] ;
            if ( e.eoo() )
            {
               builder.appendAs( gUndefinedElt, "" ) ;
            }
            else
            {
               builder.appendAs( e, "" ) ;
            }
         }

         keys.insert( builder.obj() ) ;
         PD_TRACE_EXITRC ( SDB__IXMKEYGEN__GENKEYSWITHNORMALELE, rc );
         return rc ;
      }
   } ;
   typedef class _ixmKeyGenerator ixmKeyGenerator ;
   _ixmIndexKeyGen::_ixmIndexKeyGen ( const _ixmIndexCB *indexCB,
                                      IXM_KEYGEN_TYPE genType )
   {
      SDB_ASSERT ( indexCB, "details can't be NULL" ) ;
      _keyPattern = indexCB->keyPattern() ;
      _info = indexCB->_infoObj ;
      _type = indexCB->getIndexType() ;
      _keyGenType = genType ;
      _init() ;
   }
   _ixmIndexKeyGen::_ixmIndexKeyGen ( const BSONObj &keyDef,
                                      IXM_KEYGEN_TYPE genType )
   {
      _keyPattern = keyDef.copy () ;
      _type = IXM_EXTENT_TYPE_NONE ;
      _keyGenType = genType ;
      _init () ;
   }

   // PD_TRACE_DECLARE_FUNCTION ( SDB__IXMINXKEYGEN__INIT, "_ixmIndexKeyGen::_init" )
   void _ixmIndexKeyGen::_init()
   {
      PD_TRACE_ENTRY ( SDB__IXMINXKEYGEN__INIT );
      _nFields = _keyPattern.nFields () ;
      INT32 fieldNum = 0 ;
      {
         BSONObjIterator i(_keyPattern) ;
         while ( i.more())
         {
            BSONElement e = i.next() ;
            _fieldNames.push_back(e.fieldName()) ;
            _fixedElements.push_back(BSONElement()) ;
            ++fieldNum ;
         }
         _undefinedKey = ixmGetUndefineKeyObj( fieldNum ) ;
      }
      PD_TRACE_EXIT ( SDB__IXMINXKEYGEN__INIT );
   }

   INT32 _ixmIndexKeyGen::getKeys ( const BSONObj &obj, BSONObjSet &keys,
                                    BSONElement *pArrEle ) const
   {
      ixmKeyGenerator g (this) ;
      if ( pArrEle )
      {
         *pArrEle = BSONElement() ;
      }
      return g.getKeys ( obj, keys, pArrEle ) ;
   }

   static BOOLEAN anyElementNamesMatch( const BSONObj& a , const BSONObj& b )
   {
      BSONObjIterator x(a);
      while ( x.more() )
      {
         BSONElement e = x.next();
         BSONObjIterator y(b);
         while ( y.more() )
         {
            BSONElement f = y.next();
            FieldCompareResult res = compareDottedFieldNames( e.fieldName(),
                                                              f.fieldName()
                                                            ) ;
            if ( res == SAME || res == LEFT_SUBFIELD || res == RIGHT_SUBFIELD )
               return TRUE;
         }
      }
      return FALSE;
   }
   IndexSuitability ixmIndexKeyGen::suitability( const BSONObj &query ,
                                                 const BSONObj &order ) const
   {
      return _suitability( query , order );
   }

   IndexSuitability ixmIndexKeyGen::_suitability( const BSONObj& query ,
                                                  const BSONObj& order ) const
   {
       if ( anyElementNamesMatch( _keyPattern , query ) == 0 &&
            anyElementNamesMatch( _keyPattern , order ) == 0 )
          return USELESS;
       return HELPFUL;
   }
   
   // PD_TRACE_DECLARE_FUNCTION ( SDB_IXMINXKEYGEN, "ixmIndexKeyGen::reset" )
   INT32 ixmIndexKeyGen::reset ( const BSONObj & info )
   {
      INT32 rc = SDB_OK ;
      PD_TRACE_ENTRY ( SDB_IXMINXKEYGEN );
      _info = info ;
      try
      {
         _keyPattern = _info[IXM_KEY_FIELD].embeddedObjectUserCheck() ;
      }
      catch ( std::exception &e )
      {
         PD_LOG ( PDERROR, "Unable to locate valid key in index: %s",
                  e.what() ) ;
         rc = SDB_INVALIDARG ;
         goto error ;
      }
      if ( _keyPattern.objsize() == 0 )
      {
         PD_LOG ( PDERROR, "Empty key" ) ;
         rc = SDB_INVALIDARG ;
         goto error ;
      }
      _init() ;
   done :
      PD_TRACE_EXITRC ( SDB_IXMINXKEYGEN, rc );
      return rc ;
   error :
      goto done ;
   }
   INT32 ixmIndexKeyGen::reset ( const _ixmIndexCB *indexCB )
   {
      SDB_ASSERT ( indexCB, "details can't be NULL" ) ;
      return reset ( indexCB->_infoObj ) ;
   }
   BSONElement ixmIndexKeyGen::missingField() const
   {
      return gUndefinedElt ;
   }

   BOOLEAN _ixmIndexKeyGen::validateKeyDef ( const BSONObj &keyDef )
   {
      BSONObjIterator i ( keyDef ) ;
      INT32 count = 0 ;
      while ( i.more () )
      {
         ++count ;
         BSONElement ie = i.next () ;
         if ( ie.type() != NumberInt ||
              ( ie.numberInt() != -1 &&
                ie.numberInt() != 1 ) )
         {
            return FALSE ;
         }
      }
      return 0 != count ;
   }
}
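The preallocation table in Example #11 exists because the fallback path rebuilds the same undefined-key object on every call: a document with one empty-named Undefined value per indexed field. A hedged standalone sketch of that fallback, written against MongoDB's BSON headers on the assumption that SequoiaDB's bundled BSON classes expose the same appendUndefined API (as the listing shows):

#include "mongo/bson/bsonobjbuilder.h"

mongo::BSONObj makeUndefinedKey(int fieldNum) {
    mongo::BSONObjBuilder b;
    for (int i = 0; i < fieldNum; ++i) {
        b.appendUndefined("");  // one Undefined value per key field, empty field name
    }
    return b.obj();  // fieldNum == 2  ->  { "": undefined, "": undefined }
}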