Exemple #1
0
/**
 * Fetch the IndexBuilder singleton and prepare it for a build run.
 *
 * Steps, in order:
 *   1. set the maximum document number from pctx->p_ibctx->doc_total;
 *   2. call addField() for every configured index whose bind_to and
 *      package_only fields are both unset (text fields are registered with
 *      TEXT_OCC_COUNT, every other type with 0);
 *   3. open the builder on "<output_dir>/index/".
 *
 * @param pctx  builder context providing the configuration and output dir
 * @return the opened builder, or NULL when the singleton is unavailable,
 *         addField() reports an error, or open() reports an error
 */
static IndexBuilder *ib_init_index_builder(ib_builder_ctx_t *pctx){
    IndexBuilder *builder = IndexBuilder::getInstance();
    if(builder == NULL){
        return NULL;
    }

    builder->setMaxDocNum(pctx->p_ibctx->doc_total);

    int rc = 0;
    for(uint32_t idx = 0; idx < pctx->conf->index_count; ++idx){
        // Entries with bind_to or package_only set get no field of their own.
        if(pctx->conf->index[idx].bind_to || pctx->conf->index[idx].package_only){
            continue;
        }

        rc = builder->addField(pctx->conf->index[idx].name,
                (pctx->conf->index[idx].index_type == FIELD_TYPE_TEXT)?TEXT_OCC_COUNT:0);
        if(rc < 0){
            fprintf(stderr,"addField() failed. Field:%s\n",pctx->conf->index[idx].name);
            return NULL;
        }
    }

    // The index files live in the "index/" sub-directory of the output dir.
    char index_dir[PATH_MAX];
    snprintf(index_dir,sizeof(index_dir),"%s/index/",pctx->p_ibctx->output_dir);

    rc = builder->open(index_dir);
    return (rc < 0) ? NULL : builder;
}
Exemple #2
0
/**
 * 初始化 全量index 的 builder, 重新打开, 为了修改
 *
 * @param path     数据存放路径
 *
 * @return  0: success ;   -1: 程序处理失败
 */
int initIdxBuilder(const char * path)
{
    IndexBuilder * builder = IndexBuilder::getInstance();

    if ( NULL == builder )
    {
        TERR("IndexBuilder instance is null");
        return -1;
    }

    if ( (NULL == path) || strlen( path ) <= 0 )
    {
        TERR("index node's path attribute is null");
        return -1;
    }

    TLOG("begin to load full index! path:%s", path);

    if ( builder->reopen( path ) < 0)
    {
        TERR("load full index failed! path:%s", path);
        return -1;
    }

    TLOG("load full index success!");

    return 0;
}
Exemple #3
0
 void IndexBuilder::restoreIndexes(const std::string& ns, const std::vector<BSONObj>& indexes) {
     log() << "restarting " << indexes.size() << " index build(s)" << endl;
     for (int i = 0; i < static_cast<int>(indexes.size()); i++) {
         IndexBuilder* indexBuilder = new IndexBuilder(ns, indexes[i]);
         // This looks like a memory leak, but indexBuilder deletes itself when it finishes
         indexBuilder->go();
     }
 }
Exemple #4
0
 /**
  * Start one background index build for every spec in `indexes`, waiting
  * after each start via waitForBgIndexStarting() before moving on.
  *
  * @param txn      operation context whose lock state is handed to
  *                 Lock::TempRelease around each wait
  * @param indexes  index specs, one builder is created per element
  */
 void IndexBuilder::restoreIndexes(OperationContext* txn, const std::vector<BSONObj>& indexes) {
     log() << "restarting " << indexes.size() << " background index build(s)" << endl;

     typedef std::vector<BSONObj>::const_iterator SpecIter;
     for (SpecIter spec = indexes.begin(); spec != indexes.end(); ++spec) {
         // Not a leak according to the original author: the builder deletes
         // itself once it has finished, so the pointer is not kept here.
         IndexBuilder* const bgBuild = new IndexBuilder(*spec);
         bgBuild->go();

         // Scoped object that lives until the end of this iteration, i.e.
         // across the wait below. NOTE(review): presumably it gives up the
         // caller's lock for that span -- confirm against Lock::TempRelease.
         Lock::TempRelease lockGap(txn->lockState());
         IndexBuilder::waitForBgIndexStarting();
     }
 }
Exemple #5
0
    /** @param fromRepl false if from ApplyOpsCmd
        @return true if was and update should have happened and the document DNE.  see replset initial sync code.
     */
    bool applyOperation_inlock(OperationContext* txn,
                               Database* db,
                               const BSONObj& op,
                               bool fromRepl,
                               bool convertUpdateToUpsert) {
        LOG(3) << "applying op: " << op << endl;
        bool failedUpdate = false;

        OpCounters * opCounters = fromRepl ? &replOpCounters : &globalOpCounters;

        const char *names[] = { "o", "ns", "op", "b", "o2" };
        BSONElement fields[5];
        op.getFields(5, names, fields);
        BSONElement& fieldO = fields[0];
        BSONElement& fieldNs = fields[1];
        BSONElement& fieldOp = fields[2];
        BSONElement& fieldB = fields[3];
        BSONElement& fieldO2 = fields[4];

        BSONObj o;
        if( fieldO.isABSONObj() )
            o = fieldO.embeddedObject();

        const char *ns = fieldNs.valuestrsafe();

        BSONObj o2;
        if (fieldO2.isABSONObj())
            o2 = fieldO2.Obj();

        bool valueB = fieldB.booleanSafe();

        txn->lockState()->assertWriteLocked(ns);

        Collection* collection = db->getCollection( txn, ns );
        IndexCatalog* indexCatalog = collection == NULL ? NULL : collection->getIndexCatalog();

        // operation type -- see logOp() comments for types
        const char *opType = fieldOp.valuestrsafe();

        if ( *opType == 'i' ) {
            opCounters->gotInsert();

            const char *p = strchr(ns, '.');
            if ( p && nsToCollectionSubstring( p ) == "system.indexes" ) {
                if (o["background"].trueValue()) {
                    IndexBuilder* builder = new IndexBuilder(o);
                    // This spawns a new thread and returns immediately.
                    builder->go();
                }
                else {
                    IndexBuilder builder(o);
                    Status status = builder.buildInForeground(txn, db);
                    if ( status.isOK() ) {
                        // yay
                    }
                    else if ( status.code() == ErrorCodes::IndexOptionsConflict ||
                              status.code() == ErrorCodes::IndexKeySpecsConflict ) {
                        // SERVER-13206, SERVER-13496
                        // 2.4 (and earlier) will add an ensureIndex to an oplog if its ok or not
                        // so in 2.6+ where we do stricter validation, it will fail
                        // but we shouldn't care as the primary is responsible
                        warning() << "index creation attempted on secondary that conflicts, "
                                  << "skipping: " << status;
                    }
                    else {
                        uassertStatusOK( status );
                    }
                }
            }
            else {
                // do upserts for inserts as we might get replayed more than once
                OpDebug debug;
                BSONElement _id;
                if( !o.getObjectID(_id) ) {
                    /* No _id.  This will be very slow. */
                    Timer t;

                    const NamespaceString requestNs(ns);
                    UpdateRequest request(txn, requestNs);

                    request.setQuery(o);
                    request.setUpdates(o);
                    request.setUpsert();
                    request.setFromReplication();
                    UpdateLifecycleImpl updateLifecycle(true, requestNs);
                    request.setLifecycle(&updateLifecycle);

                    update(db, request, &debug);

                    if( t.millis() >= 2 ) {
                        RARELY OCCASIONALLY log() << "warning, repl doing slow updates (no _id field) for " << ns << endl;
                    }
                }
                else {
                    // probably don't need this since all replicated colls have _id indexes now
                    // but keep it just in case
                    RARELY if ( indexCatalog
                                 && !collection->isCapped()
                                 && !indexCatalog->haveIdIndex(txn) ) {
                        try {
                            Helpers::ensureIndex(txn, collection, BSON("_id" << 1), true, "_id_");
                        }
                        catch (const DBException& e) {
                            warning() << "Ignoring error building id index on " << collection->ns()
                                      << ": " << e.toString();
                        }
                    }

                    /* todo : it may be better to do an insert here, and then catch the dup key exception and do update
                              then.  very few upserts will not be inserts...
                              */
                    BSONObjBuilder b;
                    b.append(_id);

                    const NamespaceString requestNs(ns);
                    UpdateRequest request(txn, requestNs);

                    request.setQuery(b.done());
                    request.setUpdates(o);
                    request.setUpsert();
                    request.setFromReplication();
                    UpdateLifecycleImpl updateLifecycle(true, requestNs);
                    request.setLifecycle(&updateLifecycle);

                    update(db, request, &debug);
                }
            }
        }
Exemple #6
0
    /** @param fromRepl false if from ApplyOpsCmd
        @return true if was and update should have happened and the document DNE.  see replset initial sync code.
     */
    bool applyOperation_inlock(const BSONObj& op, bool fromRepl, bool convertUpdateToUpsert) {
        LOG(3) << "applying op: " << op << endl;
        bool failedUpdate = false;

        OpCounters * opCounters = fromRepl ? &replOpCounters : &globalOpCounters;

        const char *names[] = { "o", "ns", "op", "b" };
        BSONElement fields[4];
        op.getFields(4, names, fields);

        BSONObj o;
        if( fields[0].isABSONObj() )
            o = fields[0].embeddedObject();
            
        const char *ns = fields[1].valuestrsafe();

        Lock::assertWriteLocked(ns);

        NamespaceDetails *nsd = nsdetails(ns);

        // operation type -- see logOp() comments for types
        const char *opType = fields[2].valuestrsafe();

        if ( *opType == 'i' ) {
            opCounters->gotInsert();

            const char *p = strchr(ns, '.');
            if ( p && strcmp(p, ".system.indexes") == 0 ) {
                if (o["background"].trueValue()) {
                    IndexBuilder* builder = new IndexBuilder(ns, o);
                    // This spawns a new thread and returns immediately.
                    builder->go();
                }
                else {
                    IndexBuilder builder(ns, o);
                    // Finish the foreground build before returning
                    builder.build();
                }
            }
            else {
                // do upserts for inserts as we might get replayed more than once
                OpDebug debug;
                BSONElement _id;
                if( !o.getObjectID(_id) ) {
                    /* No _id.  This will be very slow. */
                    Timer t;

                    const NamespaceString requestNs(ns);
                    UpdateRequest request(
                        requestNs, debug,
                        QueryPlanSelectionPolicy::idElseNatural());

                    request.setQuery(o);
                    request.setUpdates(o);
                    request.setUpsert();
                    request.setFromReplication();

                    update(request);

                    if( t.millis() >= 2 ) {
                        RARELY OCCASIONALLY log() << "warning, repl doing slow updates (no _id field) for " << ns << endl;
                    }
                }
                else {
                    // probably don't need this since all replicated colls have _id indexes now
                    // but keep it just in case
                    RARELY if ( nsd && !nsd->isCapped() ) { ensureHaveIdIndex(ns, false); }

                    /* todo : it may be better to do an insert here, and then catch the dup key exception and do update
                              then.  very few upserts will not be inserts...
                              */
                    BSONObjBuilder b;
                    b.append(_id);

                    const NamespaceString requestNs(ns);
                    UpdateRequest request(
                        requestNs, debug,
                        QueryPlanSelectionPolicy::idElseNatural());

                    request.setQuery(b.done());
                    request.setUpdates(o);
                    request.setUpsert();
                    request.setFromReplication();

                    update(request);
                }
            }
        }
Exemple #7
0
/**
 * Run the whole build: create the work queue, start
 * pctx->p_ibctx->thread_count worker threads running ib_build_thread(),
 * wait for all of them and, unless the mode is MODE_DETAIL, dump and close
 * the shared IndexBuilder.
 *
 * Compared with the earlier version this one
 *   - releases the per-thread context array on every exit path,
 *   - joins the workers that were already started when pthread_create()
 *     fails part-way (they hold pointers into the context array), and
 *   - reports a pthread_join() failure through the return value.
 *
 * @param pctx  builder context (configuration, thread count, mode)
 * @return 0 on success; -1 if the queue or builder could not be set up, a
 *         thread could not be created or joined, any worker stored a
 *         non-zero retval, or dump() failed
 */
int ib_build(ib_builder_ctx_t *pctx){
    int ret = 0;

    ib_build_queue_count = 0;
    ib_build_queue_ptr = 0;
    ret = ib_build_create_queue(pctx);
    if(ret!=0){
        fprintf(stderr,"ib_build_create_queue() failed.\n");
        return -1;
    }

    // calloc() zero-fills the array and checks the size multiplication.
    ib_build_thread_ctx_t *ctxs = (ib_build_thread_ctx_t*)calloc(
            pctx->p_ibctx->thread_count, sizeof(ib_build_thread_ctx_t));
    if(!ctxs){
        fprintf(stderr,"Out of memory.\n");
        return -1;
    }

    IndexBuilder *pib = NULL;
    if (pctx->p_ibctx->mode != MODE_DETAIL) {
        pib = ib_init_index_builder(pctx);
        if(!pib){
            fprintf(stderr,"ib_init_index_builder() failed.\n");
            free(ctxs);
            return -1;
        }
    }

    int result = 0;
    bool spawn_failed = false;

    // `started` counts the threads that really exist and must be joined.
    uint32_t started = 0;
    for(; started < pctx->p_ibctx->thread_count; started++){
        ctxs[started].p_builder_ctx=pctx;
        ret = pthread_create(&ctxs[started].tid,NULL,ib_build_thread,&ctxs[started]);
        if(ret!=0){
            fprintf(stderr,"pthread_create() failed\n");
            spawn_failed = true;
            result = -1;
            break;
        }
    }

    bool all_joined = true;
    uint32_t i;
    for(i = 0; i < started; i++){
        ret = pthread_join(ctxs[i].tid,NULL);
        if(ret!=0){
            // The worker's outcome is unknown, so the build cannot be
            // reported as successful.
            fprintf(stderr,"pthread_join() failed.\n");
            all_joined = false;
            result = -1;
            continue;
        }
        if(ctxs[i].retval!=0){
            result = -1;
        }
    }

    // As before, nothing is dumped when thread creation failed; a worker
    // that merely reported an error does not prevent the dump.
    if (!spawn_failed && pctx->p_ibctx->mode != MODE_DETAIL) {
        ret = pib->dump();
        if(ret<0){
            fprintf(stderr,"IndexBuilder dump() failed.\n");
            result = -1;
        }
        else{
            pib->close();
        }
    }

    // A thread that could not be joined may still be using its context, so
    // the array is only released when every worker is known to be done.
    if(all_joined){
        free(ctxs);
    }
    return result;
}
Exemple #8
0
/**
 * Print the diagnostic tail shared by both addTerm() failure reports: the
 * term signature, the doc-list length and every doc id in the list.
 */
static void ib_report_failed_term(uint64_t sign, const uint32_t *doclist, uint64_t doclist_count){
    fprintf(stderr,"sign=%lu doclist_count = %lu\n",sign,doclist_count);
    for(uint64_t k = 0; k < doclist_count; k++){
        fprintf(stderr,"%u ",doclist[k]);
    }
    fprintf(stderr,"\n\n");
}

/**
 * Build the inverted lists of one field from the intermediate gzip files
 * and feed them to the IndexBuilder singleton.
 *
 * One file per worker thread is read (name = pnode->fn_pattern formatted
 * with the file index and split_part). Each record is a uint32_t term count
 * followed by that many ib_num_t entries; records are numbered
 * consecutively across all files and that number is used as the doc id.
 * All (doc id, term) pairs are sorted with ib_num_sn_compare and every run
 * of equal node.sign values is passed to addTerm() as one doc list.
 *
 * Differences from the earlier version:
 *   - term payloads are read through a fixed-size buffer instead of a
 *     variable-length stack array sized by file content;
 *   - a failed realloc() no longer leaks the old buffer or the open file;
 *   - a failed realloc() of the doc list now returns -1 instead of
 *     submitting a truncated list and returning 0;
 *   - 64-bit counters are iterated with 64-bit indices;
 *   - doc ids are printed with %u.
 *
 * addTerm() failures are still only logged and do not change the result.
 *
 * @param pnode       queue node giving the field name and file name pattern
 * @param pctx        thread context; only the thread count is read from it
 * @param split_part  split number substituted into the file name pattern
 * @return 0 on success (including "no terms at all"), -1 on missing
 *         builder, I/O error, corrupt input or out of memory
 */
static int ib_do_build_index_str(bq_node_t *pnode,ib_build_thread_ctx_t *pctx, uint32_t split_part){
    IndexBuilder *pib = IndexBuilder::getInstance();
    if(!pib){
        return -1;
    }

    uint64_t list_count = 0;
    uint64_t list_size = IB_NUM_INIT_SIZE;
    ib_num_sn_t *plist = (ib_num_sn_t*)malloc(sizeof(ib_num_sn_t)*list_size);
    if(!plist){
        return -1;
    }

    // Terms are pulled from the file in bounded batches so that a huge
    // term_count in the input cannot exhaust the stack.
    enum { TERM_BATCH = 256 };
    ib_num_t batch[TERM_BATCH];

    uint32_t docid = 0;
    uint32_t file_index = 0;
    char filename[PATH_MAX];
    uint32_t term_count = 0;
    int failed = 0;
    int ret = 0;

    for(file_index = 0; file_index<pctx->p_builder_ctx->p_ibctx->thread_count; file_index++){
        snprintf(filename,sizeof(filename),pnode->fn_pattern,file_index,split_part);
        gzFile fp = gzopen(filename,"rb");
        if(!fp){
            fprintf(stderr,"gzopen(\"%s\") failed,\n",filename);
            free(plist);
            return -1;
        }

        while(1){
            ret = gzread(fp,&term_count,sizeof(term_count));
            if(ret==0){
                break;      // clean end of this file
            }
            if(ret!=(int)sizeof(term_count)){
                fprintf(stderr,"file broken! 0\n");
                failed = 1;
                break;
            }

            // Make room for the whole record before reading it.
            if(list_count + term_count > list_size){
                uint64_t new_size = list_size;
                while(new_size < list_count + term_count){
                    new_size *= 2;
                }
                ib_num_sn_t *grown = (ib_num_sn_t*)realloc(plist, new_size*sizeof(ib_num_sn_t));
                if(!grown){
                    // plist is still valid here and is released below.
                    fprintf(stderr,"Out of memory!\n");
                    failed = 1;
                    break;
                }
                plist = grown;
                list_size = new_size;
            }

            uint32_t remaining = term_count;
            while(remaining > 0){
                const uint32_t want = (remaining < (uint32_t)TERM_BATCH) ? remaining : (uint32_t)TERM_BATCH;
                const unsigned int nbytes = (unsigned int)(want * sizeof(ib_num_t));
                if(gzread(fp, batch, nbytes) != (int)nbytes){
                    fprintf(stderr,"file broken! 1\n");
                    failed = 1;
                    break;
                }
                for(uint32_t k = 0; k < want; k++){
                    plist[list_count].docid = docid;
                    plist[list_count].node = batch[k];
                    ++list_count;
                }
                remaining -= want;
            }
            if(failed){
                break;
            }
            ++docid;
        }

        // Single clean-up point for every way out of the read loop.
        gzclose(fp);
        if(failed){
            free(plist);
            return -1;
        }
    }

    if(!list_count){
        free(plist);
        return 0;
    }

    qsort(plist,list_count,sizeof(ib_num_sn_t),ib_num_sn_compare);

    uint64_t doclist_size = IB_NUM_INIT_SIZE;
    uint32_t *pdoclist = (uint32_t*)malloc(sizeof(uint32_t)*doclist_size);
    if(!pdoclist){
        free(plist);
        return -1;
    }

    int status = 0;
    uint64_t last_sign = plist[0].node.sign;
    uint64_t doclist_count = 1;
    pdoclist[0] = plist[0].docid;

    for(uint64_t idx = 1; idx < list_count; idx++){
        const uint64_t sign = plist[idx].node.sign;
        if(sign!=last_sign){
            // The run for last_sign is complete: hand it to the builder.
            ret = pib->addTerm(pnode->field_name,last_sign,pdoclist,doclist_count);
            if(ret<=0){
                fprintf(stderr,"addTerm failed no occ.\n");
                ib_report_failed_term(last_sign,pdoclist,doclist_count);
            }
            doclist_count = 0;
            last_sign = sign;
        }
        if(doclist_count>=doclist_size){
            uint32_t *grown = (uint32_t*)realloc(pdoclist,sizeof(uint32_t)*doclist_size*2);
            if(grown == NULL){
                fprintf(stderr, "realloc memory failed:%d\n", __LINE__);
                status = -1;
                break;
            }
            pdoclist = grown;
            doclist_size *= 2;
        }
        pdoclist[doclist_count] = plist[idx].docid;
        doclist_count++;
    }

    // Flush the last run, unless the list is incomplete after an
    // allocation failure.
    if(status == 0 && doclist_count>0){
        ret = pib->addTerm(pnode->field_name,last_sign,pdoclist,doclist_count);
        if(ret<=0){
            fprintf(stderr,"addTerm failed. line:%d\n",__LINE__);
            ib_report_failed_term(last_sign,pdoclist,doclist_count);
        }
    }

    free(pdoclist);
    free(plist);
    if(status == 0){
        fprintf(stderr,"Index field:%s . %u docs, %lu nodes\n",pnode->field_name,docid,list_count);
    }
    return status;
}