Пример #1
0
/* Search a q-gram database.
   `wdb' specifies the q-gram database object.
   `word' specifies the string of the word to be matched to.
   `np' specifies the pointer to the variable into which the number of elements of the return
   value is assigned.
   If successful, the return value is the pointer to an array of ID numbers of the corresponding
   records. */
static uint64_t *tcwdbsearchimpl(TCWDB *wdb, const char *word, int *np){
  assert(wdb && word && np);
  int wlen = strlen(word);
  if(wlen > WDBMAXWORDLEN){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    return NULL;
  }
  int vsiz;
  const char *vbuf = tcbdbget3(wdb->idx, word, wlen, &vsiz);
  if(!vbuf){
    vbuf = "";
    vsiz = 0;
  }
  uint64_t *res = tcmalloc(WDBRESUNIT * sizeof(*res));
  int rnum = 0;
  int ranum = WDBRESUNIT;
  while(vsiz > 0){
    int step;
    uint64_t id;
    TDREADVNUMBUF64(vbuf, id, step);
    vbuf += step;
    vsiz -= step;
    if(rnum >= ranum){
      ranum *= 2;
      res = tcrealloc(res, ranum * sizeof(*res));
    }
    res[rnum++] = id;
  }
  *np = rnum;
  return res;
}
Пример #2
0
/* Clear the cache of a word database object. */
bool tcwdbcacheclear(TCWDB *wdb){
  assert(wdb);
  if(!wdb->open){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    return false;
  }
  return tcbdbcacheclear(wdb->idx);
}
Пример #3
0
/* Unlock a method of the word database object.
   `bdb' specifies the word database object.
   If successful, the return value is true, else, it is false. */
static bool tcwdbunlockmethod(TCWDB *wdb){
  assert(wdb);
  if(pthread_rwlock_unlock(wdb->mmtx) != 0){
    tcbdbsetecode(wdb->idx, TCETHREAD, __FILE__, __LINE__, __func__);
    return false;
  }
  return true;
}
Пример #4
0
/* Close a word database object. */
bool tcwdbclose(TCWDB *wdb){
  assert(wdb);
  if(!tcwdblockmethod(wdb, true)) return false;
  if(!wdb->open){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    tcwdbunlockmethod(wdb);
    return false;
  }
  bool rv = tcwdbcloseimpl(wdb);
  tcwdbunlockmethod(wdb);
  return rv;
}
Пример #5
0
/* Open a word database object. */
bool tcwdbopen(TCWDB *wdb, const char *path, int omode){
  assert(wdb && path);
  if(!tcwdblockmethod(wdb, true)) return false;
  if(wdb->open){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    tcwdbunlockmethod(wdb);
    return false;
  }
  bool rv = tcwdbopenimpl(wdb, path, omode);
  tcwdbunlockmethod(wdb);
  return rv;
}
Пример #6
0
/* Set the maximum number of forward matching expansion of a word database object. */
bool tcwdbsetfwmmax(TCWDB *wdb, uint32_t fwmmax){
  assert(wdb);
  if(!tcwdblockmethod(wdb, true)) return false;
  if(wdb->open){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    tcwdbunlockmethod(wdb);
    return false;
  }
  wdb->fwmmax = fwmmax;
  tcwdbunlockmethod(wdb);
  return true;
}
Пример #7
0
/* Synchronize updated contents of a word database object with the file and the device. */
bool tcwdbsync(TCWDB *wdb){
  assert(wdb);
  if(!tcwdblockmethod(wdb, true)) return false;
  if(!wdb->open || !wdb->cc){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    tcwdbunlockmethod(wdb);
    return false;
  }
  bool err = false;
  if(!tcwdbmemsync(wdb, 2)) err = true;
  tcwdbunlockmethod(wdb);
  return !err;
}
Пример #8
0
/* Set the caching parameters of a word database object. */
bool tcwdbsetcache(TCWDB *wdb, int64_t icsiz, int32_t lcnum){
  assert(wdb);
  if(!tcwdblockmethod(wdb, true)) return false;
  if(wdb->open){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    tcwdbunlockmethod(wdb);
    return false;
  }
  wdb->icsiz = (icsiz > 0) ? icsiz : WDBCCDEFICSIZ;
  wdb->lcnum = (lcnum > 0) ? lcnum : 0;
  tcwdbunlockmethod(wdb);
  return true;
}
Пример #9
0
/* Set the tuning parameters of a word database object. */
bool tcwdbtune(TCWDB *wdb, int64_t etnum, uint8_t opts){
  assert(wdb);
  if(!tcwdblockmethod(wdb, true)) return false;
  if(wdb->open){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    tcwdbunlockmethod(wdb);
    return false;
  }
  wdb->etnum = (etnum > 0) ? etnum : WDBDEFETNUM;
  wdb->opts = opts;
  tcwdbunlockmethod(wdb);
  return true;
}
Пример #10
0
/* Copy the database file of a word database object. */
bool tcwdbcopy(TCWDB *wdb, const char *path){
  assert(wdb && path);
  if(!tcwdblockmethod(wdb, false)) return false;
  if(!wdb->open || !wdb->cc){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    tcwdbunlockmethod(wdb);
    return false;
  }
  bool err = false;
  if(!tcwdbmemsync(wdb, 1)) err = true;
  if(!tcbdbcopy(wdb->idx, path)) err = true;
  tcwdbunlockmethod(wdb);
  return !err;
}
Пример #11
0
/* Store a record into a word database object. */
bool tcwdbput(TCWDB *wdb, int64_t id, const TCLIST *words){
  assert(wdb && id > 0 && words);
  if(!tcwdblockmethod(wdb, true)) return false;
  if(!wdb->open || !wdb->cc){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    tcwdbunlockmethod(wdb);
    return false;
  }
  if(tcidsetcheck(wdb->dids, id) && !tcwdbmemsync(wdb, 0)){
    tcwdbunlockmethod(wdb);
    return false;
  }
  bool rv = tcwdbputimpl(wdb, id, words);
  tcwdbunlockmethod(wdb);
  return rv;
}
Пример #12
0
/* setcmpfunc */
JNIEXPORT jboolean JNICALL Java_tokyocabinet_BDB_setcmpfunc
(JNIEnv *env, jobject self, jint cmp){
  TCBDB *bdb = (TCBDB *)(intptr_t)(*env)->GetLongField(env, self, bdb_fid_ptr);
  TCCMPOP *cmpop = tcbdbcmpop(bdb);
  if(cmpop){
    (*env)->DeleteGlobalRef(env, cmpop->obj);
    tcfree(cmpop);
  }
  switch(cmp){
  case tokyocabinet_BDB_CMPLEXICAL: return tcbdbsetcmpfunc(bdb, tccmplexical, NULL);
  case tokyocabinet_BDB_CMPDECIMAL: return tcbdbsetcmpfunc(bdb, tccmpdecimal, NULL);
  case tokyocabinet_BDB_CMPINT32: return tcbdbsetcmpfunc(bdb, tccmpint32, NULL);
  case tokyocabinet_BDB_CMPINT64: return tcbdbsetcmpfunc(bdb, tccmpint64, NULL);
  default: break;
  }
  tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
  return false;
}
Пример #13
0
/* Open a word database object.
   `wdb' specifies the word database object.
   `path' specifies the path of the database file.
   `omode' specifies the connection mode.
   If successful, the return value is true, else, it is false. */
static bool tcwdbopenimpl(TCWDB *wdb, const char *path, int omode){
  assert(wdb && path);
  int bomode = BDBOREADER;
  if(omode & WDBOWRITER){
    bomode = BDBOWRITER;
    if(omode & WDBOCREAT) bomode |= BDBOCREAT;
    if(omode & WDBOTRUNC) bomode |= BDBOTRUNC;
    int64_t bnum = (wdb->etnum / WDBLMEMB) * 2 + 1;
    int bopts = 0;
    if(wdb->opts & WDBTLARGE) bopts |= BDBTLARGE;
    if(wdb->opts & WDBTDEFLATE) bopts |= BDBTDEFLATE;
    if(wdb->opts & WDBTBZIP) bopts |= BDBTBZIP;
    if(wdb->opts & WDBTTCBS) bopts |= BDBTTCBS;
    if(!tcbdbtune(wdb->idx, WDBLMEMB, WDBNMEMB, bnum, WDBAPOW, WDBFPOW, bopts)) return false;
    if(!tcbdbsetlsmax(wdb->idx, WDBLSMAX)) return false;
  }
  if(wdb->lcnum > 0){
    if(!tcbdbsetcache(wdb->idx, wdb->lcnum, wdb->lcnum / 4 + 1)) return false;
  } else {
    if(!tcbdbsetcache(wdb->idx, (omode & WDBOWRITER) ? WDBLCNUMW : WDBLCNUMR, WDBNCNUM))
      return false;
  }
  if(omode & WDBONOLCK) bomode |= BDBONOLCK;
  if(omode & WDBOLCKNB) bomode |= BDBOLCKNB;
  if(!tcbdbopen(wdb->idx, path, bomode)) return false;
  if((omode & WDBOWRITER) && tcbdbrnum(wdb->idx) < 1){
    memcpy(tcbdbopaque(wdb->idx), WDBMAGICDATA, strlen(WDBMAGICDATA));
  } else if(!(omode & WDBONOLCK) &&
            memcmp(tcbdbopaque(wdb->idx), WDBMAGICDATA, strlen(WDBMAGICDATA))){
    tcbdbclose(wdb->idx);
    tcbdbsetecode(wdb->idx, TCEMETA, __FILE__, __LINE__, __func__);
    return 0;
  }
  if(omode & WDBOWRITER){
    wdb->cc = tcmapnew2(WDBCCBNUM);
    wdb->dtokens = tcmapnew2(WDBDTKNBNUM);
    wdb->dids = tcidsetnew(WDBDIDSBNUM);
  }
  wdb->open = true;
  return true;
}
Пример #14
0
/* Search a word database. */
uint64_t *tcwdbsearch(TCWDB *wdb, const char *word, int *np){
  assert(wdb && word && np);
  if(!tcwdblockmethod(wdb, false)) return NULL;
  if(!wdb->open){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    tcwdbunlockmethod(wdb);
    return NULL;
  }
  if(wdb->cc && (tcmaprnum(wdb->cc) > 0 || tcmaprnum(wdb->dtokens) > 0)){
    tcwdbunlockmethod(wdb);
    if(!tcwdblockmethod(wdb, true)) return NULL;
    if(!tcwdbmemsync(wdb, 0)){
      tcwdbunlockmethod(wdb);
      return NULL;
    }
    tcwdbunlockmethod(wdb);
    if(!tcwdblockmethod(wdb, false)) return NULL;
  }
  uint64_t *rv = tcwdbsearchimpl(wdb, word, np);
  tcwdbunlockmethod(wdb);
  return rv;
}
Пример #15
0
/* Synchronize updating contents on memory of a word database object. */
bool tcwdbmemsync(TCWDB *wdb, int level){
  assert(wdb);
  if(!wdb->open || !wdb->cc){
    tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
    return false;
  }
  bool err = false;
  bool (*synccb)(int, int, const char *, void *) = wdb->synccb;
  void *syncopq = wdb->syncopq;
  bool (*addcb)(const char *, void *) = wdb->addcb;
  void *addopq = wdb->addopq;
  TCBDB *idx = wdb->idx;
  TCMAP *cc = wdb->cc;
  if(synccb && !synccb(0, 0, "started", syncopq)){
    tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
    return false;
  }
  if(tcmaprnum(cc) > 0){
    if(synccb && !synccb(0, 0, "getting tokens", syncopq)){
      tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
      return false;
    }
    int kn;
    const char **keys = tcmapkeys2(cc, &kn);
    if(synccb && !synccb(kn, 0, "sorting tokens", syncopq)){
      tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
      tcfree(keys);
      return false;
    }
    qsort(keys, kn, sizeof(*keys), (int(*)(const void *, const void *))tccmpwords);
    for(int i = 0; i < kn; i++){
      if(synccb && !synccb(kn, i + 1, "storing tokens", syncopq)){
        tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
        tcfree(keys);
        return false;
      }
      const char *kbuf = keys[i];
      int ksiz = strlen(kbuf);
      int vsiz;
      const char *vbuf = tcmapget(cc, kbuf, ksiz, &vsiz);
      if(!tcbdbputcat(idx, kbuf, ksiz, vbuf, vsiz)) err = true;
    }
    if(addcb){
      if(synccb && !synccb(0, 0, "storing keyword list", syncopq)){
        tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
        tcfree(keys);
        return false;
      }
      for(int i = 0; i < kn; i++){
        if(!addcb(keys[i], addopq)){
          tcfree(keys);
          return false;
        }
      }
    }
    tcfree(keys);
    tcmapclear(cc);
  }
  TCMAP *dtokens = wdb->dtokens;
  TCIDSET *dids = wdb->dids;
  if(tcmaprnum(dtokens) > 0){
    if(synccb && !synccb(0, 0, "getting deleted tokens", syncopq)){
      tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
      return false;
    }
    int kn;
    const char **keys = tcmapkeys2(dtokens, &kn);
    if(synccb && !synccb(kn, 0, "sorting deleted tokens", syncopq)){
      tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
      tcfree(keys);
      return false;
    }
    qsort(keys, kn, sizeof(*keys), (int(*)(const void *, const void *))tccmpwords);
    for(int i = 0; i < kn; i++){
      if(synccb && !synccb(kn, i + 1, "storing deleted tokens", syncopq)){
        tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
        tcfree(keys);
        return false;
      }
      const char *kbuf = keys[i];
      int ksiz = strlen(kbuf);
      int vsiz;
      const char *vbuf = tcbdbget3(idx, kbuf, ksiz, &vsiz);
      if(!vbuf) continue;
      char *nbuf = tcmalloc(vsiz + 1);
      char *wp = nbuf;
      const char *pv;
      while(vsiz > 0){
        pv = vbuf;
        int step;
        uint64_t id;
        TDREADVNUMBUF64(vbuf, id, step);
        vbuf += step;
        vsiz -= step;
        if(!tcidsetcheck(dids, id)){
          int len = vbuf - pv;
          memcpy(wp, pv, len);
          wp += len;
        }
      }
      int nsiz = wp - nbuf;
      if(nsiz > 0){
        if(!tcbdbput(idx, kbuf, ksiz, nbuf, nsiz)) err = true;
      } else {
        if(!tcbdbout(idx, kbuf, ksiz)) err = true;
      }
      tcfree(nbuf);
    }
    tcfree(keys);
    tcmapclear(dtokens);
    tcidsetclear(dids);
  }
  if(level > 0){
    if(synccb && !synccb(0, 0, "synchronizing database", syncopq)){
      tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
      return false;
    }
    if(!tcbdbmemsync(idx, level > 1)) err = true;
  }
  if(synccb && !synccb(0, 0, "finished", syncopq)){
    tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
    return false;
  }
  return !err;
}