Example #1
0
/* RTDB_tp_as_mapping.mp_ass_subscript
 *
 * Item-assignment slot for RTDB objects. When pyvalue is non-NULL, the key and
 * the value are converted to their C representations and the record is written
 * with tcrdbtblput(). Every failure path visible here returns -1.
 *
 * NOTE(review): this excerpt is cut off inside the `else` branch (the
 * pyvalue == NULL case), so the remainder of the function -- including its
 * success return value -- is not shown here.
 */
static int
RTDB_SetItem(RTDB *self, PyObject *pykey, PyObject *pyvalue)
{
    void *key;
    int key_size;
    TCMAP *value;
    bool result;
    RDBBase *rdbbase = (RDBBase *)self;

    /* Convert the key; a non-zero result from the helper is treated as failure.
     * Presumably the helper has already set a Python exception -- TODO confirm. */
    if (bytes_to_void(pykey, &key, &key_size)) {
        return -1;
    }
    if (pyvalue) {
        /* Build the column map from the Python value; NULL means the
         * conversion failed. */
        value = dict_to_tcmap(pyvalue);
        if (!value) {
            return -1;
        }
        /* The GIL is released for the duration of the remote call. */
        Py_BEGIN_ALLOW_THREADS
        result = tcrdbtblput(rdbbase->rdb, key, key_size, value);
        Py_END_ALLOW_THREADS
        if (!result) {
            /* The map is freed on the failure path as well as on success. */
            tcmapdel(value);
            set_rdb_error(rdbbase->rdb, NULL);
            return -1;
        }
        tcmapdel(value);
    }
    /* pyvalue == NULL: the handling of this case lies beyond the excerpt. */
    else {
Example #2
0
/*
 * Store the column map `cols` under the NUL-terminated primary key `pkey`
 * by calling tcrdbtblput().
 *
 * The key is staged in a fixed-size local buffer before the call, so it may
 * be at most sizeof(pkbuf) - 1 bytes long. A NULL key, a formatting error, or
 * a key that does not fit is rejected without touching the database.
 *
 * Returns the result of tcrdbtblput() converted to int, or 0 when the key is
 * rejected.
 */
int
xtcrdb_tblput(void* rdb, const char* pkey, void* cols)
{
        char pkbuf[256];
        int pksiz;

        if (pkey == NULL)
                return 0;

        /* Bounded copy: snprintf never writes past the buffer and reports the
         * length the full key would have needed. */
        pksiz = snprintf(pkbuf, sizeof pkbuf, "%s", pkey);
        if (pksiz < 0 || (size_t)pksiz >= sizeof pkbuf)
                return 0;

        return tcrdbtblput(rdb, pkbuf, pksiz, cols);
}
Example #3
0
/* putimpl
 *
 * Native backend for storing a table record.
 *
 *   pkey  - primary key bytes; must not be null.
 *   cols  - flat array of byte arrays laid out as name, value, name, value...
 *           A trailing unpaired element is ignored. Must not be null, and no
 *           element that is read may be null.
 *   dmode - 0: tcrdbtblput, 1: tcrdbtblputkeep, 2: tcrdbtblputcat;
 *           any other value stores nothing and yields false.
 *
 * Returns the result of the selected store call. Returns false with a pending
 * exception raised through throwillarg()/throwoutmem() when an argument is
 * null or array contents cannot be obtained. Every pinned array, the column
 * map and the per-iteration local references are released on all paths.
 */
JNIEXPORT jboolean JNICALL Java_tokyotyrant_TCRDB_putimpl
(JNIEnv *env, jobject self, jbyteArray pkey, jobjectArray cols, jint dmode){
  if(!pkey || !cols){
    throwillarg(env);
    return false;
  }
  TCRDB *tcrdb = (TCRDB *)(intptr_t)(*env)->GetLongField(env, self, tcrdb_fid_ptr);
  jbyte *kbuf = (*env)->GetByteArrayElements(env, pkey, NULL);
  if(!kbuf){
    throwoutmem(env);
    return false;
  }
  int ksiz = (*env)->GetArrayLength(env, pkey);
  jsize cnum = (*env)->GetArrayLength(env, cols);
  TCMAP *tcols = tcmapnew2(cnum + 1);
  bool err = false;
  cnum--;
  for(int i = 0; i < cnum && !err; i += 2){
    jobject name = (*env)->GetObjectArrayElement(env, cols, i);
    jobject val = (*env)->GetObjectArrayElement(env, cols, i + 1);
    jbyte *nbuf = NULL;
    jbyte *vbuf = NULL;
    if(!name || !val){
      /* a null element cannot be handed to GetByteArrayElements */
      throwillarg(env);
      err = true;
    } else {
      nbuf = (*env)->GetByteArrayElements(env, name, NULL);
      if(nbuf) vbuf = (*env)->GetByteArrayElements(env, val, NULL);
      if(!nbuf || !vbuf){
        throwoutmem(env);
        err = true;
      } else {
        int nsiz = (*env)->GetArrayLength(env, name);
        int vsiz = (*env)->GetArrayLength(env, val);
        tcmapputkeep(tcols, nbuf, nsiz, vbuf, vsiz);
      }
    }
    /* Elements must be released whether or not they were copies; JNI_ABORT
       skips the copy-back because the buffers were only read. */
    if(vbuf) (*env)->ReleaseByteArrayElements(env, val, vbuf, JNI_ABORT);
    if(nbuf) (*env)->ReleaseByteArrayElements(env, name, nbuf, JNI_ABORT);
    /* Drop the local references now so that many columns cannot exhaust the
       local reference table. */
    if(val) (*env)->DeleteLocalRef(env, val);
    if(name) (*env)->DeleteLocalRef(env, name);
  }
  bool rv = false;
  if(!err){
    switch(dmode){
    case 0:
      rv = tcrdbtblput(tcrdb, kbuf, ksiz, tcols);
      break;
    case 1:
      rv = tcrdbtblputkeep(tcrdb, kbuf, ksiz, tcols);
      break;
    case 2:
      rv = tcrdbtblputcat(tcrdb, kbuf, ksiz, tcols);
      break;
    default:
      rv = false;
      break;
    }
  }
  tcmapdel(tcols);
  (*env)->ReleaseByteArrayElements(env, pkey, kbuf, JNI_ABORT);
  return rv;
}
/**
 * Persist one crawled record.
 *
 * Writes the record's metadata as a table row into record_db, keyed by the
 * MD5 hash of its URL, and then writes the raw HTML into html_record_db under
 * the same key. The raw HTML write is attempted even when the metadata write
 * failed.
 *
 * @param rec  the crawled record to store
 * @return true only when both writes succeeded; each failure is reported on
 *         stderr with the error message of the database that failed.
 */
bool QCrawlerDB::storeRecord(QCrawlerRecord &rec) {
    QString url = rec.crawl_url().url();
    QString host = rec.crawl_url().host();
    QString url_md5 = md5_hash(url);
    QString parent_url_md5 = rec.crawl_url().parent_url_md5();

    int crawl_level = rec.crawl_url().crawl_level();
    QString anchor_text = rec.crawl_url().anchor_text();
    QString raw_html = rec.raw_html();

    int links_size = rec.raw_sub_links().size();
    int download_time = rec.download_time();
    int last_modified = rec.last_modified();
    int loading_time =  rec.loading_time();

    // Encode the key once: it is the "url_md5" column value and the key of
    // both database writes.
    const QByteArray key = url_md5.toUtf8();

    TCMAP *cols = tcmapnew();
    tcmapput2(cols, "url", url.toUtf8().constData());
    tcmapput2(cols, "host", host.toUtf8().constData());
    tcmapput2(cols, "url_md5", key.constData());
    tcmapput2(cols, "parent_url_md5", parent_url_md5.toUtf8().constData());
    tcmapput2(cols, "crawl_level", QByteArray::number(crawl_level).constData());
    tcmapput2(cols, "anchor_text", anchor_text.toUtf8().constData());

    tcmapput2(cols, "title", rec.title().toUtf8().constData());
    tcmapput2(cols, "content", rec.content().toUtf8().constData());

    tcmapput2(cols, "links_size", QByteArray::number(links_size).constData());
    tcmapput2(cols, "download_time", QByteArray::number(download_time).constData());
    tcmapput2(cols, "last_modified", QByteArray::number(last_modified).constData());
    tcmapput2(cols, "loading_time", QByteArray::number(loading_time).constData());

    bool status = true;
    if(!tcrdbtblput(record_db, key.constData(), key.size(), cols)){
        int ecode = tcrdbecode(record_db);
        fprintf(stderr, "store record put error: %s\n",  tcrdberrmsg(ecode));
        status = false;
    }
    tcmapdel(cols);

    // raw_html store
    if (!tcrdbput2(html_record_db, key.constData(), raw_html.toUtf8().constData())) {
        // Read the error code from the database that actually failed.
        int ecode = tcrdbecode(html_record_db);
        fprintf(stderr, "store raw html put error: %s\n",  tcrdberrmsg(ecode));
        return false;
    }

    return status;
}
Example #5
0
/*
 * HTTP handler that stores one point record.
 *
 * Query arguments:
 *   id    - record key (required; 400 when absent)
 *   lat   - latitude, read with argtof() using 0 as the default
 *   lng   - longitude, read with argtof() using 0 as the default
 *   data  - payload stored in the "data" column; stored as an empty string
 *           when the argument is absent
 *
 * The stored columns are "data", "x", "y", "lat" and "lng", where x and y are
 * the integer grid values (coordinate * 10000) + 1800000.
 *
 * Responds with 503 when no database is connected and with 400 when id is
 * missing or a coordinate is too large for its grid value to fit in an int.
 * Otherwise the JSON result is handed to finalize_json().
 */
void put_cb(struct evhttp_request *req, struct evbuffer *evb, void *ctx)
{
    char *uri, *id, *data;
    double lat, lng, gx, gy;
    int x, y;
    char buf[32];
    struct evkeyvalq args;
    struct json_object *jsobj;
    TCMAP *cols;

    if (rdb == NULL) {
        evhttp_send_error(req, 503, "database not connected");
        return;
    }
    uri = evhttp_decode_uri(req->uri);
    evhttp_parse_query(uri, &args);
    free(uri);

    argtof(&args, "lat", &lat, 0);
    argtof(&args, "lng", &lng, 0);
    id = (char *)evhttp_find_header(&args, "id");
    data = (char *)evhttp_find_header(&args, "data");

    if (id == NULL) {
        evhttp_send_error(req, 400, "id is required");
        evhttp_clear_headers(&args);
        return;
    }

    gx = (lat * 10000) + 1800000;
    gy = (lng * 10000) + 1800000;
    /*
     * Converting an out-of-range double to int is undefined, so reject such
     * coordinates up front. The comparisons are written so that NaN fails
     * them too. The bounds assume a 32-bit int. Passing this check also
     * bounds the length of the "%f" output below.
     */
    if (!(gx > -2147483648.0 && gx < 2147483648.0) ||
        !(gy > -2147483648.0 && gy < 2147483648.0)) {
        evhttp_send_error(req, 400, "lat/lng out of range");
        evhttp_clear_headers(&args);
        return;
    }
    x = (int)gx;
    y = (int)gy;

    cols = tcmapnew();
    /* tcmapput2 needs a real string; "data" is optional in the query. */
    tcmapput2(cols, "data", data != NULL ? data : "");
    snprintf(buf, sizeof buf, "%d", x);
    tcmapput2(cols, "x", buf);
    snprintf(buf, sizeof buf, "%d", y);
    tcmapput2(cols, "y", buf);
    snprintf(buf, sizeof buf, "%f", lat);
    tcmapput2(cols, "lat", buf);
    snprintf(buf, sizeof buf, "%f", lng);
    tcmapput2(cols, "lng", buf);

    jsobj = json_object_new_object();
    if (tcrdbtblput(rdb, id, strlen(id), cols)) {
        json_object_object_add(jsobj, "status", json_object_new_string("ok"));
    } else {
        db_status = tcrdbecode(rdb);
        db_error_to_json(db_status, jsobj);
    }

    tcmapdel(cols);

    finalize_json(req, evb, &args, jsobj);
}
Example #6
0
/**
 * Persist one content record.
 *
 * Image records: the raw content is written to media_db under the key
 * "<url_md5>.i".
 * All other records (treated as text/html): the metadata is written as a table
 * row into record_db keyed by url_md5, then the vdom is written through
 * html_memc under the key "<url_md5>.v".
 *
 * @param record  the record to store
 * @return 0 on success, -1 when any write fails (the failure is logged).
 */
int QStore::process(QContentRecord &record)
{
    // Image payloads bypass the table store entirely.
    if (record.want_type == mimetype::image) {
        assert(need_media_db);
        const std::string image_key = record.url_md5 + ".i";
        const bool saved = tcrdbput(media_db, image_key.c_str(), image_key.size(),
                                    record.raw_content.c_str(), record.raw_content.size());
        if (saved) {
            return 0;
        }
        const int media_err = tcrdbecode(media_db);
        // TODO log
        LOG(ERROR) << "put media url " << record.url << " error " <<  tcrdberrmsg(media_err);
        return -1;
    }

    // Everything else is handled as text/html.
    assert(need_html_db && need_record_db);

    // Scratch string reused for every numeric column.
    std::string num;
    TCMAP *row = tcmapnew();

    tcmapput2(row, "url", record.url.c_str());
    tcmapput2(row, "host", record.host.c_str());
    tcmapput2(row, "url_md5", record.url_md5.c_str());
    tcmapput2(row, "parent_url_md5", record.parent_url_md5.c_str());

    strtk::type_to_string(record.crawl_level, num);
    tcmapput2(row, "crawl_level", num.c_str());

    strtk::type_to_string(record.find_time, num);
    tcmapput2(row, "find_time", num.c_str());

    tcmapput2(row, "anchor_text", record.anchor_text.c_str());
    tcmapput2(row, "crawl_tag", record.crawl_tag.c_str());

    // Time of the most recent download.
    strtk::type_to_string(record.download_time, num);
    tcmapput2(row, "download_time", num.c_str());

    strtk::type_to_string(record.http_code, num);
    tcmapput2(row, "http_code", num.c_str());

    tcmapput2(row, "is_list", record.is_list ? "1" : "0");

    if (!record.crawled_okay) {
        tcmapput2(row, "crawled_okay", "0");
    } else {
        // The extracted fields are stored only for successful crawls.
        tcmapput2(row, "crawled_okay", "1");
        tcmapput2(row, "raw_title", record.raw_title.c_str());
        tcmapput2(row, "title", record.title.c_str());
        tcmapput2(row, "keywords", record.keywords.c_str());
        tcmapput2(row, "description", record.description.c_str());
        tcmapput2(row, "content", record.content.c_str());

        strtk::type_to_string(record.publish_time, num);
        tcmapput2(row, "publish_time", num.c_str());

        tcmapput2(row, "images", record.images.c_str());
        tcmapput2(row, "is_redirect", record.is_redirect ? "1" : "0");
        tcmapput2(row, "redirect_url", record.redirect_url.c_str());

        strtk::type_to_string(record.content_confidence, num);
        tcmapput2(row, "content_confidence", num.c_str());

        strtk::type_to_string(record.list_confidence, num);
        tcmapput2(row, "list_confidence", num.c_str());

        strtk::type_to_string(record.links_size, num);
        tcmapput2(row, "links_size", num.c_str());

        strtk::type_to_string(record.last_modified, num);
        tcmapput2(row, "last_modified", num.c_str());

        strtk::type_to_string(record.loading_time, num);
        tcmapput2(row, "loading_time", num.c_str());

        strtk::type_to_string(record.new_links_size, num);
        tcmapput2(row, "new_links_size", num.c_str());
    }

    const bool row_saved = tcrdbtblput(record_db, record.url_md5.c_str(), record.url_md5.size(), row);
    if (!row_saved) {
        const int record_err = tcrdbecode(record_db);
        LOG(ERROR) << "put record error " <<  tcrdberrmsg(record_err);
    }
    // The column map is released whether or not the write succeeded.
    tcmapdel(row);
    if (!row_saved) {
        return -1;
    }

    // The vdom is written through memcached (a former tcrdbput to html_db was
    // disabled in favour of this).
    const std::string vdom_key = record.url_md5 + ".v";
    const memcached_return_t rc = memcached_set(html_memc, vdom_key.c_str(), vdom_key.size(),
            record.vdom.c_str(), record.vdom.size(), 0, 0);
    if (rc == MEMCACHED_SUCCESS) {
        return 0;
    }

    LOG(ERROR) << "put vdom url " << record.url << " size: " << record.vdom.size()
               << " error: " << memcached_strerror(html_memc, rc);
    return -1;
}