Example #1
0
//Tear down a proc instance: report counters, serialize and destroy the
//query table, then release all dynamic allocations.
//return 1 if successful
//return 0 if no instance was supplied
int proc_destroy(void * vinstance) {
     proc_instance_t * proc = (proc_instance_t*)vinstance;

     //nothing to destroy - dereferencing a NULL instance would be UB
     if (!proc) {
          return 0;
     }

     tool_print("query cnt %" PRIu64, proc->query_cnt);
     tool_print("increment cnt %" PRIu64, proc->increment_cnt);
     tool_print("decrement cnt %" PRIu64, proc->decrement_cnt);
     tool_print("output cnt %" PRIu64, proc->outcnt);

     //flush table contents to disk before destroying it
     serialize_table(proc);
     stringhash5_destroy(proc->cntquery_table);

     //free dynamic allocations (free(NULL) is a no-op, so no guards needed)
     free(proc->sharelabel);
     free(proc->outfile);
     free(proc);

     return 1;
}
Example #2
0
//Serialize one key/value pair from the Lua stack to data->m_fh.
//The key sits at stack index -2 and the value at -1 (the layout produced
//by lua_next during table traversal). 'parent' is the offset into
//data->m_keys of the enclosing table's fully qualified name.
//Returns 0 on success, non-zero on failure.
int serialize_kvp(lua_sandbox* lsb, serialization_data* data, size_t parent)
{
    int kindex = -2, vindex = -1;

    //values of unsupported types are silently skipped, not an error
    if (ignore_value_type(lsb, data, vindex)) return 0;
    //render the key text into lsb->m_output
    int result = serialize_data(lsb, kindex, &lsb->m_output);
    if (result != 0) return result;

    //append "<parent>[<key>]" to the key buffer; 'pos' remembers where this
    //entry's qualified name starts so m_pos can be rewound below
    size_t pos = data->m_keys.m_pos;
    if (dynamic_snprintf(&data->m_keys, "%s[%s]", data->m_keys.m_data + parent,
                         lsb->m_output.m_data)) {
        return 1;
    }

    fprintf(data->m_fh, "%s = ", data->m_keys.m_data + pos);
    if (lua_type(lsb->m_lua, vindex) == LUA_TTABLE) {
        //tables are tracked by address so a table reached twice (shared or
        //recursive reference) is emitted once, then re-assigned by name
        const void* ptr = lua_topointer(lsb->m_lua, vindex);
        table_ref* seen = find_table_ref(&data->m_tables, ptr);
        if (seen == NULL) {
            seen = add_table_ref(&data->m_tables, ptr, pos);
            if (seen != NULL) {
                //NOTE(review): advancing m_pos by one appears to keep this
                //entry's name (incl. its terminator) live in the buffer while
                //the nested table is serialized - confirm against
                //dynamic_snprintf's append contract
                data->m_keys.m_pos += 1;
                fprintf(data->m_fh, "{}\n");
                result = serialize_table(lsb, data, pos);
            } else {
                snprintf(lsb->m_error_message, ERROR_SIZE,
                         "preserve table out of memory");
                return 1;
            }
        } else {
            //already emitted: rewind the key buffer and alias the first name
            data->m_keys.m_pos = pos;
            fprintf(data->m_fh, "%s\n", data->m_keys.m_data + seen->m_name_pos);
        }
    } else {
        //scalar value: rewind the key buffer and write the rendered value
        data->m_keys.m_pos = pos;
        result = serialize_data(lsb, vindex, &lsb->m_output);
        if (result == 0) {
            fprintf(data->m_fh, "%s\n", lsb->m_output.m_data);
        }
    }
    return result;
}
Example #3
0
//Drive table serialization as a two-step state machine.
//A table in the READING state is first prepared (serialize_init) and moved
//to SERIALIZING; it then proceeds exactly as if it had already been in that
//state. Any other state is rejected.
bss_status_t sdb_serialize( sdb_table_t *tbl, bss_ctx_t *bss_ctx) {
    int rc;

    if( SDB_ST_READING == tbl->state) {
        if( tbl->nwrittenobjects < tbl->ncolumns) return SDB_EOK; /* empty table */
        rc = serialize_init( tbl, bss_ctx);
        if( rc) return rc;
        tbl->state = SDB_ST_SERIALIZING;
        /* continue below, mirroring the original switch fall-through */
    }

    if( SDB_ST_SERIALIZING == tbl->state) {
        /* only the context that started serialization may continue it */
        if( bss_ctx != tbl->serialization_ctx->bss_ctx) return BSS_EINVALID;
        rc = serialize_table( tbl);
        if( SDB_EOK == rc) tbl->state = SDB_ST_READING;
        return rc;
    }

    return SDB_EBADSTATE;
}
//Tear down a proc instance: report counters, serialize and destroy the
//hash tables, then release all dynamic allocations.
//return 1 if successful
//return 0 if no instance was supplied
int proc_destroy(void * vinstance) {
     proc_instance_t * proc = (proc_instance_t*)vinstance;

     //nothing to destroy - dereferencing a NULL instance would be UB
     if (!proc) {
          return 0;
     }

     tool_print("%" PRIu64" input buffers", proc->ibufs);
     tool_print("%" PRIu64" datums", proc->datums);
     if (proc->twolevel) {
          tool_print("%" PRIu64" outer keys", proc->outerkeys);
          tool_print("%" PRIu64" inner keys", proc->keys);
     }
     else {
          tool_print("%" PRIu64" uniq keys", proc->keys);
     }
     tool_print("%" PRIu64" Total Positives", proc->positive);
     tool_print("%" PRIu64" Total Negatives", proc->negative);
     tool_print("---- anomaly detection");
     tool_print("%" PRIu64" True Positives", proc->tp);
     tool_print("%" PRIu64" False Negatives", proc->fn);
     tool_print("%" PRIu64" False Positives", proc->fp);
     tool_print("%" PRIu64" True Negatives", proc->tn);
     if (proc->errors) {
          tool_print("%" PRIu64" Parsing Errors", proc->errors);
     }

     //flush table contents to disk before destroying them
     serialize_table(proc);
     stringhash5_destroy(proc->key_table);
     if (proc->twolevel) {
          serialize_outer_table(proc);
          stringhash5_destroy(proc->outer_table);
     }

     //free dynamic allocations (free(NULL) is a no-op, so no guards needed)
     free(proc->sharelabel);
     free(proc->sharelabel5);
     free(proc->outfile);
     free(proc->outer_file);
     free(proc);

     return 1;
}
/* Serialize one key/value pair from the Lua stack to data->fh.
 * The key sits at stack index -2 and the value at -1 (the layout produced
 * by lua_next during table traversal). 'parent' is the offset into
 * data->keys of the enclosing table's fully qualified name.
 * Returns NULL on success, an lsb_err_value on failure. */
static lsb_err_value
serialize_kvp(lsb_lua_sandbox *lsb, serialization_data *data, size_t parent)
{
  lsb_err_value ret = NULL;
  lua_CFunction fp = NULL;
  int kindex = -2, vindex = -1;
  /* unsupported value types are silently skipped (success, not error);
   * for userdata, ignore_value_type also yields its serializer in fp */
  if (ignore_value_type(lsb, data, vindex, &fp)) {
    return ret;
  }
  /* render the key text into lsb->output */
  ret = serialize_data(lsb, kindex, &lsb->output);
  if (ret) {
    return ret;
  }

  /* append "<parent>[<key>]" to the key buffer; 'pos' remembers where this
   * entry's qualified name starts so keys.pos can be rewound below */
  size_t pos = data->keys.pos;
  ret = lsb_outputf(&data->keys, "%s[%s]", data->keys.buf + parent,
                    lsb->output.buf);
  if (ret) return ret;

  if (lua_type(lsb->lua, vindex) == LUA_TTABLE) {
    /* tables are tracked by address so a table reached twice (shared or
     * recursive reference) is emitted once, then re-assigned by name */
    const void *ptr = lua_topointer(lsb->lua, vindex);
    table_ref *seen = find_table_ref(&data->tables, ptr);
    if (seen == NULL) {
      seen = add_table_ref(&data->tables, ptr, pos);
      if (seen != NULL) {
        /* NOTE(review): advancing keys.pos by one appears to keep this
         * entry's name (incl. its terminator) live while the nested table
         * is serialized - confirm against lsb_outputf's append contract */
        data->keys.pos += 1;
        fprintf(data->fh, "%s = {}\n", data->keys.buf + pos);
        ret = serialize_table(lsb, data, pos);
      } else {
        snprintf(lsb->error_message, LSB_ERROR_SIZE,
                 "lsb_serialize preserve table out of memory");
        return LSB_ERR_UTIL_OOM;
      }
    } else {
      /* already emitted: alias the first name, rewinding the key buffer */
      fprintf(data->fh, "%s = ", data->keys.buf + pos);
      data->keys.pos = pos;
      fprintf(data->fh, "%s\n", data->keys.buf + seen->name_pos);
    }
  } else if (lua_type(lsb->lua, vindex) == LUA_TUSERDATA) {
    /* userdata is deduplicated by address just like tables */
    void *ud = lua_touserdata(lsb->lua, vindex);
    table_ref *seen = find_table_ref(&data->tables, ud);
    if (seen == NULL) {
      seen = add_table_ref(&data->tables, ud, pos);
      if (seen != NULL) {
        data->keys.pos += 1;
        /* the serializer callback receives the key name and the output
         * buffer as light userdata arguments on the Lua stack */
        lua_pushlightuserdata(lsb->lua, data->keys.buf + pos);
        lua_pushlightuserdata(lsb->lua, &lsb->output);
        lsb->output.pos = 0;
        int result = fp(lsb->lua);
        lua_pop(lsb->lua, 2); // remove the key and the output
        /* NOTE(review): a non-zero result from fp is silently ignored
         * (nothing is written and no error is returned) - confirm this is
         * the intended best-effort behavior */
        if (!result) {
          size_t n = fwrite(lsb->output.buf, 1, lsb->output.pos, data->fh);
          if (n != lsb->output.pos) {
            snprintf(lsb->error_message, LSB_ERROR_SIZE,
                     "lsb_serialize failed %s", data->keys.buf + pos);
            return LSB_ERR_LUA;
          }
        }
      } else {
        snprintf(lsb->error_message, LSB_ERROR_SIZE,
                 "lsb_serialize out of memory %s", data->keys.buf + pos);
        return LSB_ERR_UTIL_OOM;
      }
    } else {
      /* already emitted: alias the first name, rewinding the key buffer */
      fprintf(data->fh, "%s = ", data->keys.buf + pos);
      data->keys.pos = pos;
      fprintf(data->fh, "%s\n", data->keys.buf +
              seen->name_pos);
    }
  } else {
    /* scalar value: rewind the key buffer and write the rendered value */
    fprintf(data->fh, "%s = ", data->keys.buf + pos);
    data->keys.pos = pos;
    ret = serialize_data(lsb, vindex, &lsb->output);
    if (!ret) {
      fprintf(data->fh, "%s\n", lsb->output.buf);
    }
  }
  return ret;
}
Example #6
0
void createProbingPT(const char * phrasetable_path, const char * target_path){
    //Get basepath and create directory if missing
    std::string basepath(target_path);
    mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);

    //Set up huffman and serialize decoder maps.
    Huffman huffmanEncoder(phrasetable_path); //initialize
    huffmanEncoder.assign_values();
    huffmanEncoder.produce_lookups();
    huffmanEncoder.serialize_maps(target_path);

    //Get uniq lines:
    unsigned long uniq_entries = huffmanEncoder.getUniqLines();

    //Source phrase vocabids
    std::map<uint64_t, std::string> source_vocabids;

    //Read the file
    util::FilePiece filein(phrasetable_path);

    //Init the probing hash table
    size_t size = Table::Size(uniq_entries, 1.2);
    char * mem = new char[size];
    memset(mem, 0, size);
    Table table(mem, size);

    BinaryFileWriter binfile(basepath); //Init the binary file writer.

    line_text prev_line; //Check if the source phrase of the previous line is the same

    //Keep track of the size of each group of target phrases
    uint64_t entrystartidx = 0;
    //uint64_t line_num = 0;


    //Read everything and processs
    while(true){
        try {
            //Process line read
            line_text line;
            line = splitLine(filein.ReadLine());
            //Add source phrases to vocabularyIDs
            add_to_map(&source_vocabids, line.source_phrase);

            if ((binfile.dist_from_start + binfile.extra_counter) == 0) {
                prev_line = line; //For the first iteration assume the previous line is
            } //The same as this one.

            if (line.source_phrase != prev_line.source_phrase){

                //Create a new entry even

                //Create an entry for the previous source phrase:
                Entry pesho;
                pesho.value = entrystartidx;
                //The key is the sum of hashes of individual words. Probably not entirerly correct, but fast
                pesho.key = 0;
                std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase);
                for (int i = 0; i < vocabid_source.size(); i++){
                    pesho.key += vocabid_source[i];
                }
                pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx;

                //Put into table
                table.Insert(pesho);

                entrystartidx = binfile.dist_from_start + binfile.extra_counter; //Designate start idx for new entry

                //Encode a line and write it to disk.
                std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line);
                binfile.write(&encoded_line);

                //Set prevLine
                prev_line = line;

            } else{
                //If we still have the same line, just append to it:
                std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line);
                binfile.write(&encoded_line);
            }

        } catch (util::EndOfFileException e){
            std::cerr << "Reading phrase table finished, writing remaining files to disk." << std::endl;
            binfile.flush();

            //After the final entry is constructed we need to add it to the phrase_table
            //Create an entry for the previous source phrase:
            Entry pesho;
            pesho.value = entrystartidx;
            //The key is the sum of hashes of individual words. Probably not entirerly correct, but fast
            pesho.key = 0;
            std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase);
            for (int i = 0; i < vocabid_source.size(); i++){
                pesho.key += vocabid_source[i];
            }
            pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx;
            //Put into table
            table.Insert(pesho);

            break;
        }
    }

    serialize_table(mem, size, (basepath + "/probing_hash.dat").c_str());

    serialize_map(&source_vocabids, (basepath + "/source_vocabids").c_str());
    
    delete[] mem;

    //Write configfile
    std::ofstream configfile;
    configfile.open((basepath + "/config").c_str());
    configfile << uniq_entries << '\n';
    configfile.close();
}