//return 1 if successful //return 0 if no.. int proc_destroy(void * vinstance) { proc_instance_t * proc = (proc_instance_t*)vinstance; tool_print("query cnt %" PRIu64, proc->query_cnt); tool_print("increment cnt %" PRIu64, proc->increment_cnt); tool_print("decrement cnt %" PRIu64, proc->decrement_cnt); tool_print("output cnt %" PRIu64, proc->outcnt); //destroy table serialize_table(proc); stringhash5_destroy(proc->cntquery_table); //free dynamic allocations free(proc->sharelabel); free(proc->outfile); free(proc); return 1; }
/*
 * Serialize one key/value pair from the Lua stack into the preservation file.
 *
 * lsb    - sandbox whose Lua stack holds the key at index -2 and the value
 *          at index -1.
 * data   - serialization state: output file handle, accumulated key-path
 *          buffer (m_keys), and the set of already-seen tables (m_tables).
 * parent - offset into data->m_keys.m_data of the parent key path; the
 *          current key is appended to it as "parent[key]".
 *
 * Returns 0 on success, non-zero on error (out of memory or a
 * serialize_data failure).
 */
int serialize_kvp(lua_sandbox* lsb, serialization_data* data, size_t parent)
{
  /* fixed stack positions: key below value, value on top */
  int kindex = -2, vindex = -1;

  /* silently skip value types that cannot be preserved */
  if (ignore_value_type(lsb, data, vindex)) return 0;

  /* render the key into lsb->m_output */
  int result = serialize_data(lsb, kindex, &lsb->m_output);
  if (result != 0) return result;

  /* remember where this pair's full key path starts so it can be
     trimmed back (or kept, for nested tables) afterwards */
  size_t pos = data->m_keys.m_pos;
  if (dynamic_snprintf(&data->m_keys, "%s[%s]", data->m_keys.m_data + parent,
                       lsb->m_output.m_data)) {
    return 1; /* key-path buffer could not grow */
  }

  /* emit "path[key] = " -- the value follows below */
  fprintf(data->m_fh, "%s = ", data->m_keys.m_data + pos);

  if (lua_type(lsb->m_lua, vindex) == LUA_TTABLE) {
    /* identify the table by address to detect shared/cyclic references */
    const void* ptr = lua_topointer(lsb->m_lua, vindex);
    table_ref* seen = find_table_ref(&data->m_tables, ptr);
    if (seen == NULL) {
      /* first visit: record it, emit an empty table, then recurse */
      seen = add_table_ref(&data->m_tables, ptr, pos);
      if (seen != NULL) {
        /* advance past the terminator so nested keys append after this
           path -- presumably accounts for the NUL; verify against
           dynamic_snprintf's contract */
        data->m_keys.m_pos += 1;
        fprintf(data->m_fh, "{}\n");
        result = serialize_table(lsb, data, pos);
      } else {
        snprintf(lsb->m_error_message, ERROR_SIZE,
                 "preserve table out of memory");
        return 1;
      }
    } else {
      /* already serialized: emit an alias to the first occurrence's path */
      data->m_keys.m_pos = pos;
      fprintf(data->m_fh, "%s\n", data->m_keys.m_data + seen->m_name_pos);
    }
  } else {
    /* scalar value: render it and finish the line */
    data->m_keys.m_pos = pos;
    result = serialize_data(lsb, vindex, &lsb->m_output);
    if (result == 0) {
      fprintf(data->m_fh, "%s\n", lsb->m_output.m_data);
    }
  }
  return result;
}
/*
 * Drive the serialization state machine of a table.
 *
 * tbl     - table to serialize; its ->state field tracks progress.
 * bss_ctx - output context; must stay the same across resumed calls.
 *
 * Returns SDB_EOK on completion (or for an empty table), BSS_EINVALID if
 * resumed with a different bss_ctx than the one serialization started
 * with, SDB_EBADSTATE for an unexpected table state, or whatever error
 * serialize_init()/serialize_table() report.
 */
bss_status_t sdb_serialize( sdb_table_t *tbl, bss_ctx_t *bss_ctx)
{
  int r;
  switch( tbl->state) {
  case SDB_ST_READING:
    /* nothing written yet -> treat as an empty table and succeed */
    if( tbl->nwrittenobjects < tbl->ncolumns) return SDB_EOK; // empty table
    r = serialize_init( tbl, bss_ctx);
    if( r) return r;
    /* transition and immediately start emitting */
    tbl->state = SDB_ST_SERIALIZING;
    /* fall through. */
  case SDB_ST_SERIALIZING:
    /* a resumed serialization must use the context it started with */
    if( bss_ctx != tbl->serialization_ctx->bss_ctx) return BSS_EINVALID;
    r = serialize_table( tbl);
    /* only a clean finish returns the table to the READING state;
       partial progress stays in SERIALIZING so the caller can resume */
    if( SDB_EOK == r) tbl->state = SDB_ST_READING;
    return r;
  default:
    return SDB_EBADSTATE;
  }
}
//return 1 if successful //return 0 if no.. int proc_destroy(void * vinstance) { proc_instance_t * proc = (proc_instance_t*)vinstance; tool_print("%" PRIu64" input buffers", proc->ibufs); tool_print("%" PRIu64" datums", proc->datums); if (proc->twolevel) { tool_print("%" PRIu64" outer keys", proc->outerkeys); tool_print("%" PRIu64" inner keys", proc->keys); } else { tool_print("%" PRIu64" uniq keys", proc->keys); } tool_print("%" PRIu64" Total Positives", proc->positive); tool_print("%" PRIu64" Total Negatives", proc->negative); tool_print("---- anomaly detection"); tool_print("%" PRIu64" True Positives", proc->tp); tool_print("%" PRIu64" False Negatives", proc->fn); tool_print("%" PRIu64" False Positives", proc->fp); tool_print("%" PRIu64" True Negatives", proc->tn); if (proc->errors) { tool_print("%" PRIu64" Parsing Errors", proc->errors); } //destroy tables serialize_table(proc); stringhash5_destroy(proc->key_table); if (proc->twolevel) { serialize_outer_table(proc); stringhash5_destroy(proc->outer_table); } //free dynamic allocations free(proc->sharelabel); free(proc->sharelabel5); free(proc->outfile); free(proc->outer_file); free(proc); return 1; }
/*
 * Serialize one key/value pair from the Lua stack into the preservation file.
 *
 * lsb    - sandbox whose Lua stack holds the key at index -2 and the value
 *          at index -1.
 * data   - serialization state: output file handle (fh), accumulated
 *          key-path buffer (keys), and the set of already-seen
 *          tables/userdata (tables).
 * parent - offset into data->keys.buf of the parent key path; the current
 *          key is appended to it as "parent[key]".
 *
 * Returns NULL on success, or an lsb_err_value (OOM, Lua failure, or a
 * serialize_data/lsb_outputf error) on failure.
 */
static lsb_err_value serialize_kvp(lsb_lua_sandbox *lsb, serialization_data *data, size_t parent)
{
  lsb_err_value ret = NULL;
  /* fp is filled in for userdata values that provide their own
     serialization C function */
  lua_CFunction fp = NULL;
  /* fixed stack positions: key below value, value on top */
  int kindex = -2, vindex = -1;

  /* silently skip value types that cannot be preserved */
  if (ignore_value_type(lsb, data, vindex, &fp)) {
    return ret;
  }

  /* render the key into lsb->output */
  ret = serialize_data(lsb, kindex, &lsb->output);
  if (ret) {
    return ret;
  }

  /* remember where this pair's full key path starts so it can be trimmed
     back (or kept, for nested structures) afterwards */
  size_t pos = data->keys.pos;
  ret = lsb_outputf(&data->keys, "%s[%s]", data->keys.buf + parent, lsb->output.buf);
  if (ret) return ret;

  if (lua_type(lsb->lua, vindex) == LUA_TTABLE) {
    /* identify the table by address to detect shared/cyclic references */
    const void *ptr = lua_topointer(lsb->lua, vindex);
    table_ref *seen = find_table_ref(&data->tables, ptr);
    if (seen == NULL) {
      /* first visit: record it, emit an empty table, then recurse */
      seen = add_table_ref(&data->tables, ptr, pos);
      if (seen != NULL) {
        /* advance past the terminator so nested keys append after this
           path -- presumably accounts for the NUL; verify against
           lsb_outputf's contract */
        data->keys.pos += 1;
        fprintf(data->fh, "%s = {}\n", data->keys.buf + pos);
        ret = serialize_table(lsb, data, pos);
      } else {
        snprintf(lsb->error_message, LSB_ERROR_SIZE,
                 "lsb_serialize preserve table out of memory");
        return LSB_ERR_UTIL_OOM;
      }
    } else {
      /* already serialized: emit an alias to the first occurrence's path */
      fprintf(data->fh, "%s = ", data->keys.buf + pos);
      data->keys.pos = pos;
      fprintf(data->fh, "%s\n", data->keys.buf + seen->name_pos);
    }
  } else if (lua_type(lsb->lua, vindex) == LUA_TUSERDATA) {
    /* userdata with a custom serializer (fp from ignore_value_type) */
    void *ud = lua_touserdata(lsb->lua, vindex);
    table_ref *seen = find_table_ref(&data->tables, ud);
    if (seen == NULL) {
      seen = add_table_ref(&data->tables, ud, pos);
      if (seen != NULL) {
        data->keys.pos += 1;
        /* hand the serializer its key path and the shared output buffer
           as light userdata arguments on the Lua stack */
        lua_pushlightuserdata(lsb->lua, data->keys.buf + pos);
        lua_pushlightuserdata(lsb->lua, &lsb->output);
        lsb->output.pos = 0;
        int result = fp(lsb->lua);
        lua_pop(lsb->lua, 2); // remove the key and the output
        if (!result) {
          /* the serializer filled lsb->output; copy it to the file and
             verify the write completed */
          size_t n = fwrite(lsb->output.buf, 1, lsb->output.pos, data->fh);
          if (n != lsb->output.pos) {
            snprintf(lsb->error_message, LSB_ERROR_SIZE,
                     "lsb_serialize failed %s", data->keys.buf + pos);
            return LSB_ERR_LUA;
          }
        }
      } else {
        snprintf(lsb->error_message, LSB_ERROR_SIZE,
                 "lsb_serialize out of memory %s", data->keys.buf + pos);
        return LSB_ERR_UTIL_OOM;
      }
    } else {
      /* already serialized: emit an alias to the first occurrence's path */
      fprintf(data->fh, "%s = ", data->keys.buf + pos);
      data->keys.pos = pos;
      fprintf(data->fh, "%s\n", data->keys.buf + seen->name_pos);
    }
  } else {
    /* scalar value: render it and finish the line */
    fprintf(data->fh, "%s = ", data->keys.buf + pos);
    data->keys.pos = pos;
    ret = serialize_data(lsb, vindex, &lsb->output);
    if (!ret) {
      fprintf(data->fh, "%s\n", lsb->output.buf);
    }
  }
  return ret;
}
void createProbingPT(const char * phrasetable_path, const char * target_path){ //Get basepath and create directory if missing std::string basepath(target_path); mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); //Set up huffman and serialize decoder maps. Huffman huffmanEncoder(phrasetable_path); //initialize huffmanEncoder.assign_values(); huffmanEncoder.produce_lookups(); huffmanEncoder.serialize_maps(target_path); //Get uniq lines: unsigned long uniq_entries = huffmanEncoder.getUniqLines(); //Source phrase vocabids std::map<uint64_t, std::string> source_vocabids; //Read the file util::FilePiece filein(phrasetable_path); //Init the probing hash table size_t size = Table::Size(uniq_entries, 1.2); char * mem = new char[size]; memset(mem, 0, size); Table table(mem, size); BinaryFileWriter binfile(basepath); //Init the binary file writer. line_text prev_line; //Check if the source phrase of the previous line is the same //Keep track of the size of each group of target phrases uint64_t entrystartidx = 0; //uint64_t line_num = 0; //Read everything and processs while(true){ try { //Process line read line_text line; line = splitLine(filein.ReadLine()); //Add source phrases to vocabularyIDs add_to_map(&source_vocabids, line.source_phrase); if ((binfile.dist_from_start + binfile.extra_counter) == 0) { prev_line = line; //For the first iteration assume the previous line is } //The same as this one. if (line.source_phrase != prev_line.source_phrase){ //Create a new entry even //Create an entry for the previous source phrase: Entry pesho; pesho.value = entrystartidx; //The key is the sum of hashes of individual words. 
Probably not entirerly correct, but fast pesho.key = 0; std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase); for (int i = 0; i < vocabid_source.size(); i++){ pesho.key += vocabid_source[i]; } pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx; //Put into table table.Insert(pesho); entrystartidx = binfile.dist_from_start + binfile.extra_counter; //Designate start idx for new entry //Encode a line and write it to disk. std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line); binfile.write(&encoded_line); //Set prevLine prev_line = line; } else{ //If we still have the same line, just append to it: std::vector<unsigned char> encoded_line = huffmanEncoder.full_encode_line(line); binfile.write(&encoded_line); } } catch (util::EndOfFileException e){ std::cerr << "Reading phrase table finished, writing remaining files to disk." << std::endl; binfile.flush(); //After the final entry is constructed we need to add it to the phrase_table //Create an entry for the previous source phrase: Entry pesho; pesho.value = entrystartidx; //The key is the sum of hashes of individual words. Probably not entirerly correct, but fast pesho.key = 0; std::vector<uint64_t> vocabid_source = getVocabIDs(prev_line.source_phrase); for (int i = 0; i < vocabid_source.size(); i++){ pesho.key += vocabid_source[i]; } pesho.bytes_toread = binfile.dist_from_start + binfile.extra_counter - entrystartidx; //Put into table table.Insert(pesho); break; } } serialize_table(mem, size, (basepath + "/probing_hash.dat").c_str()); serialize_map(&source_vocabids, (basepath + "/source_vocabids").c_str()); delete[] mem; //Write configfile std::ofstream configfile; configfile.open((basepath + "/config").c_str()); configfile << uniq_entries << '\n'; configfile.close(); }