void Server::serve (const char * address) { POMAGMA_INFO("Starting server"); zmq::context_t context(1); zmq::socket_t socket(context, ZMQ_REP); socket.bind(address); while (true) { POMAGMA_DEBUG("waiting for request"); zmq::message_t raw_request; socket.recv(& raw_request); POMAGMA_DEBUG("parsing request"); messaging::AnalystRequest request; request.ParseFromArray(raw_request.data(), raw_request.size()); messaging::AnalystResponse response = handle(* this, request); POMAGMA_DEBUG("serializing response"); std::string response_str; response.SerializeToString(& response_str); const size_t size = response_str.length(); zmq::message_t raw_response(size); memcpy(raw_response.data(), response_str.c_str(), size); POMAGMA_DEBUG("sending response"); socket.send(raw_response); } }
void Server::execute(const std::string& program) { POMAGMA_DEBUG("parsing program"); vm::ProgramParser parser; parser.load(m_structure.signature()); std::istringstream istream(program); const auto listings = parser.parse(istream); POMAGMA_DEBUG("executing " << listings.size() << " listings"); vm::VirtualMachine virtual_machine; virtual_machine.load(m_structure.signature()); for (const auto& listing : listings) { vm::Program program = parser.find_program(listing); virtual_machine.execute(program); } }
std::vector<float> Router::measure_probs(float reltol) const { POMAGMA_INFO("Measuring ob probs"); const size_t item_count = m_carrier.item_count(); std::vector<float> probs(1 + item_count, 0); const float max_increase = 1.0 + reltol; bool changed = true; while (changed) { changed = false; POMAGMA_DEBUG("accumulating route probabilities"); // The following three cannot be mixed: openmp, gcc, fork. // see http://bisqwit.iki.fi/story/howto/openmp/#OpenmpAndFork //# pragma omp parallel for schedule(dynamic, 1) for (size_t i = 0; i < item_count; ++i) { Ob ob = 1 + i; float prob = 0; for (const Segment& segment : iter_val(ob)) { prob += get_prob(segment, probs); } if (prob > probs[ob] * max_increase) { //#pragma omp atomic changed = true; } probs[ob] = prob; // relaxed memory order } } return probs; }
void Router::update_probs(std::vector<float>& probs, float reltol) const { POMAGMA_INFO("Updating ob probs"); const size_t item_count = m_carrier.item_count(); POMAGMA_ASSERT_EQ(probs.size(), 1 + item_count); const float max_increase = 1.0 + reltol; bool changed = true; while (changed) { changed = false; POMAGMA_DEBUG("accumulating route probabilities"); #pragma omp parallel for schedule(dynamic, 1) for (size_t i = 0; i < item_count; ++i) { Ob ob = 1 + i; float& prob = probs[ob]; float temp_prob = 0; for (const Segment& segment : iter_val(ob)) { temp_prob += get_prob(segment, probs); } if (temp_prob > prob * max_increase) { changed = true; } prob = temp_prob; } } }
void SymmetricFunction::validate() const { POMAGMA_INFO("Validating SymmetricFunction"); m_lines.validate(); POMAGMA_DEBUG("validating line-value consistency"); for (size_t i = 1; i <= item_dim(); ++i) for (size_t j = i; j <= item_dim(); ++j) { auto val_iter = m_values.find(assert_sorted_pair(i, j)); if (not(support().contains(i) and support().contains(j))) { POMAGMA_ASSERT(val_iter == m_values.end(), "found unsupported lhs, rhs: " << i << ',' << j); } else if (val_iter != m_values.end()) { POMAGMA_ASSERT(defined(i, j), "found undefined value: " << i << ',' << j); Ob val = val_iter->second; POMAGMA_ASSERT(val, "found zero value: " << i << ',' << j); POMAGMA_ASSERT(support().contains(val), "found unsupported value: " << i << ',' << j); } else { POMAGMA_ASSERT(not defined(i, j), "found defined null value: " << i << ',' << j); } } }
/// Fit the language to a corpus histogram and refresh derived state.
///
/// Builds a Router from the current signature and language, fits it to the
/// histogram's symbol and ob counts, stores the fitted language in
/// m_language, then recomputes m_probs and m_routes from the same router.
///
/// @param histogram  corpus counts; its `symbols` and `obs` fields are used.
/// @return a copy of the newly fitted language.
std::unordered_map<std::string, float> Server::fit_language(
    const Corpus::Histogram& histogram)
{
    Router router(m_structure.signature(), m_language);
    router.fit_language(histogram.symbols, histogram.obs);
    m_language = router.get_language();

    POMAGMA_DEBUG("Language:");
    // Copy into an ordered map so the debug listing is sorted by name.
    const std::map<std::string, float> language(
        m_language.begin(), m_language.end());
    // Iterate by reference: copying each pair would copy its string key.
    for (const auto& pair : language) {
        POMAGMA_DEBUG("\t" << pair.first << "\t" << pair.second);
    }

    m_probs = router.measure_probs();
    m_routes = router.find_routes();
    return m_language;
}
/// Simplify every expression in `source_file` and write results to
/// `destin_file`.
///
/// Each input line is parsed as a relation token followed by two terms; the
/// output line is the token followed by the two terms' routes. A header
/// comment line is written first.
///
/// @param structure    supplies the signature used by the parser.
/// @param routes       passed through to SimplifyParser.
/// @param source_file  path of the input file; must differ from destin_file.
/// @param destin_file  path of the output file.
/// @return the number of expressions written.
size_t batch_simplify(
    Structure & structure,
    const std::vector<std::string> & routes,
    const char * source_file,
    const char * destin_file)
{
    POMAGMA_INFO("simplifying expressions");
    POMAGMA_ASSERT(
        std::string(source_file) != std::string(destin_file),
        "source and destin cannot be the same");

    SimplifyParser parser(structure.signature(), routes);

    std::ofstream destin(destin_file);
    POMAGMA_ASSERT(destin, "failed to open " << destin_file);
    destin << "# expressions simplifed by pomagma\n";

    size_t line_count = 0;
    LineParser lines(source_file);
    while (lines.ok()) {
        const std::string & expression = * lines;
        POMAGMA_DEBUG("simplifying " << expression);

        // simplify relations like EQUAL I APP APP S K K
        parser.begin(expression);
        std::string type = parser.parse_token();
        SimplifyTerm lhs = parser.parse_term();
        SimplifyTerm rhs = parser.parse_term();
        parser.end();

        destin << type << " " << lhs.route << " " << rhs.route << "\n";
        ++line_count;

        lines.next();
    }

    return line_count;
}
/// Check the link invariants of every node yielded by Pos's sparse iterator.
///
/// For each node this calls test_find, then verifies that:
///  - a left child has lower rank and names the node as its U link;
///  - a right child has higher rank and names the node as its U link;
///  - a node with a U link is that parent's L or R child (chosen by rank);
///  - a node without a U link is its own root.
template<class X>
void splay_forest<X>::validate_forest ()
{
    POMAGMA_DEBUG("Validating " << nameof<X>() << " forest");

    for (typename Pos::sparse_iterator it = Pos::sbegin();
         it != Pos::send();
         ++it)
    {
        Pos node = *it;

        // make sure node is inserted
        test_find(node);

        // check L-U agreement
        if (Pos left = L(node)) {
            POMAGMA_ASSERT(rank(left) < rank(node), "L-U out of order");
            POMAGMA_ASSERT(U(left) == node, "invalid: runaway L-child");
        }

        // check R-U agreement
        if (Pos right = R(node)) {
            POMAGMA_ASSERT(rank(node) < rank(right), "R-U out of order");
            POMAGMA_ASSERT(U(right) == node, "invalid: runaway R-child");
        }

        // check U-_ agreement
        if (Pos parent = U(node)) {
            if (rank(node) < rank(parent)) {
                POMAGMA_ASSERT(L(parent) == node,
                    "invalid: neglected L-child");
            } else {
                POMAGMA_ASSERT(R(parent) == node,
                    "invalid: neglected R-child");
            }
        } else {
            POMAGMA_ASSERT(root(node) == node, "invalid: root mismatch");
        }
    }
}
/// Compute the set of obs for which defines() eventually holds.
///
/// Starting from an empty `defined` set, each pass scans the supported obs
/// that are not yet defined and inserts the first one for which
/// defines(defined, ob) is true. Passes repeat until one inserts nothing.
///
/// @return the accumulated set of defined obs.
DenseSet Router::find_defined() const
{
    POMAGMA_INFO("Finding defined obs");
    DenseSet defined(m_carrier.item_dim());
    DenseSet undefined(m_carrier.item_dim());
    undefined = m_carrier.support();

    bool changed = true;
    while (changed) {
        changed = false;

        // The old message was copied from the probability routines and
        // misdescribed this loop in the logs.
        POMAGMA_DEBUG("accumulating defined obs");

        undefined -= defined;
        for (auto iter = undefined.iter(); iter.ok(); iter.next()) {
            Ob ob = *iter;
            if (defines(defined, ob)) {
                defined.insert(ob);
                changed = true;
                // Restart the scan after each insertion.
                // NOTE(review): one insertion per pass looks deliberate --
                // confirm before removing the break.
                break;
            }
        }
    }

    return defined;
}
/// Resizing constructor: build the line data for `carrier` by moving from
/// `other`'s line data.
///
/// @param carrier  carrier the new relation is built over.
/// @param other    relation whose m_lines is moved from.
BinaryRelation::BinaryRelation (
        const Carrier & carrier,
        BinaryRelation && other)
    : m_lines(carrier, std::move(other.m_lines))
{
    POMAGMA_DEBUG(
        "resizing BinaryRelation with " << round_word_dim() << " words");
}
void Router::update_weights( const std::vector<float>& probs, const std::unordered_map<std::string, size_t>& symbol_counts, const std::unordered_map<Ob, size_t>& ob_counts, std::vector<float>& symbol_weights, std::vector<float>& ob_weights, float reltol) const { POMAGMA_INFO("Updating weights"); const size_t symbol_count = m_types.size(); const size_t ob_count = m_carrier.item_count(); POMAGMA_ASSERT_EQ(probs.size(), 1 + ob_count); POMAGMA_ASSERT_EQ(symbol_weights.size(), symbol_count); POMAGMA_ASSERT_EQ(ob_weights.size(), 1 + ob_count); const float max_increase = 1.0 + reltol; std::vector<float> temp_symbol_weights(symbol_weights.size()); std::vector<float> temp_ob_weights(ob_weights.size()); update_weights_loop : { POMAGMA_DEBUG("distributing route weight"); std::fill(temp_symbol_weights.begin(), temp_symbol_weights.end(), 0); for (size_t i = 0; i < symbol_count; ++i) { temp_symbol_weights[i] = map_get(symbol_counts, m_types[i].name, 0); } std::fill(temp_ob_weights.begin(), temp_ob_weights.end(), 0); for (const auto& pair : ob_counts) { temp_ob_weights[pair.first] = pair.second; } #pragma omp parallel for schedule(dynamic, 1) for (size_t i = 0; i < ob_count; ++i) { Ob ob = 1 + i; const float weight = ob_weights[ob] / probs[ob]; for (const Segment& segment : iter_val(ob)) { float part = weight * get_prob(segment, probs); add_weight(part, segment, temp_symbol_weights, temp_ob_weights); } } std::swap(symbol_weights, temp_symbol_weights); std::swap(ob_weights, temp_ob_weights); for (size_t i = 0; i < symbol_count; ++i) { if (symbol_weights[i] > temp_symbol_weights[i] * max_increase) { goto update_weights_loop; } } for (size_t i = 0; i < ob_count; ++i) { Ob ob = 1 + i; if (ob_weights[ob] > temp_ob_weights[ob] * max_increase) { goto update_weights_loop; } } } }
/// Construct a nullary function over `carrier` with a zero value.
///
/// @param carrier          carrier this function is defined over.
/// @param insert_callback  stored as the insert callback; when null,
///                         noop_callback is stored instead.
NullaryFunction::NullaryFunction (
        const Carrier & carrier,
        void (*insert_callback) (const NullaryFunction *))
    : m_carrier(carrier),
      m_value(0),
      m_insert_callback(insert_callback ? insert_callback : noop_callback)
{
    POMAGMA_DEBUG("creating NullaryFunction");
}
void Server::serve(const char* address) { void* context; void* socket; zmq_msg_t message; POMAGMA_INFO("Starting server"); POMAGMA_ASSERT_C((context = zmq_ctx_new())); POMAGMA_ASSERT_C((socket = zmq_socket(context, ZMQ_REP))); POMAGMA_ASSERT_C(0 == zmq_bind(socket, address)); for (m_serving = true; m_serving;) { POMAGMA_DEBUG("waiting for request"); POMAGMA_ASSERT_C(0 == zmq_msg_init(&message)); POMAGMA_ASSERT_C(-1 != zmq_msg_recv(&message, socket, 0)); POMAGMA_DEBUG("parsing request"); protobuf::CartographerRequest request; bool parsed = request.ParseFromArray(zmq_msg_data(&message), zmq_msg_size(&message)); POMAGMA_ASSERT(parsed, "Failed to parse request"); POMAGMA_ASSERT_C(0 == zmq_msg_close(&message)); protobuf::CartographerResponse response = handle(*this, request); POMAGMA_DEBUG("serializing response"); std::string response_str; response.SerializeToString(&response_str); const int size = response_str.length(); POMAGMA_ASSERT_C(0 == zmq_msg_init(&message)); POMAGMA_ASSERT_C(0 == zmq_msg_init_size(&message, size)); memcpy(zmq_msg_data(&message), response_str.c_str(), size); POMAGMA_DEBUG("sending response"); POMAGMA_ASSERT_C(size == zmq_msg_send(&message, socket, 0)); POMAGMA_ASSERT_C(0 == zmq_msg_close(&message)); } POMAGMA_INFO("stopping server"); int linger_ms = 0; POMAGMA_ASSERT_C( 0 == zmq_setsockopt(socket, ZMQ_LINGER, &linger_ms, sizeof(linger_ms))); POMAGMA_ASSERT_C(0 == zmq_close(socket)); POMAGMA_ASSERT_C(0 == zmq_ctx_destroy(context)); }
/// Start `thread_count` worker threads, each running do_work().
///
/// @param processor     stored in m_processor.
/// @param thread_count  number of workers to start; must be positive.
WorkerPool (Processor & processor, size_t thread_count)
    : m_processor(processor),
      m_accepting(true)
{
    POMAGMA_ASSERT_LT(0, thread_count);
    POMAGMA_DEBUG("Starting pool of " << thread_count << " workers");

    size_t started = 0;
    while (started < thread_count) {
        m_pool.push_back(std::thread([this](){ this->do_work(); }));
        ++started;
    }
}
void wait () { bool expected = true; if (m_accepting.compare_exchange_strong(expected, false)) { m_condition.notify_all(); for (auto & worker : m_pool) { worker.join(); } POMAGMA_DEBUG("Stopped pool of " << m_pool.size() << " workers"); } }
void Server::serve(const char* address) { void* context; void* socket; zmq_msg_t message; POMAGMA_INFO("Starting server"); POMAGMA_ASSERT_C((context = zmq_ctx_new())); POMAGMA_ASSERT_C((socket = zmq_socket(context, ZMQ_REP))); POMAGMA_ASSERT_C(0 == zmq_bind(socket, address)); while (true) { POMAGMA_DEBUG("waiting for request"); POMAGMA_ASSERT_C(0 == zmq_msg_init(&message)); POMAGMA_ASSERT_C(-1 != zmq_msg_recv(&message, socket, 0)); POMAGMA_DEBUG("parsing request"); protobuf::AnalystRequest request; bool parsed = request.ParseFromArray(zmq_msg_data(&message), zmq_msg_size(&message)); POMAGMA_ASSERT(parsed, "Failed to parse request"); POMAGMA_ASSERT_C(0 == zmq_msg_close(&message)); protobuf::AnalystResponse response = handle(*this, request); POMAGMA_DEBUG("serializing response"); std::string response_str; response.SerializeToString(&response_str); const int size = response_str.length(); POMAGMA_ASSERT_C(0 == zmq_msg_init(&message)); POMAGMA_ASSERT_C(0 == zmq_msg_init_size(&message, size)); memcpy(zmq_msg_data(&message), response_str.c_str(), size); POMAGMA_DEBUG("sending response"); POMAGMA_ASSERT_C(size == zmq_msg_send(&message, socket, 0)); POMAGMA_ASSERT_C(0 == zmq_msg_close(&message)); } }
std::vector<Ob> conjecture_diverge(Structure& structure, const std::vector<float>& probs, const std::vector<std::string>& routes, const char* conjectures_file) { POMAGMA_INFO("Conjecturing divergent terms"); const Carrier& carrier = structure.carrier(); const BinaryRelation& NLESS = structure.binary_relation("NLESS"); const Ob BOT = structure.nullary_function("BOT").find(); const Ob TOP = structure.nullary_function("TOP").find(); POMAGMA_DEBUG("collecting conjectures"); DenseSet conjecture_set(carrier.item_count()); conjecture_set.set_diff(carrier.support(), NLESS.get_Rx_set(BOT)); POMAGMA_ASSERT(conjecture_set.contains(BOT), "BOT not conjectured"); POMAGMA_ASSERT(not conjecture_set.contains(TOP), "TOP conjectured"); conjecture_set.remove(BOT); std::vector<Ob> conjectures; for (auto iter = conjecture_set.iter(); iter.ok(); iter.next()) { Ob ob = *iter; conjectures.push_back(ob); } POMAGMA_DEBUG("sorting " << conjectures.size() << " conjectures"); std::sort(conjectures.begin(), conjectures.end(), [&](const Ob& x, const Ob& y) { return probs[x] > probs[y]; }); POMAGMA_DEBUG("writing conjectures to " << conjectures_file); std::ofstream file(conjectures_file, std::ios::out | std::ios::trunc); POMAGMA_ASSERT(file, "failed to open " << conjectures_file); file << "# divergence conjectures generated by pomagma"; for (auto ob : conjectures) { file << "\nEQUAL BOT " << routes[ob]; } return conjectures; }
/// Construct an empty carrier with room for `item_dim` items.
///
/// Allocates 1 + item_dim Rep blocks and constructs each with the value 0.
///
/// @param item_dim         capacity; asserted to be at most MAX_ITEM_DIM.
/// @param insert_callback  stored in m_insert_callback.
/// @param merge_callback   stored in m_merge_callback.
Carrier::Carrier (
        size_t item_dim,
        void (*insert_callback) (Ob),
        void (*merge_callback) (Ob))
    : m_support(item_dim),
      m_item_count(0),
      m_rep_count(0),
      m_reps(alloc_blocks<Rep>(1 + item_dim)),
      m_insert_callback(insert_callback),
      m_merge_callback(merge_callback)
{
    POMAGMA_DEBUG("creating Carrier with " << item_dim << " items");
    POMAGMA_ASSERT_LE(item_dim, MAX_ITEM_DIM);

    construct_blocks(m_reps, 1 + item_dim, 0);
}
/// Construct an empty binary relation of dimension `item_dim`.
///
/// Allocates (1 + round_item_dim) * round_word_dim words for the Lx lines.
/// When `symmetric` is true the Rx lines alias the Lx buffer; otherwise a
/// second buffer of the same size is allocated. All allocated words are
/// zeroed.
///
/// @param item_dim   number of items in the relation.
/// @param symmetric  whether Rx shares storage with Lx.
base_bin_rel::base_bin_rel (size_t item_dim, bool symmetric)
    : m_support(item_dim),
      m_round_item_dim(dense_set::round_item_dim(item_dim)),
      m_round_word_dim(dense_set::round_word_dim(item_dim)),
      m_data_size_words((1 + m_round_item_dim) * m_round_word_dim),
      m_Lx_lines(pomagma::alloc_blocks<Word>(m_data_size_words)),
      m_Rx_lines(
          symmetric
              ? m_Lx_lines
              : pomagma::alloc_blocks<Word>(m_data_size_words))
{
    POMAGMA_DEBUG("creating base_bin_rel with "
            << m_data_size_words << " words");
    POMAGMA_ASSERT(m_round_item_dim <= MAX_ITEM_DIM,
            "base_bin_rel is too large");

    // initialize to zeros; a symmetric relation has only one buffer to clear
    bzero(m_Lx_lines, sizeof(Word) * m_data_size_words);
    if (symmetric) {
        return;
    }
    bzero(m_Rx_lines, sizeof(Word) * m_data_size_words);
}
void execute (const AssumeTask & task) { POMAGMA_DEBUG("assume " << task.expression); InsertParser parser(signature); parser.begin(task.expression); std::string type = parser.parse_token(); Ob lhs = parser.parse_term(); Ob rhs = parser.parse_term(); parser.end(); if (type == "EQUAL") { ensure_equal(lhs, rhs); } else if (type == "LESS") { ensure_less(lhs, rhs); } else if (type == "NLESS") { ensure_nless(lhs, rhs); } else { POMAGMA_ERROR("bad relation type: " << type); } }
// for growing void base_bin_rel::move_from (const base_bin_rel & other) { POMAGMA_DEBUG("Copying base_bin_rel"); size_t min_item_dim = min(item_dim(), other.item_dim()); size_t min_word_dim = min(word_dim(), other.word_dim()); m_support.move_from(other.m_support); if (_symmetric()) { POMAGMA_ASSERT(other._symmetric(), "symmetry mismatch"); for (size_t i = 1; i <= min_item_dim; ++i) { memcpy(Lx(i), other.Lx(i), sizeof(Word) * min_word_dim); } } else { POMAGMA_ASSERT(not other._symmetric(), "symmetry mismatch"); for (size_t i = 1; i <= min_item_dim; ++i) { memcpy(Lx(i), other.Lx(i), sizeof(Word) * min_word_dim); memcpy(Rx(i), other.Rx(i), sizeof(Word) * min_word_dim); } } }
void Router::fit_language( const std::unordered_map<std::string, size_t>& symbol_counts, const std::unordered_map<Ob, size_t>& ob_counts, float reltol) { POMAGMA_INFO("Fitting language"); const size_t item_count = m_carrier.item_count(); std::vector<float> ob_probs(1 + item_count, 0); std::vector<float> ob_weights(1 + item_count, 0); std::vector<float> symbol_weights(m_types.size(), 0); POMAGMA_ASSERT_EQ(m_types.size(), m_language.size()); const float max_increase = 1.0 + reltol; bool changed = true; while (changed) { changed = false; update_probs(ob_probs, reltol); update_weights(ob_probs, symbol_counts, ob_counts, symbol_weights, ob_weights, reltol); POMAGMA_DEBUG("optimizing language"); float total_weight = 0; for (float weight : symbol_weights) { total_weight += weight; } for (size_t i = 0; i < m_types.size(); ++i) { SegmentType& type = m_types[i]; float new_prob = symbol_weights[i] / total_weight; float old_prob = type.prob; type.prob = new_prob; m_language[type.name] = new_prob; if (new_prob > old_prob * max_increase) { changed = true; } } } }
/// Resizing constructor: build the line data for `carrier` by moving from
/// `other`'s lines, and take over `other`'s value table.
///
/// @param carrier  carrier the new function is built over.
/// @param other    function whose m_lines and m_values are moved from.
BinaryFunction::BinaryFunction(Carrier& carrier, BinaryFunction&& other)
    : m_lines(carrier, std::move(other.m_lines)),
      m_values(std::move(other.m_values))
{
    POMAGMA_DEBUG("resizing BinaryFunction");
}
/// Construct a binary function whose line data is built over `carrier`.
///
/// @param carrier  carrier passed to the m_lines constructor.
BinaryFunction::BinaryFunction(Carrier& carrier)
    : m_lines(carrier)
{
    POMAGMA_DEBUG("creating BinaryFunction");
}
/// Merge callback: log the ob and bump the global merge counter.
///
/// @param i  the ob reported in the debug message.
void merge_callback (Ob i)
{
    POMAGMA_DEBUG("merging " << i);

    ++g_merge_count;
}
/// Resizing constructor: build the line data for `carrier` by moving from
/// `other`'s lines, and take over `other`'s value table.
///
/// @param carrier  carrier the new function is built over.
/// @param other    function whose m_lines and m_values are moved from.
SymmetricFunction::SymmetricFunction(
        Carrier& carrier,
        SymmetricFunction&& other)
    : m_lines(carrier, std::move(other.m_lines)),
      m_values(std::move(other.m_values))
{
    POMAGMA_DEBUG("resizing SymmetricFunction");
}
/// Construct a symmetric function whose line data is built over `carrier`.
///
/// @param carrier  carrier passed to the m_lines constructor.
SymmetricFunction::SymmetricFunction(Carrier& carrier)
    : m_lines(carrier)
{
    POMAGMA_DEBUG("creating SymmetricFunction");
}
std::vector<std::string> Router::find_routes() const { POMAGMA_INFO("Routing all obs"); const size_t item_count = m_carrier.item_count(); std::vector<float> best_probs(1 + item_count, 0); std::vector<Segment> best_segments(1 + item_count); bool changed = true; while (changed) { changed = false; POMAGMA_DEBUG("finding best local routes"); //#pragma omp parallel for schedule(dynamic, 1) for (size_t i = 0; i < item_count; ++i) { Ob ob = 1 + i; float& best_prob = best_probs[ob]; Segment& best_segment = best_segments[ob]; bool best_changed = false; for (const Segment& segment : iter_val(ob)) { float prob = get_prob(segment, best_probs); if (unlikely(std::make_pair(-prob, segment) < std::make_pair(-best_prob, best_segment))) { best_prob = prob; // relaxed memory order best_segment = segment; // relaxed memory order best_changed = true; } } if (best_changed) { //#pragma omp atomic changed = true; } } } POMAGMA_DEBUG("scheduling route building"); std::vector<Ob> schedule; schedule.reserve(item_count); for (auto iter = m_carrier.iter(); iter.ok(); iter.next()) { Ob ob = *iter; POMAGMA_ASSERT_LT(0, best_probs[ob]); schedule.push_back(ob); } std::sort(schedule.begin(), schedule.end(), [&](const Ob& x, const Ob& y) { return best_probs[x] > best_probs[y]; }); POMAGMA_DEBUG("building full routes"); std::vector<std::string> routes(1 + item_count); for (Ob ob : schedule) { const Segment& segment = best_segments[ob]; const SegmentType& type = m_types[segment.type]; switch (type.arity) { case NULLARY: { routes[ob] = type.name; } break; case UNARY: { const auto& arg = routes[segment.arg1]; POMAGMA_ASSERT(not arg.empty(), "unknown arg route"); routes[ob] = type.name + " " + arg; } break; case BINARY: { const auto& lhs = routes[segment.arg1]; const auto& rhs = routes[segment.arg2]; POMAGMA_ASSERT(not lhs.empty(), "unknown lhs route"); POMAGMA_ASSERT(not rhs.empty(), "unknown rhs route"); routes[ob] = type.name + " " + lhs + " " + rhs; } break; } } return routes; }
/// Construct a binary relation whose line data is built over `carrier`.
///
/// @param carrier  carrier passed to the m_lines constructor.
BinaryRelation::BinaryRelation (const Carrier & carrier)
    : m_lines(carrier)
{
    POMAGMA_DEBUG(
        "creating BinaryRelation with " << round_word_dim() << " words");
}