コード例 #1
0
// _____________________________________________________________________________
// Reads the next line from the context file and fills `line` from its
// tab-separated fields: word, entity flag, context id, score.
// Returns false when no further line can be read, true otherwise.
// Field layout is only validated via assert, i.e. in debug builds.
bool ContextFileParser::getLine(ContextFileParser::Line& line) {
  string raw;
  if (!std::getline(_in, raw)) {
    return false;
  }

  // The word ends at the first tab.
  const size_t wordEnd = raw.find('\t');
  assert(wordEnd != string::npos);
  // The code assumes a one-character entity flag, so the next separator is
  // expected exactly two positions after the first tab.
  const size_t flagEnd = wordEnd + 2;
  assert(flagEnd + 3 < raw.size());
  // The context id has at least one character, hence the search offset.
  const size_t idEnd = raw.find('\t', flagEnd + 2);
  assert(idEnd != string::npos);

  const bool isEntity = (raw[wordEnd + 1] == '1');
  line._isEntity = isEntity;
  if (isEntity) {
    // Entities keep their original spelling.
    line._word = raw.substr(0, wordEnd);
  } else {
    line._word = ad_utility::getLowercaseUtf8(raw.substr(0, wordEnd));
  }

  const string idField = raw.substr(flagEnd + 1, idEnd - flagEnd - 1);
  const string scoreField = raw.substr(idEnd + 1);
  line._contextId = static_cast<Id>(atol(idField.c_str()));
  line._score = static_cast<Score>(atol(scoreField.c_str()));

#ifndef NDEBUG
  // Debug builds additionally verify that context ids never decrease.
  if (_lastCId > line._contextId) {
    AD_THROW(ad_semsearch::Exception::BAD_INPUT,
             "ContextFile has to be sorted by context Id.");
  }
  _lastCId = line._contextId;
#endif
  return true;
}
コード例 #2
0
// _____________________________________________________________________________
// Returns the value of the "query" HTTP parameter unchanged.
// Raises BAD_REQUEST (via AD_THROW) when the parameter is missing or empty.
string Server::createQueryFromHttpParams(const ParamValueMap& params) const {
  auto it = params.find("query");
  if (it == params.end() || it->second.empty()) {
    AD_THROW(ad_semsearch::Exception::BAD_REQUEST,
             "Expected at least one non-empty attribute \"query\".");
  }
  return it->second;
}
コード例 #3
0
// _____________________________________________________________________________
// Parses one filter expression and appends it to query._filters.
// `str` must contain a parenthesised expression "(<lhs> <op> <rhs>)" whose
// three parts are separated by single spaces. Both operands have to be
// variables (start with '?'); supported operators are
// =, ==, !=, <, <=, > and >=.
// Raises BAD_QUERY if the expression does not consist of exactly three tokens
// and NOT_YET_IMPLEMENTED for non-variable operands or unknown operators.
void SparqlParser::addFilter(const string& str, ParsedQuery& query) {
  size_t i = str.find('(');
  AD_CHECK(i != string::npos);
  size_t j = str.find(')', i + 1);
  AD_CHECK(j != string::npos);
  // Everything between the first '(' and the next ')'.
  string filter = str.substr(i + 1, j - i - 1);
  auto tokens = ad_utility::split(filter, ' ');
  if (tokens.size() != 3) {
    AD_THROW(ad_semsearch::Exception::BAD_QUERY,
             "Unknown syntax for filter: " + filter);
  }
  if (tokens[0].size() == 0 || tokens[0][0] != '?' || tokens[2].size() == 0 ||
      tokens[2][0] != '?') {
    AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED,
             "Filter not supported yet: " + filter);
  }
  SparqlFilter f;
  f._lhs = tokens[0];
  f._rhs = tokens[2];

  const auto& op = tokens[1];
  if (op == "=" || op == "==") {
    f._type = SparqlFilter::EQ;
  } else if (op == "!=") {
    f._type = SparqlFilter::NE;
  } else if (op == "<") {
    f._type = SparqlFilter::LT;
  } else if (op == "<=") {
    f._type = SparqlFilter::LE;
  } else if (op == ">") {
    // Was a second test for "<", which made GT unreachable and caused ">"
    // filters to be rejected as unsupported.
    f._type = SparqlFilter::GT;
  } else if (op == ">=") {
    f._type = SparqlFilter::GE;
  } else {
    AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED,
             "Filter not supported yet: " + filter);
  }
  query._filters.emplace_back(f);
}
コード例 #4
0
// _____________________________________________________________________________
// Extracts the URL parameters of an HTTP GET request into a map.
// The request target is taken to be the text between the first "GET" and the
// first "HTTP"; parameters start after the first '?' and are separated by
// '&'. Parameter names are lowercased, values are URL-decoded.
// Raises BAD_REQUEST (via AD_THROW) if the request is not a GET request, has
// no parameters, contains a parameter without '=', or repeats a parameter.
Server::ParamValueMap Server::parseHttpRequest(
    const string& httpRequest) const {
  LOG(DEBUG) << "Parsing HTTP Request." << endl;
  ParamValueMap params;
  _requestProcessingTimer.start();
  // Parse the HTTP Request.

  size_t indexOfGET = httpRequest.find("GET");
  size_t indexOfHTTP = httpRequest.find("HTTP");

  // "HTTP" in front of "GET" would make the length passed to substr below
  // wrap around, so treat that as an invalid request, too.
  if (indexOfGET == httpRequest.npos || indexOfHTTP == httpRequest.npos ||
      indexOfHTTP < indexOfGET) {
    AD_THROW(ad_semsearch::Exception::BAD_REQUEST,
             "Invalid request. Only supporting proper HTTP GET requests!\n" +
             httpRequest);
  }

  string request = httpRequest.substr(indexOfGET + 3,
                                      indexOfHTTP - (indexOfGET + 3));

  size_t index = request.find("?");
  if (index == request.npos) {
    AD_THROW(ad_semsearch::Exception::BAD_REQUEST,
             "Invalid request. At least one parameters is "
                 "required for meaningful queries!\n"
             + httpRequest);
  }

  // All parameters that are followed by another one, i.e. that end at '&'.
  size_t next = request.find('&', index + 1);
  while (next != request.npos) {
    size_t posOfEq = request.find('=', index + 1);
    // The '=' has to belong to the current parameter. Without the second
    // check, input like "a&b=c" would use the '=' of the following parameter
    // and wrap around the value length.
    if (posOfEq == request.npos || posOfEq > next) {
      AD_THROW(ad_semsearch::Exception::BAD_REQUEST,
               "Parameter without \"=\" in HTTP Request.\n" + httpRequest);
    }
    string param = ad_utility::getLowercaseUtf8(
        request.substr(index + 1, posOfEq - (index + 1)));
    string value = ad_utility::decodeUrl(
        request.substr(posOfEq + 1, next - (posOfEq + 1)));
    if (params.count(param) > 0) {
      AD_THROW(ad_semsearch::Exception::BAD_REQUEST,
               "Duplicate HTTP parameter: " + param);
    }
    params[param] = value;
    index = next;
    next = request.find('&', index + 1);
  }

  // The last (or only) parameter extends to the end of the request target.
  size_t posOfEq = request.find('=', index + 1);
  if (posOfEq == request.npos) {
    AD_THROW(ad_semsearch::Exception::BAD_REQUEST,
             "Parameter without \"=\" in HTTP Request." + httpRequest);
  }
  string param = ad_utility::getLowercaseUtf8(
      request.substr(index + 1, posOfEq - (index + 1)));
  // The final character of the target (in a regular request line the blank
  // in front of "HTTP") is not part of the value.
  string value = ad_utility::decodeUrl(request.substr(posOfEq + 1,
                                                      request.size() - 1 -
                                                      (posOfEq + 1)));
  if (params.count(param) > 0) {
    AD_THROW(ad_semsearch::Exception::BAD_REQUEST, "Duplicate HTTP parameter.");
  }
  params[param] = value;

  LOG(DEBUG) << "Done parsing HTTP Request." << endl;
  return params;
}
コード例 #5
0
// _____________________________________________________________________________
void SparqlParser::addWhereTriple(const string& str, ParsedQuery& query) {
  size_t i = 0;
  while (i < str.size() &&
         (str[i] == ' ' || str[i] == '\t' || str[i] == '\n')) { ++i; }
  if (i == str.size()) {
    AD_THROW(ad_semsearch::Exception::BAD_QUERY, "Illegal triple: " + str);
  }
  size_t j = i + 1;
  while (j < str.size() && str[j] != '\t' && str[j] != ' ' &&
         str[j] != '\n') { ++j; }
  if (j == str.size()) {
    AD_THROW(ad_semsearch::Exception::BAD_QUERY, "Illegal triple: " + str);
  }

  string s = str.substr(i, j - i);
  i = j;
  while (i < str.size() &&
         (str[i] == ' ' || str[i] == '\t' || str[i] == '\n')) { ++i; }
  if (i == str.size()) {
    AD_THROW(ad_semsearch::Exception::BAD_QUERY, "Illegal triple: " + str);
  }
  j = i + 1;
  while (j < str.size() && str[j] != '\t' && str[j] != ' ' &&
         str[j] != '\n') { ++j; }
  string p = str.substr(i, j - i);
  if (p == IN_CONTEXT_RELATION ||
      p.find(IN_CONTEXT_RELATION_NS) != string::npos) {
    string o = ad_utility::strip(str.substr(j), " \t\n");
    query._whereClauseTriples.push_back(SparqlTriple(s, p, o));
  } else {
    i = j;
    while (i < str.size() &&
           (str[i] == ' ' || str[i] == '\t' || str[i] == '\n')) { ++i; }
    if (i == str.size()) {
      AD_THROW(ad_semsearch::Exception::BAD_QUERY, "Illegal triple: " + str);
    }
    if (str[i] == '<') {
      // URI
      j = str.find('>', i + 1);
      if (j == string::npos) {
        AD_THROW(ad_semsearch::Exception::BAD_QUERY,
                 "Illegal object in : " + str);
      }
      ++j;
    } else {
      if (str[i] == '\"') {
        // Literal
        j = str.find('\"', i + 1);
        if (j == string::npos) {
          AD_THROW(ad_semsearch::Exception::BAD_QUERY,
                   "Illegal literal in : " + str);
        }
        ++j;
      } else {
        j = i + 1;
      }
      while (j < str.size() && str[j] != ' ' && str[j] != '\t' &&
             str[j] != '\n') { ++j; }
    }
    string o = str.substr(i, j - i);
    query._whereClauseTriples.push_back(SparqlTriple(s, p, o));
  }
}
コード例 #6
0
// _____________________________________________________________________________
// Parses the where clause of a query: takes the text between the first '{'
// and the next '}', splits it into triples and FILTER expressions, and adds
// them to `query` via addWhereTriple and addFilter (triples first).
// Throws ParseException if either curly brace is missing and raises BAD_QUERY
// (via AD_THROW) for a FILTER without closing parenthesis.
void SparqlParser::parseWhere(const string& str, ParsedQuery& query) {
  // Check for the opening brace first; searching for '}' from npos can never
  // succeed and used to trip the assert before this error could be reported.
  size_t i = str.find('{');
  if (i == string::npos) {
    throw ParseException("Need curly braces in where clause.");
  }
  size_t j = str.find('}', i);
  if (j == string::npos) {
    throw ParseException("Need curly braces in where clause.");
  }
  string inner = ad_utility::strip(str.substr(i + 1, j - i - 1), "\n\t ");

  // Split where clauses. Cannot simply split at dots, because they may occur
  // in URIs, stuff with namespaces or literals.
  vector<string> clauses;
  vector<string> filters;
  size_t start = 0;
  bool insideUri = false;
  bool insideNsThing = false;
  bool insideLiteral = false;
  while (start < inner.size()) {
    size_t k = start;
    while (k < inner.size() &&
           (inner[k] == ' ' || inner[k] == '\t' || inner[k] == '\n')) { ++k; }
    if (inner.compare(k, 6, "FILTER") == 0) {
      // A filter runs up to its closing parenthesis; an optional dot
      // somewhere after it is skipped as well.
      size_t end = inner.find(')', k);
      if (end == string::npos) {
        AD_THROW(ad_semsearch::Exception::BAD_QUERY,
                 "Filter without closing paramthesis.");
      }
      filters.push_back(inner.substr(k, end - k + 1));
      size_t posOfDot = inner.find('.', end);
      start = (posOfDot == string::npos ? end + 1 : posOfDot + 1);
      continue;
    }
    while (k < inner.size()) {
      if (!insideUri && !insideLiteral && !insideNsThing) {
        // Only a dot outside of URIs, literals and prefixed names ends a
        // clause.
        if (inner[k] == '.') {
          clauses.emplace_back(inner.substr(start, k - start));
          break;
        }
        if (inner[k] == '<') { insideUri = true; }
        if (inner[k] == '\"') { insideLiteral = true; }
        if (inner[k] == ':') { insideNsThing = true; }
      } else {
        if (insideUri && inner[k] == '>') { insideUri = false; }
        // The closing quote has to end the literal. Resetting insideUri here
        // (as before) left insideLiteral set for the rest of the clause text,
        // so no further dot was ever recognised as a separator.
        if (insideLiteral && inner[k] == '\"') { insideLiteral = false; }
        // Any whitespace ends a prefixed name, including a line break.
        if (insideNsThing &&
            (inner[k] == ' ' || inner[k] == '\t' || inner[k] == '\n')) {
          insideNsThing = false;
        }
      }
      ++k;
    }
    // No terminating dot: the remainder is the last clause.
    if (k == inner.size()) { clauses.emplace_back(inner.substr(start)); }
    start = k + 1;
  }
  for (const string& clause: clauses) {
    string c = ad_utility::strip(clause, ' ');
    if (c.size() > 0) {
      addWhereTriple(c, query);
    }
  }
  for (const string& filter: filters) {
    addFilter(filter, query);
  }
}
コード例 #7
0
// _____________________________________________________________________________
// Placeholder: computing the result of a ScanningJoin is not implemented yet,
// so every call raises NOT_YET_IMPLEMENTED.
void ScanningJoin::computeResult(ResultTable* result) {
  AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED, "TODO");
  // NOTE(review): presumably unreachable, since AD_THROW is expected to throw
  // unconditionally -- confirm against the macro definition.
  IndexScan::computeResult(result);
}
コード例 #8
0
// _____________________________________________________________________________
// Combines every plan from `a` with every plan from `b` that it is connected
// to in `tg` into a join plan. An input that is not sorted on its join column
// is wrapped in a sort operation first. The resulting plans are grouped by
// their pruning key and only the cheapest plan of each group is returned.
// Raises NOT_YET_IMPLEMENTED (via AD_THROW) if two connected plans do not
// share exactly one join column.
vector<QueryPlanner::SubtreePlan> QueryPlanner::merge(
    const vector<QueryPlanner::SubtreePlan>& a,
    const vector<QueryPlanner::SubtreePlan>& b,
    const QueryPlanner::TripleGraph& tg) const {
  // TODO: Add the following features:
  // If a join is supposed to happen, always check if it happens between
  // a scan with a relatively large result size
  // esp. with an entire relation but also with something like is-a Person
  // If that is the case look at the size estimate for the other side,
  // if that is rather small, replace the join and scan by a combination.
  std::unordered_map<string, vector<SubtreePlan>> candidates;

  // Look at all pairs between a and b; only connected ones can be joined.
  for (const SubtreePlan& lhs : a) {
    for (const SubtreePlan& rhs : b) {
      if (!connected(lhs, rhs, tg)) {
        continue;
      }

      // Find join variable(s) / columns.
      auto jcs = getJoinColumns(lhs, rhs);
      if (jcs.size() != 1) {
        // TODO: Add joins with secondary join columns.
        AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED,
                 "Joins should happen on one variable only, for now. "
                     "No cyclic queries either, currently.");
      }

      // Both join inputs have to be sorted on their join column.
      // TODO: replace with HashJoin maybe (or add variant to possible plans).
      QueryExecutionTree left(_qec);
      QueryExecutionTree right(_qec);
      if (lhs._qet.resultSortedOn() == jcs[0][0]) {
        left = lhs._qet;
      } else {
        // Not sorted accordingly, put a sort operation on top.
        Sort sortLeft(_qec, lhs._qet, jcs[0][0]);
        left.setVariableColumns(lhs._qet.getVariableColumnMap());
        left.setOperation(QueryExecutionTree::SORT, &sortLeft);
      }
      if (rhs._qet.resultSortedOn() == jcs[0][1]) {
        right = rhs._qet;
      } else {
        // Not sorted accordingly, put a sort operation on top.
        Sort sortRight(_qec, rhs._qet, jcs[0][1]);
        right.setVariableColumns(rhs._qet.getVariableColumnMap());
        right.setOperation(QueryExecutionTree::SORT, &sortRight);
      }

      // The join itself becomes the root of the new tree.
      QueryExecutionTree joined(_qec);
      Join join(_qec, left, right, jcs[0][0], jcs[0][1]);
      joined.setVariableColumns(join.getVariableColumns());
      joined.setOperation(QueryExecutionTree::JOIN, &join);

      // Filters are taken over from the left plan, nodes from both sides.
      SubtreePlan plan(_qec);
      plan._qet = joined;
      plan._idsOfIncludedFilters = lhs._idsOfIncludedFilters;
      plan._idsOfIncludedNodes = lhs._idsOfIncludedNodes;
      plan._idsOfIncludedNodes.insert(rhs._idsOfIncludedNodes.begin(),
                                      rhs._idsOfIncludedNodes.end());
      candidates[getPruningKey(plan, jcs[0][0])].emplace_back(plan);
    }
  }

  // Plans that share a pruning key are considered duplicates, so it suffices
  // to keep the best one of each group. On equal cost the first one wins.
  vector<SubtreePlan> prunedPlans;
  for (auto& keyAndPlans : candidates) {
    vector<SubtreePlan>& group = keyAndPlans.second;
    size_t bestCost = std::numeric_limits<size_t>::max();
    size_t bestIndex = 0;
    for (size_t idx = 0; idx < group.size(); ++idx) {
      const auto cost = group[idx].getCostEstimate();
      if (cost < bestCost) {
        bestCost = cost;
        bestIndex = idx;
      }
    }
    prunedPlans.push_back(group[bestIndex]);
  }

  return prunedPlans;
}
コード例 #9
0
// _____________________________________________________________________________
// Creates the initial plans for the triple graph `tg`: one index scan for
// each node with a single variable and two index scans (one per column
// order) for each node with two variables.
// Nodes are looked up in tg._nodeMap by the ids 0 .. size - 1.
// Raises NOT_YET_IMPLEMENTED (via AD_THROW) for triples with zero or more
// than two variables and for triples with a variable predicate.
vector<QueryPlanner::SubtreePlan> QueryPlanner::seedWithScans(
    const QueryPlanner::TripleGraph& tg) const {
  vector<SubtreePlan> seeds;
  for (size_t i = 0; i < tg._nodeMap.size(); ++i) {
    const TripleGraph::Node& node = *tg._nodeMap.find(i)->second;

    switch (node._variables.size()) {
      case 0: {
        AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED,
                 "Triples should have at least one variable. Not the case in: "
                 + node._triple.asString());
        break;
      }

      case 1: {
        // Just pick one direction, they should be equivalent.
        SubtreePlan plan(_qec);
        plan._idsOfIncludedNodes.insert(i);
        QueryExecutionTree tree(_qec);
        if (isVariable(node._triple._s)) {
          // Subject is the variable: predicate and object are fixed.
          IndexScan scan(_qec, IndexScan::ScanType::POS_BOUND_O);
          scan.setPredicate(node._triple._p);
          scan.setObject(node._triple._o);
          scan.precomputeSizeEstimate();
          tree.setOperation(QueryExecutionTree::OperationType::SCAN, &scan);
          tree.setVariableColumn(node._triple._s, 0);
        } else if (isVariable(node._triple._o)) {
          // Object is the variable: predicate and subject are fixed.
          IndexScan scan(_qec, IndexScan::ScanType::PSO_BOUND_S);
          scan.setPredicate(node._triple._p);
          scan.setSubject(node._triple._s);
          scan.precomputeSizeEstimate();
          tree.setOperation(QueryExecutionTree::OperationType::SCAN, &scan);
          tree.setVariableColumn(node._triple._o, 0);
        } else {
          // Pred variable.
          AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED,
                   "No predicate vars yet, please. Triple in question: "
                   + node._triple.asString());
        }
        plan._qet = tree;
        seeds.push_back(plan);
        break;
      }

      case 2: {
        // Add plans for both possible scan directions.
        if (isVariable(node._triple._p)) {
          // Pred variable.
          AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED,
                   "No predicate vars yet, please. Triple in question: "
                   + node._triple.asString());
        }
        {
          // Subject in column 0, object in column 1.
          SubtreePlan subjectFirst(_qec);
          subjectFirst._idsOfIncludedNodes.insert(i);
          QueryExecutionTree tree(_qec);
          IndexScan scan(_qec, IndexScan::ScanType::PSO_FREE_S);
          scan.setPredicate(node._triple._p);
          scan.precomputeSizeEstimate();
          tree.setOperation(QueryExecutionTree::OperationType::SCAN, &scan);
          tree.setVariableColumn(node._triple._s, 0);
          tree.setVariableColumn(node._triple._o, 1);
          subjectFirst._qet = tree;
          seeds.push_back(subjectFirst);
        }
        {
          // Object in column 0, subject in column 1.
          SubtreePlan objectFirst(_qec);
          objectFirst._idsOfIncludedNodes.insert(i);
          QueryExecutionTree tree(_qec);
          IndexScan scan(_qec, IndexScan::ScanType::POS_FREE_O);
          scan.setPredicate(node._triple._p);
          scan.precomputeSizeEstimate();
          tree.setOperation(QueryExecutionTree::OperationType::SCAN, &scan);
          tree.setVariableColumn(node._triple._o, 0);
          tree.setVariableColumn(node._triple._s, 1);
          objectFirst._qet = tree;
          seeds.push_back(objectFirst);
        }
        break;
      }

      default: {
        AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED,
                 "Triples should have at most two variables. Not the case in: "
                 + node._triple.asString());
        break;
      }
    }
  }

  return seeds;
}
コード例 #10
0
// _____________________________________________________________________________
// Builds the execution tree for the parsed query `pq`.
// The plans produced by fillDpTab (plus an extra ORDER BY row, if the query
// has an order by clause) are searched for the plan with the lowest cost
// estimate in the last row. For DISTINCT queries a distinct operation over
// the selected variables is put on top of that plan.
// Raises NOT_YET_IMPLEMENTED (via AD_THROW) if splitting the triple graph at
// text operations yields anything but exactly one graph.
QueryExecutionTree QueryPlanner::createExecutionTree(
    const ParsedQuery& pq) const {

  LOG(DEBUG) << "Creating execution plan.\n";
  // Strategy:
  // Create a graph.
  // Each triple corresponds to a node, there is an edge between two nodes iff
  // they share a variable.

  TripleGraph tg = createTripleGraph(pq);


  // Each node/triple corresponds to a scan (more than one way possible),
  // each edge corresponds to a possible join.

  // Enumerate and judge possible query plans using a DP table.
  // Each ExecutionTree for a sub-problem gives an estimate.
  // Start bottom up, i.e. with the scans for triples.
  // Always merge two solutions from the table by picking one possible join.
  // A join is possible, if there is an edge between the results.
  // Therefore we keep track of all edges that touch a sub-result.
  // When joining two sub-results, the results edges are those that belong
  // to exactly one of the two input sub-trees.
  // If two of them have the same target, only one out edge is created.
  // All edges that are shared by both subtrees, are checked if they are covered
  // by the join or if an extra filter/select is needed.

  // The algorithm then creates all possible plans for 1 to n triples.
  // To generate a plan for k triples, all subsets between i and k-i are
  // joined.

  // Filters are now added to the mix when building execution plans.
  // Without them, a plan has an execution tree and a set of
  // covered triple nodes.
  // With them, it also has a set of covered filters.
  // A filter can be applied as soon as all variables that occur in the filter
  // are covered by the query. This is also always the place where this is done.

  // TODO: resolve cyclic queries and turn them into filters.
  // Copy made so that something can be added for cyclic queries.
  // tg.turnCyclesIntoFilters(filters);

  // TODO: resolve cycles involving a text operation.
  // Split the graph at possible text operations.
  vector<pair<TripleGraph, vector<SparqlFilter>>> graphs;
  unordered_map<string, vector<size_t>> contextVarTotextNodes;
  vector<SparqlFilter> filtersWithContextVars;
  tg.splitAtText(pq._filters, graphs, contextVarTotextNodes,
                 filtersWithContextVars);

  vector<vector<SubtreePlan>> finalTab;
  if (graphs.size() == 1) {
    finalTab = fillDpTab(graphs[0].first, graphs[0].second);
  } else {
    AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED,
             "No text yet.");
  }

  // If there is an order by clause, add another row to the table and
  // just add an order by / sort to every previous result if needed.
  // If the ordering is perfect already, just copy the plan.
  if (pq._orderBy.size() > 0) {
    finalTab.emplace_back(getOrderByRow(pq, finalTab));
  }

  // back() on an empty table would be undefined behavior, so check first.
  AD_CHECK_GT(finalTab.size(), 0);
  vector<SubtreePlan>& lastRow = finalTab.back();
  AD_CHECK_GT(lastRow.size(), 0);

  // Pick the cheapest plan of the last row (the first one on ties).
  size_t minCost = lastRow[0].getCostEstimate();
  size_t minInd = 0;
  for (size_t i = 1; i < lastRow.size(); ++i) {
    if (lastRow[i].getCostEstimate() < minCost) {
      minCost = lastRow[i].getCostEstimate();
      minInd = i;
    }
  }
  SubtreePlan& best = lastRow[minInd];

  // A distinct modifier is applied in the end. This is very easy
  // but not necessarily optimal.
  // TODO: Adjust so that the optimal place for the operation is found.
  if (pq._distinct) {
    QueryExecutionTree distinctTree(best._qet);
    vector<size_t> keepIndices;
    // Bind the map once: one lookup per variable, and find() / end() are
    // guaranteed to refer to the same map object even if the getter returns
    // by value.
    const auto& varCols = best._qet.getVariableColumnMap();
    for (const auto& var : pq._selectedVariables) {
      auto colIt = varCols.find(var);
      if (colIt != varCols.end()) {
        keepIndices.push_back(colIt->second);
      }
    }
    Distinct distinct(_qec, best._qet, keepIndices);
    distinctTree.setOperation(QueryExecutionTree::DISTINCT, &distinct);
    // Previously this message was skipped for DISTINCT queries.
    LOG(DEBUG) << "Done creating execution plan.\n";
    return distinctTree;
  }

  LOG(DEBUG) << "Done creating execution plan.\n";
  return best._qet;
}