/*
 * Map a query opcode to the opcode that computes its logical negation
 * (Gt <-> Lte, Eq <-> Neq, Same <-> NSame, ...).
 *
 * opc must satisfy isQueryOp(); this is checked with assertx.  The relational
 * double comparisons are rejected on purpose, and any opcode without an entry
 * below ends up in always_assert as well.
 */
Opcode negateQueryOp(Opcode opc) {
  assertx(isQueryOp(opc));

  switch (opc) {
    // Generic comparisons.
    case Eq:   return Neq;
    case Neq:  return Eq;
    case Lt:   return Gte;
    case Lte:  return Gt;
    case Gt:   return Lte;
    case Gte:  return Lt;

    // Integer comparisons.
    case EqInt:   return NeqInt;
    case NeqInt:  return EqInt;
    case LtInt:   return GteInt;
    case LteInt:  return GtInt;
    case GtInt:   return LteInt;
    case GteInt:  return LtInt;

    // Double (in)equality only; see the relational cases further down.
    case EqDbl:   return NeqDbl;
    case NeqDbl:  return EqDbl;

    // Identity checks.
    case Same:   return NSame;
    case NSame:  return Same;

    // Type tests.
    case InstanceOfBitmask:   return NInstanceOfBitmask;
    case NInstanceOfBitmask:  return InstanceOfBitmask;
    case IsType:              return IsNType;
    case IsNType:             return IsType;

    case GtDbl:
    case GteDbl:
    case LtDbl:
    case LteDbl:
      // Negating dbl relational ops probably isn't what you want:
      // (X < Y) != !(X >= Y) -- when NaN gets involved
      always_assert(false);

    default:
      always_assert(0);
  }
}
/*
 * Return the query opcode that yields the same result as opc once its two
 * operands are swapped: relational comparisons flip direction (Gt <-> Lt,
 * Gte <-> Lte), while equality/identity comparisons map to themselves.
 *
 * opc must satisfy isQueryOp() (assertx); opcodes without an entry below hit
 * always_assert.
 */
Opcode commuteQueryOp(Opcode opc) {
  assertx(isQueryOp(opc));

  switch (opc) {
    // Symmetric comparisons are unaffected by swapping the operands.
    case Eq:
    case Neq:
    case EqInt:
    case NeqInt:
    case EqDbl:
    case NeqDbl:
    case Same:
    case NSame:
      return opc;

    // Generic relational comparisons.
    case Lt:   return Gt;
    case Lte:  return Gte;
    case Gt:   return Lt;
    case Gte:  return Lte;

    // Integer relational comparisons.
    case LtInt:   return GtInt;
    case LteInt:  return GteInt;
    case GtInt:   return LtInt;
    case GteInt:  return LteInt;

    // Double relational comparisons.
    case LtDbl:   return GtDbl;
    case LteDbl:  return GteDbl;
    case GtDbl:   return LtDbl;
    case GteDbl:  return LteDbl;

    default:
      always_assert(0);
  }
}
/*
 * Translate a generic comparison opcode into its integer-specialized
 * counterpart.  Handles the plain query forms as well as the Jmp*,
 * SideExitJmp* and ReqBindJmp* forms of the six comparisons.
 *
 * opc must satisfy isQueryOp() (assert); any opcode not listed below hits
 * always_assert.
 */
Opcode queryToIntQueryOp(Opcode opc) {
  assert(isQueryOp(opc));

  switch (opc) {
    // Plain queries.
    case Eq:   return EqInt;
    case Neq:  return NeqInt;
    case Lt:   return LtInt;
    case Lte:  return LteInt;
    case Gt:   return GtInt;
    case Gte:  return GteInt;

    // Conditional jumps.
    case JmpEq:   return JmpEqInt;
    case JmpNeq:  return JmpNeqInt;
    case JmpLt:   return JmpLtInt;
    case JmpLte:  return JmpLteInt;
    case JmpGt:   return JmpGtInt;
    case JmpGte:  return JmpGteInt;

    // Side-exit jumps.
    case SideExitJmpEq:   return SideExitJmpEqInt;
    case SideExitJmpNeq:  return SideExitJmpNeqInt;
    case SideExitJmpLt:   return SideExitJmpLtInt;
    case SideExitJmpLte:  return SideExitJmpLteInt;
    case SideExitJmpGt:   return SideExitJmpGtInt;
    case SideExitJmpGte:  return SideExitJmpGteInt;

    // Request-bind jumps.
    case ReqBindJmpEq:   return ReqBindJmpEqInt;
    case ReqBindJmpNeq:  return ReqBindJmpNeqInt;
    case ReqBindJmpLt:   return ReqBindJmpLtInt;
    case ReqBindJmpLte:  return ReqBindJmpLteInt;
    case ReqBindJmpGt:   return ReqBindJmpGtInt;
    case ReqBindJmpGte:  return ReqBindJmpGteInt;

    default:
      always_assert(0);
  }
}
/*
 * Return the query opcode that yields the same result as opc once its two
 * operands are swapped.
 *
 * Relational comparisons flip direction within their own family
 * (Gt <-> Lt and Gte <-> Lte, for the generic, X, Int, Dbl, Str, Bool and
 * Obj variants).  Equality and identity comparisons are symmetric and map to
 * themselves.
 *
 * opc must satisfy isQueryOp() or isSideEffectfulQueryOp() (assertx); any
 * opcode without an entry below hits always_assert.
 */
Opcode commuteQueryOp(Opcode opc) {
  assertx(isQueryOp(opc) || isSideEffectfulQueryOp(opc));

  switch (opc) {
    // Symmetric comparisons: swapping the operands changes nothing.
    case Eq:
    case EqX:
    case Neq:
    case NeqX:
    case EqInt:
    case NeqInt:
    case EqDbl:
    case NeqDbl:
    case EqStr:
    case NeqStr:
    case SameStr:
    case NSameStr:
    case EqBool:
    case NeqBool:
    case EqObj:
    case NeqObj:
    case SameObj:
    case NSameObj:
    case Same:
    case NSame:
      return opc;

    // Generic relational comparisons (plain and X forms).
    case Gt:    return Lt;
    case GtX:   return LtX;
    case Gte:   return Lte;
    case GteX:  return LteX;
    case Lt:    return Gt;
    case LtX:   return GtX;
    case Lte:   return Gte;
    case LteX:  return GteX;

    // Integer.
    case GtInt:   return LtInt;
    case GteInt:  return LteInt;
    case LtInt:   return GtInt;
    case LteInt:  return GteInt;

    // Double.
    case GtDbl:   return LtDbl;
    case GteDbl:  return LteDbl;
    case LtDbl:   return GtDbl;
    case LteDbl:  return GteDbl;

    // String.
    case GtStr:   return LtStr;
    case GteStr:  return LteStr;
    case LtStr:   return GtStr;
    case LteStr:  return GteStr;

    // Bool.
    case GtBool:   return LtBool;
    case GteBool:  return LteBool;
    case LtBool:   return GtBool;
    case LteBool:  return GteBool;

    // Object.  (a <= b) is (b >= a), so LteObj commutes to GteObj just like
    // every other family above.
    case GtObj:   return LtObj;
    case GteObj:  return LteObj;
    case LtObj:   return GtObj;
    case LteObj:  return GteObj;

    default:
      always_assert(0);
  }
}
/*
 * Translate a generic comparison opcode into its string-specialized
 * counterpart (Gt -> GtStr, Same -> SameStr, ...).
 *
 * opc must satisfy isQueryOp() (assertx); any opcode not listed below hits
 * always_assert.
 */
Opcode queryToStrQueryOp(Opcode opc) {
  assertx(isQueryOp(opc));

  switch (opc) {
    // Equality and identity.
    case Eq:     return EqStr;
    case Neq:    return NeqStr;
    case Same:   return SameStr;
    case NSame:  return NSameStr;

    // Ordering.
    case Lt:   return LtStr;
    case Lte:  return LteStr;
    case Gt:   return GtStr;
    case Gte:  return GteStr;

    default:
      always_assert(0);
  }
}
/*
 * Translate a generic comparison opcode into its bool-specialized
 * counterpart.  Same and NSame map onto EqBool and NeqBool respectively,
 * i.e. they share the result of Eq and Neq.
 *
 * opc must satisfy isQueryOp() (assertx); any opcode not listed below hits
 * always_assert.
 */
Opcode queryToBoolQueryOp(Opcode opc) {
  assertx(isQueryOp(opc));

  switch (opc) {
    case Eq:
    case Same:
      return EqBool;

    case Neq:
    case NSame:
      return NeqBool;

    case Lt:   return LtBool;
    case Lte:  return LteBool;
    case Gt:   return GtBool;
    case Gte:  return GteBool;

    default:
      always_assert(0);
  }
}
/*
 * Map a query opcode to the opcode that computes its logical negation
 * (Gt <-> Lte, Eq <-> Neq, Same <-> NSame, IsType <-> IsNType, ...).
 *
 * opc must satisfy isQueryOp() (assert); any opcode not listed below hits
 * always_assert.
 */
Opcode negateQueryOp(Opcode opc) {
  assert(isQueryOp(opc));

  switch (opc) {
    // Comparisons.
    case Eq:   return Neq;
    case Neq:  return Eq;
    case Lt:   return Gte;
    case Lte:  return Gt;
    case Gt:   return Lte;
    case Gte:  return Lt;

    // Identity checks.
    case Same:   return NSame;
    case NSame:  return Same;

    // Type tests.
    case InstanceOfBitmask:   return NInstanceOfBitmask;
    case NInstanceOfBitmask:  return InstanceOfBitmask;
    case IsType:              return IsNType;
    case IsNType:             return IsType;

    default:
      always_assert(0);
  }
}
/*
 * Translate a query opcode into the corresponding Jmp* opcode
 * (Gt -> JmpGt, IsType -> JmpIsType, ...).
 *
 * opc must satisfy isQueryOp() (assert); any opcode not listed below hits
 * always_assert.
 */
Opcode queryToJmpOp(Opcode opc) {
  assert(isQueryOp(opc));

  switch (opc) {
    // Comparisons.
    case Eq:   return JmpEq;
    case Neq:  return JmpNeq;
    case Lt:   return JmpLt;
    case Lte:  return JmpLte;
    case Gt:   return JmpGt;
    case Gte:  return JmpGte;

    // Identity checks.
    case Same:   return JmpSame;
    case NSame:  return JmpNSame;

    // Type tests.
    case InstanceOfBitmask:   return JmpInstanceOfBitmask;
    case NInstanceOfBitmask:  return JmpNInstanceOfBitmask;
    case IsType:              return JmpIsType;
    case IsNType:             return JmpIsNType;

    default:
      always_assert(0);
  }
}
/*
 * Translate a generic or integer comparison opcode into its
 * double-specialized counterpart (Gt and GtInt both become GtDbl, etc.).
 *
 * opc must satisfy isQueryOp() (assertx); any opcode not listed below hits
 * always_assert.
 */
Opcode queryToDblQueryOp(Opcode opc) {
  assertx(isQueryOp(opc));

  switch (opc) {
    case Eq:
    case EqInt:
      return EqDbl;

    case Neq:
    case NeqInt:
      return NeqDbl;

    case Lt:
    case LtInt:
      return LtDbl;

    case Lte:
    case LteInt:
      return LteDbl;

    case Gt:
    case GtInt:
      return GtDbl;

    case Gte:
    case GteInt:
      return GteDbl;

    default:
      always_assert(0);
  }
}
/*
 * Translate a generic comparison opcode into its object-specialized
 * counterpart.  The plain and X forms of each comparison map to the same
 * *Obj opcode; Same and NSame map to SameObj and NSameObj.
 *
 * opc must satisfy isQueryOp() or isSideEffectfulQueryOp() (assertx); any
 * opcode not listed below hits always_assert.
 */
Opcode queryToObjQueryOp(Opcode opc) {
  assertx(isQueryOp(opc) || isSideEffectfulQueryOp(opc));

  switch (opc) {
    case Eq:
    case EqX:
      return EqObj;

    case Neq:
    case NeqX:
      return NeqObj;

    case Lt:
    case LtX:
      return LtObj;

    case Lte:
    case LteX:
      return LteObj;

    case Gt:
    case GtX:
      return GtObj;

    case Gte:
    case GteX:
      return GteObj;

    case Same:   return SameObj;
    case NSame:  return NSameObj;

    default:
      always_assert(0);
  }
}
/*
 * A query op is fusable when isQueryOp() accepts it and it is neither IsType
 * nor IsNType.
 */
bool isFusableQueryOp(Opcode opc) {
  if (!isQueryOp(opc)) {
    return false;
  }
  return !(opc == IsType || opc == IsNType);
}
std::vector<unsigned int> queryprocessor2::processQuery(std::string queryLine) { std::set<std::string> ANDs; std::set<std::string> ORs; std::set<std::string> NOTs; // parse the line std::stringstream stream(queryLine); std::string curWord, curOp; stream >> curWord; if(!isQueryOp(curWord)) { // no initial boolean query curOp = "AND"; Processor::prepareWord(curWord); ANDs.insert(curWord); } else curOp = curWord; while(stream.good()) { stream >> curWord; if(isQueryOp(curWord)) curOp = curWord; else { Processor::prepareWord(curWord); if(curWord == "") continue; if(curOp == "AND") ANDs.insert(curWord); else if(curOp == "OR") ORs.insert(curWord); else if(curOp == "NOT") NOTs.insert(curWord); } } // gather all the stuff & set operations along the way // this map is term -> multimap(page_id -> tf-idf) std::map<unsigned int, double> words; // ANDs for(auto it = ANDs.begin(); it != ANDs.end(); it++) { auto res = theIndex->findAll(*it); if(it == ANDs.begin()) { words.swap(res); } else { for(auto it2 = words.begin(); it2 != words.end(); it2++) { auto res_word = res.find(it2->first); if(res_word == res.end()) words.erase(it2); else it2->second += res_word->second; // increase ranking } } if(words.empty()) break; } // ORs // TODO: should words with multiple OR terms have a higher ranking?? 
That's how it is currently for(auto it = ORs.begin(); it != ORs.end(); it++) { auto res = theIndex->findAll(*it); for(auto it2 = res.begin(); it2 != res.end(); it2++) words[it2->first] += it2->second; } // NOTs for(auto it = NOTs.begin(); it != NOTs.end(); it++) { auto res = theIndex->findAll(*it); for(auto it2 = res.begin(); it2 != res.end(); it2++) words.erase(it2->first); } // order results by tf-idf std::multimap<double, unsigned int> sorted; // this map should automatically sort by key (if-idf) for(auto it = words.begin(); it != words.end(); it++) { sorted.insert(std::pair<double, unsigned int>(it->second, it->first)); } // then transfer results to return vector of page ids std::vector<unsigned int> return_me; for(auto it = sorted.cbegin(); it != sorted.cend(); it++) return_me.push_back(it->second); return return_me; }