void run() {
    // Insert a ton of documents with a: 1
    for (size_t i = 0; i < 1000; ++i) {
        insert(BSON("a" << 1));
    }

    // Insert a ton of other documents with a: 2
    for (size_t i = 0; i < 1000; ++i) {
        insert(BSON("a" << 2));
    }

    // Make an index on a:1
    addIndex(BSON("a" << 1));

    AutoGetCollectionForRead ctx(&_txn, ns());
    Collection* coll = ctx.getCollection();

    // Set up the distinct stage.
    std::vector<IndexDescriptor*> indexes;
    coll->getIndexCatalog()->findIndexesByKeyPattern(&_txn, BSON("a" << 1), false, &indexes);
    ASSERT_EQ(indexes.size(), 1U);

    DistinctParams params;
    params.descriptor = indexes[0];
    params.direction = 1;
    // Distinct-ing over the 0-th field of the keypattern.
    params.fieldNo = 0;
    // We'll look at all values in the bounds.
    params.bounds.isSimpleRange = false;
    OrderedIntervalList oil("a");
    oil.intervals.push_back(IndexBoundsBuilder::allValues());
    params.bounds.fields.push_back(oil);

    WorkingSet ws;
    DistinctScan distinct(&_txn, params, &ws);

    WorkingSetID wsid;
    // Get our first result.
    int firstResultWorks = 0;
    while (PlanStage::ADVANCED != distinct.work(&wsid)) {
        ++firstResultWorks;
    }
    // 5 is a bogus number. There's some amount of setup done by the first few calls but
    // we should return the first result relatively promptly.
    ASSERT_LESS_THAN(firstResultWorks, 5);
    ASSERT_EQUALS(1, getIntFieldDotted(ws, wsid, "a"));

    // Getting our second result should be very quick as we just skip
    // over the first result.
    int secondResultWorks = 0;
    while (PlanStage::ADVANCED != distinct.work(&wsid)) {
        ++secondResultWorks;
    }
    ASSERT_EQUALS(2, getIntFieldDotted(ws, wsid, "a"));
    // This is 0 because we don't have to loop for several values; we just skip over
    // all the 'a' values.
    ASSERT_EQUALS(0, secondResultWorks);

    ASSERT_EQUALS(PlanStage::IS_EOF, distinct.work(&wsid));
}
int main(int argc, char **argv) {
    vector<int> nums;
    for (int i = 1; i < argc; i++)
        nums.push_back(atoi(argv[i]));
    print_vector(distinct(nums));
    return 0;
}
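`distinct` and `print_vector` are defined elsewhere in this program; a minimal sketch of what they might look like (hypothetical helpers — the order-preserving dedupe is an assumption, the original may sort instead):

#include <cstdio>
#include <cstdlib>
#include <set>
#include <vector>
using namespace std;

// Hypothetical helper (not the original definition): keep the first
// occurrence of each value, preserving input order.
vector<int> distinct(const vector<int>& nums) {
    set<int> seen;
    vector<int> out;
    for (int n : nums)
        if (seen.insert(n).second)  // insert() reports whether n was new
            out.push_back(n);
    return out;
}

// Hypothetical helper: print the elements separated by spaces.
void print_vector(const vector<int>& v) {
    for (int n : v) printf("%d ", n);
    printf("\n");
}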
void RetriveCore::retrive(vector<string> files) {
    FeatureExtract fe;
    Mat retriveF;
    vector< vector<int> > allLenSeq = read();
    Mat allGlobal = read();
    Mat allLocal = read();
    vector<string> cand;
    int rv = files.size();
    for (int i = 0; i < rv; i++) {
        retriveF = fe.tomyFeature(files[i]);
#ifdef FILTER_BY_LENGTH
        // the minimal edit distance
        double* dist = distinct(rLenSeq, allLenSeq);
        cand = filterByLenSeq(cand, dist, allLenSeq);
#endif
        Mat hogFeature = localFeature(retriveF);
        Mat local_code = spectralHash(hogFeature);
        cand = filterByCode(cand, local_code, allLocal);
        printResult(files[i], cand);
        cand.clear();
        retriveF.release();
    }
}
MojErr MojDbSearchCursor::load()
{
    // pull unique ids from index
    ObjectSet ids;
    MojErr err = loadIds(ids);
    MojErrCheck(err);

    // load objects into memory
    err = loadObjects(ids);
    MojErrCheck(err);

    // sort results
    if (!m_orderProp.empty()) {
        err = sort();
        MojErrCheck(err);
    }

    // distinct
    if (!m_distinct.empty()) {
        distinct();
    }

    // reverse for desc
    if (m_query.desc()) {
        err = m_items.reverse();
        MojErrCheck(err);
    }

    // set limit and pos
    if (m_limit >= m_items.size()) {
        m_limitPos = m_items.end();
    } else {
        m_limitPos = m_items.begin() + m_limit;
    }
    m_pos = m_items.begin();

    return MojErrNone;
}
void run() {
    // Insert a ton of documents with a: [1, 2, 3]
    for (size_t i = 0; i < 1000; ++i) {
        insert(BSON("a" << BSON_ARRAY(1 << 2 << 3)));
    }

    // Insert a ton of other documents with a: [4, 5, 6]
    for (size_t i = 0; i < 1000; ++i) {
        insert(BSON("a" << BSON_ARRAY(4 << 5 << 6)));
    }

    // Make an index on a:1
    addIndex(BSON("a" << 1));

    AutoGetCollectionForRead ctx(&_txn, ns());
    Collection* coll = ctx.getCollection();

    // Set up the distinct stage.
    std::vector<IndexDescriptor*> indexes;
    coll->getIndexCatalog()->findIndexesByKeyPattern(&_txn, BSON("a" << 1), false, &indexes);
    verify(indexes.size() == 1);

    DistinctParams params;
    params.descriptor = indexes[0];
    ASSERT_TRUE(params.descriptor->isMultikey(&_txn));
    verify(params.descriptor);
    params.direction = 1;
    // Distinct-ing over the 0-th field of the keypattern.
    params.fieldNo = 0;
    // We'll look at all values in the bounds.
    params.bounds.isSimpleRange = false;
    OrderedIntervalList oil("a");
    oil.intervals.push_back(IndexBoundsBuilder::allValues());
    params.bounds.fields.push_back(oil);

    WorkingSet ws;
    DistinctScan distinct(&_txn, params, &ws);

    // We should see each number in the range [1, 6] exactly once.
    std::set<int> seen;

    WorkingSetID wsid;
    PlanStage::StageState state;
    while (PlanStage::IS_EOF != (state = distinct.work(&wsid))) {
        if (PlanStage::ADVANCED == state) {
            // Check int value.
            int currentNumber = getIntFieldDotted(ws, wsid, "a");
            ASSERT_GREATER_THAN_OR_EQUALS(currentNumber, 1);
            ASSERT_LESS_THAN_OR_EQUALS(currentNumber, 6);

            // Should see this number only once.
            ASSERT_TRUE(seen.find(currentNumber) == seen.end());
            seen.insert(currentNumber);
        }
    }

    ASSERT_EQUALS(6U, seen.size());
}
void evolvability_biped(Organism* org, char* fn, int* di, double* ev, bool recall) {
    fstream file;
    file.open(fn, ios::app | ios::out);
    cout << "Evolvability..." << endl;
    // file << "---" << " " << org->winner << endl;
    double points[BIPEDMUTATIONS * BDIM];
    float minx = -10.0, maxx = 10.0, miny = -10.0, maxy = 10.0;
    double ox, oy, fit;
    int nodes;
    int connections;
    data_record rec;
    for (int i = 0; i < BIPEDMUTATIONS; i++) {
        Genome* new_gene = new Genome(*org->gnome);
        // new_org->gnome = new Genome(*org->gnome);
        if (i != 0)  // first copy is clean
            for (int j = 0; j < 1; j++)
                mutate_genome(new_gene);
        Organism* new_org = new Organism(0.0, new_gene, 0);

        noveltyitem* nov_item = biped_evaluate(new_org, &rec);
        if (i == 0) {
            fit = nov_item->fitness;
            nodes = new_org->net->nodecount();
            connections = new_org->net->linkcount();
            ox = rec.ToRec[1];
            oy = rec.ToRec[2];
        }
        if (recall) {
            for (int k = 0; k < nov_item->data[0].size(); k++)
                file << nov_item->data[0][k] << " ";
            file << endl;
        }
        // file << rec.ToRec[1] << " " << rec.ToRec[2] << endl;
        for (int k = 0; k < nov_item->data[0].size(); k++) {
            points[i * BDIM + k] = nov_item->data[0][k] / 25.0;
        }
        /*
        points[i*2]=(rec.ToRec[1]-minx)/(maxx-minx);
        points[i*2+1]=(rec.ToRec[2]-miny)/(maxy-miny);
        cout << points[i*2] << " " << points[i*2+1] << endl;
        */
        delete new_org;
        delete nov_item;
        // file << endl;
    }
    int dist = distinct(points, BIPEDMUTATIONS, BDIM);
    if (di != NULL)
        *di = dist;
    double evol = 0;  // test_indiv(points,BIPEDMUTATIONS);
    if (ev != NULL)
        *ev = evol;
    if (!recall) {
        file << dist << " " << evol << " " << ox << " " << oy << " "
             << nodes << " " << connections << " " << fit << endl;
        file.close();
    }
}
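`distinct(points, BIPEDMUTATIONS, BDIM)` is defined outside this snippet; judging from its use, it counts how many of the sampled behavior points are pairwise different. A sketch under that assumption (the tolerance-based comparison is hypothetical):

#include <cmath>

// Hypothetical definition: count the points (rows of `dim` doubles) that
// differ from every earlier point in at least one coordinate.
int distinct(const double* pts, int count, int dim) {
    const double eps = 1e-9;  // assumed comparison tolerance
    int unique = 0;
    for (int i = 0; i < count; ++i) {
        bool duplicate = false;
        for (int j = 0; j < i && !duplicate; ++j) {
            bool same = true;
            for (int k = 0; k < dim; ++k) {
                if (std::fabs(pts[i * dim + k] - pts[j * dim + k]) > eps) {
                    same = false;
                    break;
                }
            }
            duplicate = same;
        }
        if (!duplicate) ++unique;
    }
    return unique;
}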
vector<vector<int> > fourSum(vector<int>& nums, int target) {
    sort(nums.begin(), nums.end());
    int pos = 0;
    int n = nums.size();
    // Build all pairwise sums together with their source indices.
    for (int i = 0; i < n; i++) {
        for (int j = i + 1; j < n; j++) {
            d[pos].v = nums[i] + nums[j];
            d[pos].x = i;
            d[pos++].y = j;
        }
    }
    // sort the combined sum array
    sort(d, d + pos, comparator);
    vector<vector<int> > matchlist;
    for (int i = 0; i < pos; i++) {
        int temp = target - d[i].v;
        int l = i + 1, r = pos - 1;
        // Binary search for a pair summing to the complement.
        while (l <= r) {
            int m = (l + r) / 2;
            if (d[m].v == temp) {
                int j = m;
                // back up to the index before the first match
                while (j >= 0 && d[j].v == temp)
                    j--;
                for (j++; j < pos; j++) {
                    if (d[j].v != temp)
                        break;
                    // The two pairs must not share any source index.
                    if (d[j].x != d[i].x && d[j].x != d[i].y &&
                        d[j].y != d[i].x && d[j].y != d[i].y) {
                        vector<int> item;
                        item.push_back(nums[d[i].x]);
                        item.push_back(nums[d[i].y]);
                        item.push_back(nums[d[j].x]);
                        item.push_back(nums[d[j].y]);
                        sort(item.begin(), item.end());
                        matchlist.push_back(item);
                    }
                }
                break;
            } else if (d[m].v > temp) {
                r = m - 1;
            } else {
                l = m + 1;
            }
        }
    }
    // remove repeated items
    return distinct(matchlist);
}
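The `distinct` helper called at the end is not shown in the snippet. A plausible sketch (an assumption, not the original: since every quadruple is sorted internally, sorting the outer vector makes duplicates adjacent so std::unique can drop them):

#include <algorithm>
#include <vector>
using namespace std;

// Hypothetical helper: remove repeated quadruples. Each inner vector is
// already sorted, so equal quadruples become adjacent after the outer sort.
vector<vector<int> > distinct(vector<vector<int> > lists) {
    sort(lists.begin(), lists.end());
    lists.erase(unique(lists.begin(), lists.end()), lists.end());
    return lists;
}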
virtual void report(StringBuffer &out)
{
    unsigned __int64 d = distinct();
    out.append("<Field name=\"").append(fieldname).append("\"");
    if (exact())
    {
        out.append(" distinct=\"").append(d).append("\">\n");
        reportValues(out);
        out.append("</Field>\n");
    }
    else
        out.append(" estimate=\"").append(d).append("\"/>\n");
}
MojErr MojDbSearchCursor::load()
{
    LOG_TRACE("Entering function %s", __FUNCTION__);

    // pull unique ids from index
    ObjectSet ids;
    MojErr err = loadIds(ids);
    MojErrCheck(err);

    // load objects into memory
    err = loadObjects(ids);
    MojErrCheck(err);

    // sort results
    if (!m_orderProp.empty()) {
        err = sort();
        MojErrCheck(err);
    }

    // distinct
    if (!m_distinct.empty()) {
        distinct();
    }

    // reverse for desc
    if (m_query.desc()) {
        err = m_items.reverse();
        MojErrCheck(err);
    }

    // next page
    if (!m_page.empty()) {
        err = setPagePosition();
        MojErrCheck(err);
    } else {
        // set begin/last position
        m_pos = m_items.begin();
        if (m_limit >= m_items.size()) {
            m_limitPos = m_items.end();
        } else {
            // if the item count exceeds the limit, record where the next page starts
            m_limitPos = m_items.begin() + m_limit;
            MojDbStorageItem* nextItem = m_limitPos->get();
            const MojObject nextId = nextItem->id();
            m_page.fromObject(nextId);
        }
    }

    // set remainder count
    m_count = m_items.end() - m_pos;

    return MojErrNone;
}
MojErr MojDbIsamQuery::getImpl(MojDbStorageItem*& itemOut, bool& foundOut, bool getItem)
{
    itemOut = NULL;
    MojUInt32 group = 0;
    MojErr err = getKey(group, foundOut);
    MojErrCheck(err);
    if (foundOut && getItem) {
        err = getVal(itemOut, foundOut);
        if (err == MojErrInternalIndexOnFind) {
#if defined (MOJ_DEBUG)
            char s[1024];
            char *s2 = NULL;
            MojErr err2 = MojByteArrayToHex(m_keyData, m_keySize, s);
            MojErrCheck(err2);
            if (m_keySize > 17)
                s2 = ((char *)m_keyData) + m_keySize - 17;
            MojSize idIndex = m_plan->idIndex();
            const MojChar * from = m_plan->query().from().data();
            MojLogInfo(MojDb::s_log, _T("isamquery_warnindex: from: %s; indexid: %zu; group: %d; KeySize: %zu; %s ;id: %s \n"),
                       from, idIndex, (int)group, m_keySize, s, (s2 ? s2 : "NULL"));
#endif
        }
        MojErrCheck(err);
    }
    if (foundOut) {
        // For a distinct query, check whether this field value is a duplicate.
        // Duplicates are not counted, and itemOut is reset to NULL so that the
        // next DB result is fetched.
        if (!m_distinct.empty() && itemOut) {
            bool distincted = false;
            err = distinct(itemOut, distincted);
            MojErrCheck(err);
            if (!distincted)
                incrementCount();
            else
                itemOut = NULL;
        } else {
            incrementCount();
        }
    }
    return MojErrNone;
}
int main(void) {
    seive = (int *) calloc(MAX, sizeof(int));
    int i, j;
    // Sieve: for each prime i, mark all of its multiples. Later primes
    // overwrite earlier marks, so seive[j] ends up holding the largest
    // prime factor of j.
    for (i = 2; i < MAX; i++) {
        if (seive[i] == 0) {
            for (j = i; j < MAX; j += i)
                seive[j] = i;
        }
    }
    // Find the first run of n consecutive integers that each have
    // exactly n distinct prime factors.
    int c = 0;
    int x = 1;
    int n = 4;
    while (c != n) {
        if (distinct(x) == n)
            c++;
        else
            c = 0;
        x++;
    }
    printf("%d\n", x - n);
    return 0;
}
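`distinct(x)` is defined elsewhere; given how the sieve is built, `seive[x]` holds a prime factor of x, so a sketch of the helper under that assumption:

// Hypothetical definition: count the distinct prime factors of x by
// repeatedly dividing out the factor recorded in the global seive[].
int distinct(int x) {
    int count = 0;
    while (x > 1) {
        int p = seive[x];  // a prime factor of x
        count++;
        while (x % p == 0)
            x /= p;
    }
    return count;
}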
void run() {
    // insert documents with a: 1 and b: 1
    for (size_t i = 0; i < 1000; ++i) {
        insert(BSON("a" << 1 << "b" << 1));
    }

    // insert documents with a: 1 and b: 2
    for (size_t i = 0; i < 1000; ++i) {
        insert(BSON("a" << 1 << "b" << 2));
    }

    // insert documents with a: 2 and b: 1
    for (size_t i = 0; i < 1000; ++i) {
        insert(BSON("a" << 2 << "b" << 1));
    }

    // insert documents with a: 2 and b: 3
    for (size_t i = 0; i < 1000; ++i) {
        insert(BSON("a" << 2 << "b" << 3));
    }

    addIndex(BSON("a" << 1 << "b" << 1));

    AutoGetCollectionForRead ctx(&_txn, ns());
    Collection* coll = ctx.getCollection();

    std::vector<IndexDescriptor*> indices;
    coll->getIndexCatalog()->findIndexesByKeyPattern(
        &_txn, BSON("a" << 1 << "b" << 1), false, &indices);
    ASSERT_EQ(1U, indices.size());

    DistinctParams params;
    params.descriptor = indices[0];
    ASSERT_TRUE(params.descriptor);
    params.direction = 1;
    params.fieldNo = 1;
    params.bounds.isSimpleRange = false;

    OrderedIntervalList aOil{"a"};
    aOil.intervals.push_back(IndexBoundsBuilder::allValues());
    params.bounds.fields.push_back(aOil);

    OrderedIntervalList bOil{"b"};
    bOil.intervals.push_back(IndexBoundsBuilder::allValues());
    params.bounds.fields.push_back(bOil);

    WorkingSet ws;
    DistinctScan distinct(&_txn, params, &ws);

    WorkingSetID wsid;
    PlanStage::StageState state;
    std::vector<int> seen;

    while (PlanStage::IS_EOF != (state = distinct.work(&wsid))) {
        ASSERT_NE(PlanStage::FAILURE, state);
        ASSERT_NE(PlanStage::DEAD, state);
        if (PlanStage::ADVANCED == state) {
            seen.push_back(getIntFieldDotted(ws, wsid, "b"));
        }
    }

    ASSERT_EQUALS(4U, seen.size());
    ASSERT_EQUALS(1, seen[0]);
    ASSERT_EQUALS(2, seen[1]);
    ASSERT_EQUALS(1, seen[2]);
    ASSERT_EQUALS(3, seen[3]);
}
    return db_v_line_sta_info_find(bus_line_info, load, sub_str, NULL);
}

/* Build the query SQL for table bus_line_info:
 * find the lines that pass through a given stop.
 * @param buf      pointer to the buffer that receives the WHERE clause
 * @param buf_size size of the buffer; the function zeroes it once
 * @param name     name of the stop to query
 */
static int construct_v_line_sta_info_sql_find_line_by_sta_name(char *buf, int buf_size, char *name, int fuzzy)
{
#if 0
    SELECT * FROM bus_line_info a WHERE a.id IN (
        SELECT distinct(id) FROM v_line_sta_info b WHERE b.sta_name = 'xx'
    )
#endif
    if (NULL == buf) {
        return ERROR;
    }
    memset(buf, 0, buf_size);
    /* Select DISTINCT line ids, because a stop may appear more than once
     * on the same line. */
    if (SQL_SEARCH_FUZZY == fuzzy) {
        snprintf(buf, buf_size, " WHERE %s.id IN ( SELECT distinct(id) FROM %s b \
            WHERE b.sta_name LIKE '%%%s%%')",
            BUS_LINE_INFO_TABLE_NAME, BUS_V_LINE_STA_TABLE_NAME, name);
    } else {
        snprintf(buf, buf_size, " WHERE %s.id IN ( SELECT distinct(id) FROM %s b \
            WHERE b.sta_name='%s')",
            BUS_LINE_INFO_TABLE_NAME, BUS_V_LINE_STA_TABLE_NAME, name);
    }
    return OK;
}
MojErr MojDbQuery::fromObject(const MojObject& obj)
{
    // TODO: validate against query schema
    bool found;
    MojErr err;
    MojObject array;
    MojString str;

    // distinct
    found = false;
    err = obj.get(DistinctKey, str, found);
    MojErrCheck(err);
    if (found) {
        err = distinct(str);
        MojErrCheck(err);
        // if "distinct" is set, force "distinct" column into "select".
        err = select(str);
        MojErrCheck(err);
        // order
        err = order(str);
        MojErrCheck(err);
    } else {
        // select
        if (obj.get(SelectKey, array)) {
            if (array.empty()) {
                MojErrThrowMsg(MojErrDbInvalidQuery, _T("db: select clause but no selected properties"));
            }
            MojObject prop;
            MojSize i = 0;
            while (array.at(i++, prop)) {
                MojErr err = prop.stringValue(str);
                MojErrCheck(err);
                err = select(str);
                MojErrCheck(err);
            }
        }
        // order
        found = false;
        err = obj.get(OrderByKey, str, found);
        MojErrCheck(err);
        if (found) {
            err = order(str);
            MojErrCheck(err);
        }
    }

    // from
    err = obj.getRequired(FromKey, str);
    MojErrCheck(err);
    err = from(str);
    MojErrCheck(err);

    // where
    if (obj.get(WhereKey, array)) {
        err = addClauses(m_whereClauses, array);
        MojErrCheck(err);
    }

    // filter
    if (obj.get(FilterKey, array)) {
        err = addClauses(m_filterClauses, array);
        MojErrCheck(err);
    }

    // desc
    bool descVal;
    if (obj.get(DescKey, descVal)) {
        desc(descVal);
    }

    // limit
    MojInt64 lim;
    if (obj.get(LimitKey, lim)) {
        if (lim < 0)
            MojErrThrowMsg(MojErrDbInvalidQuery, _T("db: negative query limit"));
    } else {
        lim = LimitDefault;
    }
    limit((MojUInt32) lim);

    // page
    MojObject pageObj;
    if (obj.get(PageKey, pageObj)) {
        Page pagec;
        err = pagec.fromObject(pageObj);
        MojErrCheck(err);
        page(pagec);
    }

    // include deleted
    bool incDel = false;
    if (obj.get(IncludeDeletedKey, incDel) && incDel) {
        err = includeDeleted();
        MojErrCheck(err);
    }

    return MojErrNone;
}
SCENARIO("distinct - never", "[distinct][operators]"){ GIVEN("a source"){ auto sc = rxsc::make_test(); auto w = sc.create_worker(); const rxsc::test::messages<int> on; auto xs = sc.make_hot_observable({ on.next(150, 1) }); WHEN("distinct values are taken"){ auto res = w.start( [xs]() { return xs.distinct(); } ); THEN("the output is empty"){ auto required = std::vector<rxsc::test::messages<int>::recorded_type>(); auto actual = res.get_observer().messages(); REQUIRE(required == actual); } THEN("there was 1 subscription/unsubscription to the source"){ auto required = rxu::to_vector({ on.subscribe(200, 1000) }); auto actual = xs.subscriptions(); REQUIRE(required == actual);
// _____________________________________________________________________________
QueryExecutionTree QueryPlanner::createExecutionTree(const ParsedQuery& pq) const {
  LOG(DEBUG) << "Creating execution plan.\n";
  // Strategy:
  // Create a graph. Each triple corresponds to a node; there is an edge
  // between two nodes iff they share a variable.
  TripleGraph tg = createTripleGraph(pq);

  // Each node/triple corresponds to a scan (more than one way possible),
  // each edge corresponds to a possible join.
  // Enumerate and judge possible query plans using a DP table.
  // Each ExecutionTree for a sub-problem gives an estimate.
  // Start bottom up, i.e. with the scans for triples.
  // Always merge two solutions from the table by picking one possible join.
  // A join is possible if there is an edge between the results.
  // Therefore we keep track of all edges that touch a sub-result.
  // When joining two sub-results, the result's edges are those that belong
  // to exactly one of the two input sub-trees.
  // If two of them have the same target, only one out edge is created.
  // All edges that are shared by both subtrees are checked to see whether
  // they are covered by the join or whether an extra filter/select is needed.

  // The algorithm then creates all possible plans for 1 to n triples.
  // To generate a plan for k triples, all subsets between i and k-i are
  // joined.

  // Filters are now added to the mix when building execution plans.
  // Without them, a plan has an execution tree and a set of
  // covered triple nodes.
  // With them, it also has a set of covered filters.
  // A filter can be applied as soon as all variables that occur in the filter
  // are covered by the query. This is also always the place where this is done.

  // TODO: resolve cyclic queries and turn them into filters.
  // Copy made so that something can be added for cyclic queries.
  // tg.turnCyclesIntoFilters(filters);

  // TODO: resolve cycles involving a text operation.
  // Split the graph at possible text operations.
  vector<pair<TripleGraph, vector<SparqlFilter>>> graphs;
  unordered_map<string, vector<size_t>> contextVarTotextNodes;
  vector<SparqlFilter> filtersWithContextVars;
  tg.splitAtText(pq._filters, graphs, contextVarTotextNodes,
                 filtersWithContextVars);

  vector<vector<SubtreePlan>> finalTab;
  if (graphs.size() == 1) {
    finalTab = fillDpTab(graphs[0].first, graphs[0].second);
  } else {
    AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED, "No text yet.");
  }

  // If there is an order by clause, add another row to the table and
  // just add an order by / sort to every previous result if needed.
  // If the ordering is perfect already, just copy the plan.
  if (pq._orderBy.size() > 0) {
    finalTab.emplace_back(getOrderByRow(pq, finalTab));
  }

  vector<SubtreePlan>& lastRow = finalTab.back();
  AD_CHECK_GT(lastRow.size(), 0);
  size_t minCost = lastRow[0].getCostEstimate();
  size_t minInd = 0;
  for (size_t i = 1; i < lastRow.size(); ++i) {
    if (lastRow[i].getCostEstimate() < minCost) {
      minCost = lastRow[i].getCostEstimate();
      minInd = i;
    }
  }

  // A distinct modifier is applied in the end. This is very easy
  // but not necessarily optimal.
  // TODO: Adjust so that the optimal place for the operation is found.
  if (pq._distinct) {
    QueryExecutionTree distinctTree(lastRow[minInd]._qet);
    vector<size_t> keepIndices;
    for (const auto& var : pq._selectedVariables) {
      if (lastRow[minInd]._qet.getVariableColumnMap().find(var) !=
          lastRow[minInd]._qet.getVariableColumnMap().end()) {
        keepIndices.push_back(
            lastRow[minInd]._qet.getVariableColumnMap().find(var)->second);
      }
    }
    Distinct distinct(_qec, lastRow[minInd]._qet, keepIndices);
    distinctTree.setOperation(QueryExecutionTree::DISTINCT, &distinct);
    return distinctTree;
  }

  LOG(DEBUG) << "Done creating execution plan.\n";
  return lastRow[minInd]._qet;
}