/* Step the container's cursor forward by one position, then hand back
 * whatever cur_elem() reports for the updated cursor. */
struct felem *next_elem(struct fcontainer *self)
{
    ++self->cur;
    struct felem *current = cur_elem(self);
    return current;
}
/// Public methods void grouped_sframe::group(const gl_sframe &sf, const std::vector<std::string> column_names, bool is_grouped) { if(m_inited) log_and_throw("Group has already been called on this object!"); // Do our "grouping" if it hasn't already been done if(!is_grouped) { m_grouped_sf = sf.sort(column_names); } else { m_grouped_sf = sf; } m_key_col_names = column_names; // Get indices from column names std::vector<size_t> col_ids; std::unordered_set<size_t> dedup_set; for(const auto &i : column_names) { auto col_id = sf.column_index(i); col_ids.push_back(col_id); auto ins_ret = dedup_set.insert(col_id); if(!ins_ret.second) log_and_throw("Found duplicate column name: " + i); } // Build the directory of ranges to allow querying of the groups // (this is an extra, sequential pass over the data) auto sf_range = m_grouped_sf.range_iterator(); auto iter = sf_range.begin(); size_t cnt = 0; std::vector<flexible_type> prev_elem(col_ids.size()); std::vector<flexible_type> cur_elem(col_ids.size()); bool first = true; for(; iter != sf_range.end(); ++iter, ++cnt) { // Create cur_elem int col_cnt = 0; for(const auto &i : col_ids) { cur_elem[col_cnt] = (*iter)[i]; ++col_cnt; } // Check for new group if((cur_elem != prev_elem) || first) { first = false; m_key2range.insert(std::make_pair(cur_elem, m_range_directory.size())); m_range_directory.push_back(cnt); if(cur_elem.size() == 1) m_group_names.push_back(cur_elem[0]); else m_group_names.push_back(cur_elem); } prev_elem = cur_elem; } if(col_ids.size() > 1) { m_group_type = flex_type_enum::LIST; } else { m_group_type = prev_elem[0].get_type(); } m_inited = true; }