// Build a cell whose averaged data (ave) and squared size (sizesq) have already
// been computed by the caller.
//
// ave        Averaged data for the points in [start,end); stored as this cell's _data.
// sizesq     Squared size of the cell; must be >= 0.
// vdata      The full list of data pointers; SplitData reorders the range [start,end).
// minsizesq  Cells with sizesq <= minsizesq are not subdivided and get size 0.
// sm         Which split method SplitData should use.
// start,end  Half-open index range of vdata covered by this cell (end > start).
//
// If the cell is larger than minsizesq, the range is split at the index returned by
// SplitData and the two children are built recursively.
Cell<D,C>::Cell(CellData<D,C>* ave, double sizesq,
                std::vector<CellData<D,C>*>& vdata, double minsizesq,
                SplitMethod sm, size_t start, size_t end) :
    _sizesq(sizesq), _data(ave), _left(0), _right(0)
{
    Assert(sizesq >= 0.);
    //xdbg<<"Make cell starting with ave = "<<*ave<<std::endl;
    Assert(vdata.size()>0);
    Assert(end <= vdata.size());
    Assert(end > start);

    if (_sizesq > minsizesq) {
        _size = sqrt(_sizesq);
        //xdbg<<"size = "<<_size<<std::endl;
        size_t mid = SplitData(vdata,sm,start,end,_data->getPos());
        try {
            _left = new Cell<D,C>(vdata,minsizesq,sm,start,mid);
            _right = new Cell<D,C>(vdata,minsizesq,sm,mid,end);
        } catch (const std::bad_alloc&) {
            // Catch by const reference (not by value) so the exception object is not copied.
            myerror("out of memory - cannot create new Cell");
        }
    } else {
        // Too small to be worth splitting: treat the size as exactly zero.
        _size = _sizesq = 0.;
    }
}
// Split the points held by `root` into two groups and build `left` and `right`
// from them. A pivot is chosen from the root's sampling probabilities, a cosine
// similarity array is built against that pivot, and SplitData distributes the
// points between the two child matrices. A child is only built when its matrix
// has at least one row.
void CosineTreeBuilder::CTNodeSplit(CosineTree& root, CosineTree& left, CosineTree& right)
{
  // Points stored in the node being split.
  arma::mat points = root.Data();

  // Choose the pivot from the node's sampling probabilities.
  arma::vec samplingProb = root.Probabilities();
  size_t pivot = GetPivot(samplingProb);

  // Cosine similarity array, filled with respect to the pivot.
  std::vector<double> similarity;
  CreateCosineSimilarityArray(similarity, points, pivot);

  // Distribute the points into the matrices for the two children.
  arma::mat leftPoints, rightPoints;
  SplitData(similarity, leftPoints, rightPoints, points);

  // Build the left child, if it received any points.
  if (leftPoints.n_rows != 0)
  {
    CTNode(leftPoints.t(), left);
    //TODO: Traversal is not required, still fix this
    //root.Left(left);
  }

  // Build the right child, if it received any points.
  if (rightPoints.n_rows != 0)
  {
    CTNode(rightPoints.t(), right);
    //TODO: Traversal is not required, still fix this
    //root.Right(right);
  }
}
void ObjFile::subdivide() { // clear old data midPoints_ = std::vector<int>(triangles_.size() * 3, -1); midTriangles_ = std::vector<int>(triangles_.size() * 3, -1); tmpTriangles_.clear(); processed_ = std::vector<bool>(triangles_.size(), false); int oldVertSize = vertices_.size(); splits_.clear(); splits_.push_back(SplitData(0, -1, -1, -1, -1)); while(!splits_.empty()) { SplitData sd = splits_.back(); splits_.pop_back(); triangleSplit(sd.index, sd.callee, sd.newPoint, sd.t1, sd.t2); } newverts_ = std::vector<Vec3f>(oldVertSize); computeNewVerts(); std::copy(newverts_.begin(), newverts_.end(), vertices_.begin()); triangles_.swap(tmpTriangles_); loadNormalIndices(); computeNormals(); }
void App() { long t1; (void) time(&t1); seedMT(t1); float em_converged = 1e-4; int em_max_iter = 20; int em_estimate_alpha = 1; //1 indicate estimate alpha and 0 use given value int var_max_iter = 30; double var_converged = 1e-6; double initial_alpha = 0.1; int n_topic = 30; LDA lda; lda.Init(em_converged, em_max_iter, em_estimate_alpha, var_max_iter, var_converged, initial_alpha, n_topic); Corpus cor; //Str data = "../../data/ap.dat"; Str data = "lda_data"; cor.LoadData(data); Corpus train; Corpus test; double p = 0.8; SplitData(cor, p, &train, &test); Str type = "seeded"; LdaModel m; lda.RunEM(type, train, test, &m); LOG(INFO) << m.alpha; VVReal gamma; VVVReal phi; lda.Infer(test, m, &gamma, &phi); WriteStrToFile(Join(gamma, " ", "\n"), "gamma"); WriteStrToFile(Join(phi, " ", "\n", "\n\n"), "phi"); }
// Splits this node at aOffset by delegating to SplitData and returns the newly
// created node downcast to Text. The result of SplitData is stored in rv; if rv
// reports failure, nullptr is returned instead.
already_AddRefed<Text> Text::SplitText(uint32_t aOffset, ErrorResult& rv) {
  nsCOMPtr<nsIContent> splitOff;
  rv = SplitData(aOffset, getter_AddRefs(splitOff));
  if (!rv.Failed()) {
    return splitOff.forget().downcast<Text>();
  }
  return nullptr;
}
void RegressionTree::Fit(DataVector *data, size_t len, Node *node, size_t depth, double *gain) { size_t max_depth = g_conf.max_depth; if (g_conf.loss == SQUARED_ERROR) { node->pred = Average(*data, len); } else if (g_conf.loss == LOG_LIKELIHOOD) { node->pred = LogitOptimalValue(*data, len); } if (max_depth == depth || Same(*data, len) || len <= g_conf.min_leaf_size) { node->leaf = true; return; } double g = 0.0; if (!FindSplit(data, len, &(node->index), &(node->value), &g)) { node->leaf = true; return; } DataVector out[Node::CHILDSIZE]; SplitData(*data, len, node->index, node->value, out); if (out[Node::LT].empty() || out[Node::GE].empty()) { node->leaf = true; return; } // update gain if (gain[node->index] < g) { gain[node->index] = g; } // increase feature cost if certain feature is used if (g_conf.feature_costs && g_conf.enable_feature_tunning) { g_conf.feature_costs[node->index] += 1.0e-4; } node->child[Node::LT] = new Node(); node->child[Node::GE] = new Node(); Fit(&out[Node::LT], node->child[Node::LT], depth+1, gain); Fit(&out[Node::GE], node->child[Node::GE], depth+1, gain); if (!out[Node::UNKNOWN].empty()) { node->child[Node::UNKNOWN] = new Node(); Fit(&out[Node::UNKNOWN], node->child[Node::UNKNOWN], depth+1, gain); } }
// Build a cell from the data pointers in vdata[start,end).
//
// vdata      The full list of data pointers. For a single-element range the pointer
//            is taken over by this cell and the slot in vdata is set to 0, so the
//            caller does not delete it. Otherwise SplitData reorders [start,end).
// minsizesq  Cells with squared size <= minsizesq are not subdivided and get size 0.
// sm         Which split method SplitData should use.
// start,end  Half-open index range of vdata covered by this cell (end > start).
Cell<D,C>::Cell(std::vector<CellData<D,C>*>& vdata, double minsizesq,
                SplitMethod sm, size_t start, size_t end) :
    _size(0.), _sizesq(0.), _left(0), _right(0)
{
    Assert(vdata.size()>0);
    Assert(end <= vdata.size());
    Assert(end > start);

    if (end - start == 1) {
        // Leaf: adopt the single data element directly.
        //xdbg<<"Make leaf cell from "<<*vdata[start]<<std::endl;
        //xdbg<<"size = "<<_size<<std::endl;
        _data = vdata[start];
        vdata[start] = 0; // Make sure calling routine doesn't delete this one!
    } else {
        // Interior candidate: build the averaged data for the range, then measure it.
        _data = new CellData<D,C>(vdata,start,end);
        _data->finishAverages(vdata,start,end);
        //xdbg<<"Make cell from "<<start<<".."<<end<<" = "<<*_data<<std::endl;

        _sizesq = CalculateSizeSq(_data->getPos(),vdata,start,end);
        Assert(_sizesq >= 0.);

        if (_sizesq > minsizesq) {
            _size = sqrt(_sizesq);
            //xdbg<<"size = "<<_size<<std::endl;
            size_t mid = SplitData(vdata,sm,start,end,_data->getPos());
            try {
                _left = new Cell<D,C>(vdata,minsizesq,sm,start,mid);
                _right = new Cell<D,C>(vdata,minsizesq,sm,mid,end);
            } catch (const std::bad_alloc&) {
                // Catch by const reference (not by value) so the exception object
                // is not copied.
                myerror("out of memory - cannot create new Cell");
            }
        } else {
            // This shouldn't be necessary for 2-point, but 3-point calculations sometimes
            // have triangles that have two sides that are almost the same, so splits can
            // go arbitrarily small to switch which one is d1,d2 or d2,d3.  This isn't
            // actually an important distinction, so just abort that by calling the size
            // exactly zero.
            _size = _sizesq = 0.;
        }
    }
}
size_t SplitData( std::vector<CellData<D,C>*>& vdata, SplitMethod sm, size_t start, size_t end, const Position<C>& meanpos) { Assert(end-start > 1); size_t mid=0; Bounds<C> b; for(size_t i=start;i<end;++i) b += vdata[i]->getPos(); int split = b.getSplit(); switch (sm) { // three different split methods case MIDDLE : { // Middle is the average of the min and max value of x or y double splitvalue = b.getMiddle(split); DataCompareToValue<D,C> comp(split,splitvalue); typename std::vector<CellData<D,C>*>::iterator middle = std::partition(vdata.begin()+start,vdata.begin()+end,comp); mid = middle - vdata.begin(); } break; case MEDIAN : { // Median is the point which divides the group into equal numbers DataCompare<D,C> comp(split); mid = (start+end)/2; typename std::vector<CellData<D,C>*>::iterator middle = vdata.begin()+mid; std::nth_element(vdata.begin()+start,middle,vdata.begin()+end,comp); } break; case MEAN : { // Mean is the weighted average value of x or y double splitvalue = meanpos.get(split); DataCompareToValue<D,C> comp(split,splitvalue); typename std::vector<CellData<D,C>*>::iterator middle = std::partition(vdata.begin()+start,vdata.begin()+end,comp); mid = middle - vdata.begin(); } break; case RANDOM : { // Random is a random point from the first quartile to the third quartile DataCompare<D,C> comp(split); // The code for RANDOM is same as MEDIAN except for the next line. // Note: The lo and hi values are slightly subtle. We want to make sure if there // are only two values, we actually split. So if start=1, end=3, the only possible // result should be mid=2. Otherwise, we want roughly 1/4 and 3/4 of the span. mid = select_random(end-3*(end-start)/4,start+3*(end-start)/4); typename std::vector<CellData<D,C>*>::iterator middle = vdata.begin()+mid; std::nth_element(vdata.begin()+start,middle,vdata.begin()+end,comp); } break; default : myerror("Invalid SplitMethod"); } if (mid == start || mid == end) { xdbg<<"Found mid not in middle. 
Probably duplicate entries.\n"; xdbg<<"start = "<<start<<std::endl; xdbg<<"end = "<<end<<std::endl; xdbg<<"mid = "<<mid<<std::endl; xdbg<<"sm = "<<sm<<std::endl; xdbg<<"b = "<<b<<std::endl; xdbg<<"split = "<<split<<std::endl; for(size_t i=start; i!=end; ++i) { xdbg<<"v["<<i<<"] = "<<vdata[i]<<std::endl; } // With duplicate entries, can get mid == start or mid == end. // This should only happen if all entries in this set are equal. // So it should be safe to just take the mid = (start + end)/2. // But just to be safe, re-call this function with sm = MEDIAN to // make sure. Assert(sm != MEDIAN); return SplitData(vdata,MEDIAN,start,end,meanpos); } Assert(mid > start); Assert(mid < end); return mid; }
// Split triangle `index` of triangles_ into four triangles appended to tmpTriangles_.
//
// index     Triangle to split; does nothing if processed_[index] is already set.
// callee    Neighbouring triangle that queued this split, or a negative value when
//           there is none (all three edges are then handled here).
// newPoint  Vertex index of the midpoint on the edge shared with `callee`
//           (only read when callee >= 0).
// t1, t2    Indices into tmpTriangles_ of the two new triangles on the callee's
//           side of the shared edge (only read when callee >= 0).
//
// Neighbours that still need splitting are not processed recursively; they are
// pushed onto splits_ for the caller's work loop.
void ObjFile::triangleSplit(int index, int callee, int newPoint, int t1, int t2) {
  if (processed_[index]) return;

  int sz;            // number of edges that still have to be handled in the loop below
  int midPoints[3];  // vertex index of the midpoint on each edge of this triangle
  int indices[3];    // the edge slots (0..2) to handle; only the first sz entries are used
  bool process[3];   // process[i]: the neighbour across edge indices[i] must be queued
  const Triangle& cur = triangles_[index];
  Triangle tri[4];   // the four replacement triangles (see the diagram further down)
  int off = tmpTriangles_.size();  // index the first replacement triangle will get

  if (callee < 0) {
    // No callee: all three edges are handled by the loop.
    sz = 3;
    indices[0] = 0;
    indices[1] = 1;
    indices[2] = 2;
  } else {
    // The edge shared with the callee already has its midpoint and neighbours.
    sz = 2;
    int update = cur.findNeighborIndex(callee);
    if (update < 0) {
      std::cerr << "ObjFile::triangleSplit: semantic error !\n";
      exit(1);
    }
    midPoints[update] = newPoint;
    midPoints_[3 * index + update] = newPoint;
    // Link the two new triangles along that edge with the callee's triangles, both ways.
    tri[update].t[0] = t1;
    tri[(update + 1) % 3].t[2] = t2;
    tmpTriangles_[t1].t[2] = off + update;
    tmpTriangles_[t2].t[0] = off + (update + 1) % 3;
    // The remaining two edge slots are the ones other than `update`.
    if (update == 0) {
      indices[0] = 1;
      indices[1] = 2;
    } else if (update == 1) {
      indices[0] = 0;
      indices[1] = 2;
    } else {
      indices[0] = 0;
      indices[1] = 1;
    }
  }

  // process the other ones
  for (int i = 0; i < sz; ++i) {
    // check whether already processed ?
    // get the neighbour of current edge
    int ti = cur.t[indices[i]];
    Triangle& t = triangles_[ti];
    // Slots in midPoints_/midTriangles_ for the shared edge as seen from the
    // neighbour, and for the neighbour's next slot.
    int tv = 3 * ti + t.findNeighborIndex(index);
    int tvnext = 3 * ti + (t.findNeighborIndex(index) + 1) % 3;
    if (midPoints_[tv] >= 0) {
      // The neighbour already created the midpoint on this edge: reuse it.
      midPoints[indices[i]] = midPoints_[3 * index + indices[i]] = midPoints_[tv];
      process[i] = false;
      // update triangle connectivity
      assert(midTriangles_[tv] != -1);
      assert(midTriangles_[tvnext] != -1);
      tri[indices[i]].t[0] = midTriangles_[tvnext];
      tri[(indices[i] + 1) % 3].t[2] = midTriangles_[tv];
      tmpTriangles_[midTriangles_[tvnext]].t[2] = off + indices[i];
      tmpTriangles_[midTriangles_[tv]].t[0] = off + (indices[i] + 1) % 3;
    } else {
      //insert new one
      int k = indices[i];
      int knext = (k + 1) % 3;
      int kprev = (k + 2) % 3;
      int opposite = (t.findNeighborIndex(index) + 2) % 3;
      // New edge vertex: 3/8 of each edge endpoint plus 1/8 of the far vertex of
      // this triangle and of the neighbour.
      // NOTE(review): these weights look like the Loop subdivision edge rule - confirm.
      Vec3f p = (vertices_[cur.v[k]] + vertices_[cur.v[knext]]) * (3.0f / 8.0f) +
                (vertices_[cur.v[kprev]] + vertices_[t.v[opposite]]) * (1.0f / 8.0f);
      vertices_.push_back(p);
      midPoints[indices[i]] = midPoints_[3 * index + indices[i]] = vertices_.size() - 1;
      process[i] = true;
    }
  }

  // add subdivided triangles to new list
  /*
   * Corner labels are the vertex slots of the current triangle; the labels inside
   * are the indices into tri[]:
   *
   *             2
   *            / \
   *           /   \
   *          /  2  \
   *         /       \
   *        *---------*
   *       / \       / \
   *      /   \  3  /   \
   *     /  0  \   /  1  \
   *    *-------***-------*
   *   0                   1
   */
  assert(midPoints[0] != -1);
  assert(midPoints[1] != -1);
  assert(midPoints[2] != -1);

  // Corner triangle at vertex 0.
  tri[0].v[0] = cur.v[0];
  tri[0].v[1] = midPoints[0];
  tri[0].v[2] = midPoints[2];
  tri[0].t[1] = off + 3;

  // Centre triangle, built from the three midpoints and adjacent to all three corners.
  tri[3].v[0] = midPoints[2];
  tri[3].v[1] = midPoints[0];
  tri[3].v[2] = midPoints[1];
  tri[3].t[0] = off;
  tri[3].t[1] = off + 1;
  tri[3].t[2] = off + 2;

  // Corner triangle at vertex 1.
  tri[1].v[0] = cur.v[1];
  tri[1].v[1] = midPoints[1];
  tri[1].v[2] = midPoints[0];
  tri[1].t[1] = off + 3;

  // Corner triangle at vertex 2.
  tri[2].v[0] = cur.v[2];
  tri[2].v[1] = midPoints[2];
  tri[2].v[2] = midPoints[1];
  tri[2].t[1] = off + 3;

  // vertexTriangle_[cur.v[0]] = vertexTriangle_[midPoints[0]] = off;
  // vertexTriangle_[cur.v[1]] = vertexTriangle_[midPoints[1]] = off+1;
  // vertexTriangle_[cur.v[2]] = vertexTriangle_[midPoints[2]] = off+2;

  // Record which new triangle belongs to each slot so neighbours processed later
  // can link to it.
  midTriangles_[3 * index] = off;
  midTriangles_[3 * index + 1] = off + 1;
  midTriangles_[3 * index + 2] = off + 2;

  for (int i = 0; i < 4; ++i) tmpTriangles_.push_back(tri[i]);
  processed_[index] = true;

  // call the neigbor triangles
  // (queued on splits_ rather than called directly)
  for (int i = 0; i < sz; ++i) {
    if (process[i]) {
      splits_.push_back(SplitData(cur.t[indices[i]], index, midPoints[indices[i]],
                                  off + (indices[i] + 1) % 3, off + indices[i]));
    }
  }
}
int main(int argc,char* argv[]){ const char* ip="127.0.0.1"; int port=atoi("13000"); //init read data char* file="./ml-1m/rating.dat"; //rat.dat为5万小样本 rating.dat为中等样本,ratings.dat为大样本,大样本运行载入太慢 //忽略SIGPIPE信号 addsig(SIGPIPE,SIG_IGN); int M=8; if(!readData(file)){ printf("file error\n"); return 1; } int rands=random(M); SplitData(M,rands); UserSimilarity(); //创建线程池 threadpool<UserIIF>* pool; try{ pool=new threadpool<UserIIF>(); }catch(...){ return 1; } /*预先为每个可能的客户连接分配一个http_conn对象*/ UserIIF* users=new UserIIF[MAX_FD]; users->testRes(10); //上一句会输出准确率,召回率,覆盖率,新颖度 assert(users); int user_count=0; int listenfd=socket(AF_INET,SOCK_STREAM,0); assert(listenfd>=0); struct linger tmp={1,0}; setsockopt(listenfd,SOL_SOCKET,SO_LINGER,&tmp,sizeof(tmp)); int ret=0; struct sockaddr_in address; bzero(&address,sizeof(address)); address.sin_family=AF_INET; inet_pton(AF_INET,ip,&address.sin_addr); address.sin_port=htons(port); ret=bind(listenfd,(struct sockaddr*)&address,sizeof(address)); assert(ret>=0); ret=listen(listenfd,5); assert(ret>=0); epoll_event events[MAX_EVENT_NUMBER]; int epollfd=epoll_create(5); assert(epollfd!=-1); UserIIF::m_epollfd=epollfd; addfd(epollfd,listenfd,false); while(true){ int number=epoll_wait(epollfd,events,MAX_EVENT_NUMBER,-1); if ((number<0)&&(errno!=EINTR)) { printf("epoll failure\n"); break; } for (int i = 0; i < number; i++) { int sockfd=events[i].data.fd; if (sockfd==listenfd) { struct sockaddr_in client_address; socklen_t client_addrlen=sizeof(client_address); int connfd=accept(listenfd,(struct sockaddr*)&client_address,&client_addrlen); if (connfd<0) { printf("errno is:%d\n",errno ); continue; } users[connfd].init(connfd,true);//debug }else if((events[i].events&EPOLLRDHUP)||(events[i].events&EPOLLERR)||(events[i].events&EPOLLRDHUP)){ //有一场,直接关闭客户连接 printf("%d stop\n",sockfd); users[sockfd].close(); }else if(events[i].events&EPOLLIN){ //根据读的结果,决定时将任务添加到线程池,还是关闭连接 if (users[sockfd].readfrom()==true) { pool->append(users+sockfd); }else{ 
users[sockfd].close(); } }else if(events[i].events&EPOLLOUT){ //根据写的结果,决定是否关闭连接 if (!users[sockfd].writeto()) { users[sockfd].close(); } }else{} } } close(epollfd); close(listenfd); delete [] users; delete pool; return 0; }