Ejemplo n.º 1
0
/**
 * Trains a UserBased_Predictor on the "base" training file, predicts the
 * ratings of the u1.test file and prints the training time, the prediction
 * time and the RMSE of the predictions.
 *
 * Returns without printing anything as soon as one of the two readData()
 * calls reports failure.
 */
void test_Iterate_UserBased_Predictor()
{
	string train_data = "E:/data/resys/corpus/1/train/base";
	string test_data = "E:/data/resys/corpus/1/test/u1.test";
	FileDataReader train_reader = FileDataReader(train_data);
	FileDataReader test_reader = FileDataReader(test_data);

	// Sizes handed to the model constructors.
	int n_user = 943;
	int n_item = 1682;
	int n_rating = 80000;
	UserModel trainModel(n_user,n_item,n_rating);
	UserModel testModel(n_user,n_item,20000);
	UserModel resultModel(n_user,n_item,20000);

	// The test file is only read once the training file was read successfully.
	if(!train_reader.readData(trainModel))
		return;
	if(!test_reader.readData(testModel))
		return;

	int min_com = 0;
	double min_sim = 0.00001;
	int shrink_parameter = 25;
	Similarity_Shrinking shrinker(min_com,min_sim,shrink_parameter);
	// Constructed but not handed to the predictor below.
	Pearson_Similarity similarity(shrinker);
	UserBased_Predictor predictor = UserBased_Predictor(30,0);

	clock_t train_start = clock();
	predictor.train(trainModel);
	clock_t train_stop = clock();
	cout << "Train time: " << (train_stop - train_start) * 1.0 / CLOCKS_PER_SEC << endl;

	// The prediction interval starts at the end of training.
	predictor.predictAll(trainModel,testModel,resultModel);
	clock_t predict_stop = clock();
	cout << "Predict time: " << (predict_stop - train_stop) * 1.0 / CLOCKS_PER_SEC << endl;

	double rmse = evl_rmse(testModel, resultModel);
	cout << "RMSE: " << rmse << endl;
}
Ejemplo n.º 2
0
void printUserSim()
{
	clock_t begin,end;
	string train_data = "E:/data/resys/corpus/1/train/base";
	FileDataReader train_reader = FileDataReader(train_data);
	int n_user, n_item, n_rating;
	n_user = 943;
	n_item = 1682;
	n_rating = 80000;
	UserModel trainModel(n_user,n_item,n_rating);
	if(train_reader.readData(trainModel)) {
		int min_com = 0;
		double min_sim = 0.0;
		int shrink_parameter = 30;
		int k_neighbors = 30;
		Similarity_Shrinking shrinker(min_com,min_sim,shrink_parameter);
		Pearson_Similarity similarity(shrinker);
		NeighborCollection *neighbor_cls;
		neighbor_cls = new NeighborCollection[trainModel.n_user+trainModel.user_0];
		similarity.similarity(trainModel, neighbor_cls);

		string sim_file = "E:/data/resys/corpus/1/user_sims.txt";
		ofstream fout(sim_file.c_str());
		vector<Neighbor>::iterator nb_it;
		for(int u = trainModel.user_0; u < trainModel.user_0 + trainModel.n_user; u++) {
			double sum = 0;
			int count = 0;
			for(nb_it = neighbor_cls[u].begin(); nb_it != neighbor_cls[u].end(); nb_it++) {
				int n = nb_it->neighbor;
				double sim = nb_it->similarity;
				sum += sim;
				count ++;
				if(count >= k_neighbors)
					break;
			}
			count = 0;
			if(sum > 0) {
				for(nb_it = neighbor_cls[u].begin(); nb_it != neighbor_cls[u].end(); nb_it++) {
					int n = nb_it->neighbor;
					double sim = nb_it->similarity;
					sim = sim/sum;
					fout << u << " " << n << " " << sim << endl;
					count ++;
					if(count >= k_neighbors)
						break;
				}
			}
		}
		fout.close();
	}
}
Ejemplo n.º 3
0
/// Debug overlay for the list control.
///
/// Draws three kinds of outlines: one gray rectangle around the whole control,
/// one magenta rectangle per row, and one red rectangle per row marking its
/// hit area.
///
/// @param offset Upper-left corner at which the control is drawn.
void GuiGameListMenuCtrl::onDebugRender(Point2I offset)
{
   GuiGameListMenuProfile * profile = (GuiGameListMenuProfile *) mProfile;

   // Ratio between the control's width and the profile's row width; applied
   // to the horizontal hit-area values below.
   F32 xScale = (float) getWidth() / profile->getRowWidth();

   ColorI hitBorderColor(255, 0, 0); // red
   ColorI rowBorderColor(255, 127, 255); // magenta
   ColorI controlBorderColor(200, 200, 200); // gray

   // Added to every extent so each outline is one unit smaller in both directions.
   Point2I shrinker(-1, -1);
   Point2I extent = getExtent();

   // Outline of the entire control.
   RectI borderRect(offset, extent + shrinker);
   GFX->getDrawUtil()->drawRect(borderRect, controlBorderColor);

   // Per-row extents are the same for every row, so compute them once.
   S32 rowHeight = profile->getRowHeight();
   Point2I rowExtent = Point2I(extent.x, rowHeight) + shrinker;

   Point2I hitAreaExtent(profile->getHitAreaExtent());
   hitAreaExtent.x *= xScale;
   hitAreaExtent += shrinker;

   Point2I hitAreaOffset = profile->mHitAreaUpperLeft;
   hitAreaOffset.x *= xScale;

   Point2I currentOffset(offset);
   bool isFirstRow = true;
   for (Vector<Row *>::iterator row = mRows.begin(); row != mRows.end(); ++row)
   {
      if (isFirstRow)
      {
         // The first row sits at the control's own offset.
         isFirstRow = false;
      }
      else
      {
         // Every later row is moved down by its padding plus one row height.
         currentOffset.y += (*row)->mHeightPad;
         currentOffset.y += rowHeight;
      }

      // Outline of the whole row.
      borderRect.point = currentOffset;
      borderRect.extent = rowExtent;
      GFX->getDrawUtil()->drawRect(borderRect, rowBorderColor);

      // Outline of the row's hit area.
      borderRect.point = currentOffset + hitAreaOffset;
      borderRect.extent = hitAreaExtent;
      GFX->getDrawUtil()->drawRect(borderRect, hitBorderColor);
   }
}
Ejemplo n.º 4
0
void test_Fill_Predictor()
{
	string train_data = "E:/data/resys/corpus/1/train/base";
	string test_data = "E:/data/resys/corpus/1/test/u1.test";
	FileDataReader train_reader = FileDataReader(train_data);
	FileDataReader test_reader = FileDataReader(test_data);
	int n_user, n_item, n_rating;
	n_user = 943;
	n_item = 1682;
	n_rating = 80000;
	UserModel userModel(n_user,n_item,n_rating);
	UserModel testModel(n_user,n_item,20000);
	UserModel resultModel(n_user,n_item,20000);
	ItemModel itemModel(n_user,n_item,n_rating);
	if(train_reader.readData(userModel) && train_reader.readData(itemModel) 
		&& test_reader.readData(testModel)) {
		int min_com = 0;
		double min_sim = 0.00001;
		int shrink_parameter = 25;
		Similarity_Shrinking shrinker(min_com,min_sim,shrink_parameter);
		Pearson_Similarity similarity(shrinker);
		UserBased_Predictor predictor = UserBased_Predictor(similarity,30,0);

		min_com = 30;
		min_sim = 0.3;
		shrink_parameter = 40;
		NeighborCollection *item_nbs = new NeighborCollection[n_item+1];
		vector<FillObj> *u_fobjs = new vector<FillObj>[n_user+1];
		Similarity_Shrinking shrinker2(min_com,min_sim,shrink_parameter);
		Pearson_Similarity similarity2(shrinker2);
		similarity2.similarity(itemModel,item_nbs,1);
		//similarity2.similarity(itemModel,item_nbs,1);
		Fill_Predictor fill_predictor;
		//int fill_count = fill_predictor.cal_fill_objs(userModel,item_nbs,u_fobjs,min_com);
		int fill_count = fill_predictor.cal_fill_objs(userModel,item_nbs,u_fobjs,min_sim);
		cout << fill_count << endl;
		fill_predictor.fill(predictor,userModel,u_fobjs);

		predictor.train(userModel);
		predictor.predictAll(userModel,testModel,resultModel);
		double rmse = evl_rmse(testModel, resultModel);
		cout << "RMSE: " << rmse << endl;
		double mae = evl_mae(testModel, resultModel);
		cout << "MAE: " << mae << endl;
	}
}
Ejemplo n.º 5
0
/**
 * Trains an ItemBased_Predictor on the "base" training file together with
 * per-user ranks read from user_com_ranks.txt, predicts the u1.test file and
 * prints training time, prediction time, RMSE and MAE.
 *
 * Nothing is trained or printed when either readData() call reports failure.
 */
void test_ItemBased_Predictor()
{
	clock_t begin,end;
	string train_data = "E:/data/resys/corpus/1/train/base";
	string test_data = "E:/data/resys/corpus/1/test/u1.test";
	FileDataReader train_reader = FileDataReader(train_data);
	FileDataReader test_reader = FileDataReader(test_data);
	int n_user, n_item, n_rating;
	n_user = 943;
	n_item = 1682;
	n_rating = 80000;
	ItemModel trainModel(n_user,n_item,n_rating);
	ItemModel testModel(n_user,n_item,20000);
	ItemModel resultModel(n_user,n_item,20000);

	// Value-initialised (all zeros) so the ranks are never indeterminate if
	// readUserComRank() fills only part of the array or fails.
	double *userRanks = new double[n_user+1]();
	string filepath = "E:/data/resys/corpus/1/user_com_ranks.txt";
	train_reader.readUserComRank(userRanks,filepath);

	if(train_reader.readData(trainModel) && test_reader.readData(testModel)) {
		int min_com = 0;
		double min_sim = 0.0;
		int shrink_parameter = 30;
		Similarity_Shrinking shrinker(min_com,min_sim,shrink_parameter);
		Pearson_Similarity similarity(shrinker);
		ItemBased_Predictor predictor = ItemBased_Predictor(similarity,30,0);
		begin = clock();
		// Rank-aware training; the plain train(trainModel) overload is not used here.
		predictor.train(trainModel,userRanks);
		end = clock();
		cout << "Train time: " << (end - begin) * 1.0 / CLOCKS_PER_SEC << endl;
		// The prediction interval starts where the training interval ended.
		begin = end;
		predictor.predictAll(trainModel,testModel,resultModel);
		end = clock();
		cout << "Predict time: " << (end - begin) * 1.0 / CLOCKS_PER_SEC << endl;
		double rmse = evl_rmse(testModel, resultModel);
		cout << "RMSE: " << rmse << endl;
		double mae = evl_mae(testModel, resultModel);
		cout << "MAE: " << mae << endl;
	}

	// Released only after the predictor (scoped to the block above) is gone;
	// the array was previously leaked.
	delete[] userRanks;
}
/**
 * Splits a group of connected components into word rectangles.
 *
 * Steps, in the order implemented below:
 *  1. Accumulate a projection profile over all components of the group and
 *     threshold it into a gap mask.
 *  2. Shrink the component rects (sorted by x) with a RectShrinker using that
 *     mask, and mark every column covered by a shrunken rect.
 *  3. Convert runs of covered columns into full-height rects and measure the
 *     horizontal distance between consecutive runs.
 *  4. Cluster the distances into two clusters with k-means. The group is
 *     returned unsplit (a single rect: its bounding rect) when there are at
 *     most two runs, when the linear decision rule on the two features
 *     indicates a single cluster, or when the larger-distance cluster has
 *     more members than the smaller-distance one.
 *  5. Otherwise merge runs separated by small-cluster distances into word
 *     candidates and map each candidate back to the original component rects
 *     that overlap it by at least 80% of their area.
 *
 * @param grp group of connected components to split
 * @return one rect per word candidate that captured at least one component
 *         (spanning the full height of the group's bounding rect), or a
 *         single element holding the group's bounding rect when no split is
 *         performed
 */
std::vector<cv::Rect> 
SoftPPWordSplitter::split(const CCGroup &grp)
{
    cv::Rect bb = grp.get_rect();
    // generate the projection profile sums
    // (one float per column of the bounding rect, accumulated over all components)
    cv::Mat sums(1, bb.width, CV_32FC1, cv::Scalar(0));
    ProjectionProfileComputer pp_computer(cv::Size(bb.width, 1), bb.x);
    for (int i = 0; i < grp.ccs.size(); i++) {
        sums = pp_computer.compute(grp.ccs[i].pixels, sums);
    }

    int threshold = pp_computer.compute_threshold(sums);
    if (_verbose) {
        std::cout << "Projection Profile Threshold: " << threshold << std::endl;
    }
    // mask of the columns whose profile sum lies below the threshold
    cv::Mat gaps = sums < threshold;

    // now shrink each bounding rect on the border with the gaps matrix
    std::vector<cv::Rect> original_rects(grp.ccs.size());
    std::transform(
        grp.ccs.begin(), grp.ccs.end(), 
        original_rects.begin(), 
        [](const CC &cc) -> cv::Rect { return cc.rect; });
    // the shrinker receives the rects ordered from left to right
    std::sort(
        original_rects.begin(),
        original_rects.end(), 
        [](const cv::Rect &a, const cv::Rect &b) -> bool { return a.x < b.x; });
    RectShrinker shrinker(0.10, bb.x);
    std::vector<cv::Rect> shrinked_rects(shrinker.shrink(original_rects, gaps));
    
    //cv::Mat img(grp.get_image());
    //cv::imshow("RECTS-wo-rects", img);
    //cv::waitKey(0);
    //for (cv::Rect r : shrinked_rects) {
    //    cv::rectangle(img, r.tl(), r.br(), cv::Scalar(128));
    //}
    //cv::imshow("RECTS", img);
    //cv::waitKey(0);

    // collide[c] is true when column bb.x + c is covered by a shrunken rect.
    // NOTE(review): the index j - bb.x is not range-checked; this assumes every
    // shrunken rect lies inside [bb.x, bb.x + bb.width) - confirm.
    std::vector<bool> collide(bb.width, false);
    for (int i = 0; i < shrinked_rects.size(); i++) {
        for (int j = shrinked_rects[i].x; j < shrinked_rects[i].x + shrinked_rects[i].width; j++) {
            collide[j-bb.x] = true;
        }
    }

    //std::vector<bool> collide(bb.width, false);
    //for (int i = 0; i < ccs.size(); i++) {
    //    for (int j = ccs[i].rect.x; j < ccs[i].rect.x + ccs[i].rect.width; j++) {
    //        collide[j-bb.x] = true;
    //    }
    //}

    // mean height of the original component rects; used below to normalise
    // the distance between the two cluster centers.
    // NOTE(review): divides by heights.size(), which is 0 for an empty group.
    std::vector<float> heights(grp.ccs.size(), 0.0);
    std::transform(
        grp.ccs.begin(),
        grp.ccs.end(), 
        heights.begin(),
        [] (const CC &c) -> float { return c.rect.height; });
    float mean_height = cv::sum(heights)[0] / heights.size();

    // Now find the rects from this binary mask.
    // This merges overlapping/touching CCs into a single component
    std::vector<cv::Rect> rects;
    // NOTE(review): the first run starts with width 1, while every later run
    // restarts with width 0 (see the else branch below), so the first rect
    // comes out one column wider than the others. Confirm whether this is
    // intended before changing it - the decision constants further down may
    // have been fitted with this behaviour.
    cv::Rect last_rect(bb.x, bb.y, 1, bb.height);
    
    for (int i = 0; i < collide.size(); i++) {
        if (collide[i]) {
            // covered column: extend the current run
            last_rect.width += 1;
        } else {
            // uncovered column: close the current run (if any) and restart an
            // empty run anchored at this column
            if (last_rect.width > 0) {
                rects.push_back(last_rect);
            }
            last_rect = cv::Rect(bb.x + i, bb.y, 0, bb.height);
        }
    }
    // close a run that reaches the right border
    if (last_rect.width > 0) {
        rects.push_back(last_rect);
    }

    if (_verbose)
        std::cout << "#Rects: " << rects.size() << std::endl;

    // With at most two runs there is at most one distance, which is not
    // enough for the 2-cluster k-means below: return the group unsplit.
    if (rects.size() <= 2) {
        std::vector<cv::Rect> result;
        result.push_back(bb);
        return result;
    }

    // find the dists
    // (dists[i-1] is the horizontal distance between run i-1 and run i)
    std::vector<float> dists;
    for (int i = 1; i < rects.size(); i++) {
        dists.push_back(rects[i].tl().x - rects[i-1].br().x);
    }

    //  kmeans
    // copy the distances into an N x 1 float matrix, the sample layout passed to kmeans
    cv::Mat dist_mat(dists.size(), 1, CV_32FC1);
    for (size_t i = 0; i < dists.size(); i++) {
        dist_mat.at<float>(i,0) = dists[i];
    }
    cv::Mat centers;
    cv::Mat labels;//(dists.size(),1, CV_32SC1, cv::Scalar(0));
    /*
    float min = *std::min_element(dists.begin(), dists.end());
    float max = *std::max_element(dists.begin(), dists.end());
    for (size_t i = 0; i < dists.size(); i++) {
        labels.at<int>(i,0) = std::abs(dists[i] - min) < std::abs(dists[i] - max) ? 0 : 1;
    }
    */

    if (_verbose) 
        std::cout << dist_mat << std::endl;
    // 2 clusters, 5 attempts, k-means++ seeding; each attempt stops after 100
    // iterations or when the change drops below .01
    kmeans(dist_mat, 2, labels, cv::TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 100, .01), 5, cv::KMEANS_PP_CENTERS, centers);

    if (_verbose)
        std::cout << centers << std::endl;

    // median of the distances (mean of the two middle values for an even count)
    std::vector<float> cpy(dists);
    std::sort(cpy.begin(), cpy.end());
    float median = cpy[cpy.size() / 2];
    if (cpy.size() % 2 == 0) {
        median = cpy[cpy.size() / 2] + cpy[cpy.size() / 2 - 1];
        median = median / 2.0f;
    }
    // keep the raw median for the verbose output; 'median' is reused as a feature below
    float medval = median;

    // The two decision features: the distance between the cluster centers,
    // normalised by the mean component height and by the median distance
    // (1e-10 guards against a zero median).
    float height = std::abs(centers.at<float>(0,0) - centers.at<float>(1,0)) / mean_height;
    median = std::abs(centers.at<float>(0,0) - centers.at<float>(1,0)) / (median + 1e-10);
    if (_verbose) {
        std::cout << dists.size() << " " << medval << " " << median << " " << height << std::endl;
    }
    // liblinear: 92% ACC: (10-F)
    // ./train -v 10 -B 1 -w1 2 -c 100 dists_cleaned.dat   
    // do we have a single cluster?!
    //if (dists.size() > 3 && median * 0.84320891 + height * 0.3127415 < 1.23270849 ||
    //    dists.size() <= 3 && height < 0.43413942) {
    // linear decision rule on the two features: below the bias the distances
    // are treated as one cluster and the group is returned unsplit
    if (median * 0.33974138 + height * 0.47850904 < 0.56307525) {
        std::vector<cv::Rect> result;
        result.push_back(bb);
        return result;
    }

    // get the index of the smallest center
    int small_center = centers.at<float>(0,0) < centers.at<float>(1,0) ? 0 : 1;

    // count the distance to cluster assignments
    int cnt[2] = {0,0};
    for (int i = 0; i < labels.rows; i++) {
        cnt[labels.at<int>(i,0)]++;
    }
    // we have more word gaps than letter gaps -> don't split!
    if (cnt[small_center] < cnt[1-small_center]) {
        std::vector<cv::Rect> result;
        result.push_back(bb);
        return result;
    }

    // start from left to right and iteratively merge rects if the
    // distance between them is clustered into the smaller center
    // (labels row i-1 holds the cluster of the gap between run i-1 and run i)
    last_rect = rects[0];
    std::vector<cv::Rect> word_candidates;
    for (int i = 1; i < rects.size(); i++) {
        if (_allow_single_letters) {
            if (labels.at<int>(i-1,0) == small_center) {
                // extend the last rect
                last_rect = last_rect | rects[i];
            } else {
                // do not extend it!
                word_candidates.push_back(last_rect);
                last_rect = rects[i];
            }
        } else {
            if (labels.at<int>(i-1,0) == small_center) {
                // extend the last rect
                last_rect = last_rect | rects[i];
            } else if (i < labels.rows && labels.at<int>(i,0) == small_center) {
                // do not extend it!
                // (large gap before run i and a small gap after it: run i
                // starts a new candidate that continues to the right)
                word_candidates.push_back(last_rect);
                last_rect = rects[i];
            } else {
                // large gap before run i, and either a large gap after it or
                // run i is the last one: run i would form a candidate of its
                // own, so it is merged into the current candidate instead
                last_rect = last_rect | rects[i];
            }
        }
    }
    word_candidates.push_back(last_rect);

    // for each rect, find the original connected component rects
    std::vector<cv::Rect> words;
    for (cv::Rect candidate : word_candidates) {
        std::vector<cv::Rect> word;
        for (size_t i = 0; i < grp.ccs.size(); i++) {
            // a component belongs to the candidate when at least 80% of its
            // rect area lies inside the candidate
            cv::Rect intersect(grp.ccs[i].rect & candidate);
            if (float (intersect.width * intersect.height) / float (grp.ccs[i].rect.width * grp.ccs[i].rect.height) >= 0.8f) {
                cv::Rect r = grp.ccs[i].rect;
                // set the text height correctly
                r.y = bb.y;
                r.height = bb.height;
                word.push_back(r);
            }
        }

        if (_verbose) {
            std::cout << "Accumulated: " << word.size() << " rects!" << std::endl;
        }
        // candidates that captured no component produce no word
        if (word.empty()) continue;
        // cannot fire: the empty case was skipped by the continue above
        assert(!word.empty());
        // the word rect is the union of all component rects assigned to it
        cv::Rect r = word[0];
        for (size_t i = 1; i < word.size(); i++) {
            r = r | word[i];
        }
        words.push_back(r);
    }
    
    return words;
}