Пример #1
0
      /**
       * Reads a document/token matrix from a text file into `data`.
       *
       * Layout as consumed by this function (line numbers are 0-based):
       *   - line 1 holds two integers, stored as data.numDocs and data.numTokens;
       *   - lines 0 and 2 are not read here;
       *   - lines 3 .. numDocs+2 hold one document each. The first number is
       *     the class label; the remaining numbers are consumed in
       *     (offset, value) pairs: every offset is added to a running column
       *     index and `value` is stored at that column of the document's row.
       *     A trailing number without a partner is ignored.
       *
       * On return data.numDocs, data.numTokens, data.classifications and
       * data.features are filled in.
       *
       * Malformed input (too few lines, negative counts, a document line
       * without numbers, a column index outside [0, numTokens)) is caught with
       * assert, in line with the checks this function already had, instead of
       * silently indexing out of range.
       *
       * @param data      dataset to populate
       * @param filename  path of the matrix file, handed to loadFile()
       */
      void readTokenMatrix(Dataset& data, const char* filename) {
	std::vector<string> lines;
	loadFile(lines, filename);

	assert(lines.size() > 1);
	std::vector<int> counts(2);
	tokenizeLineIntoIntVector(lines[1], counts);
	assert(counts.size() == 2);
	// Negative counts would wrap around in the unsigned loop bounds below.
	assert(counts[0] >= 0 && counts[1] >= 0);
	data.numDocs = counts[0];
	data.numTokens = counts[1];

	const uint firstDocLine = 3;
	const uint docCount = static_cast<uint>(counts[0]);
	const int tokenCount = counts[1];
	// Every document line must actually be present in the file.
	assert(lines.size() >= firstDocLine + docCount);

	vec trainClasses(data.numDocs);
	mat features(data.numDocs, data.numTokens);
	rowvec row(data.numTokens);

	for(uint doc = 0; doc < docCount; doc++) {
	  const uint lineIdx = firstDocLine + doc;
	  std::vector<double> tokens(getNumTokens(lines[lineIdx]));
	  tokenizeLineIntoDoubleVector(lines[lineIdx], tokens);
	  // The class label is mandatory; an empty line has nothing to read.
	  assert(!tokens.empty());
	  trainClasses(doc) = tokens[0];

	  row.zeros();
	  int cumsum = 0;
	  // `j + 1 < size` instead of `j < size - 1`: cannot wrap around and
	  // guarantees tokens[j + 1] exists.
	  for(uint j = 1; j + 1 < tokens.size(); j+=2) {
	    cumsum += tokens[j];
	    // The running column index must stay inside the row.
	    assert(cumsum >= 0 && cumsum < tokenCount);
	    row[cumsum] = tokens[j+1];
	  }
	  Matrix::setMatrixRowToVector(features, doc, row);
	}

	data.classifications = trainClasses;
	data.features = features;

      }
Пример #2
0
 Impl(const std::string& learnpath, const std::string& allchars)
 {
     std::set<char> goodChars(allchars.begin(), allchars.end());
     std::ifstream train((learnpath + "/train.txt").c_str());
     std::vector< std::pair<char, std::string> > samples;
     char symbol;
     std::string imageFile;
     while (train >> symbol >> imageFile) {
         if (goodChars.find(symbol) == goodChars.end()) continue;
         samples.push_back(std::make_pair(symbol, imageFile));
     }
     cv::Mat trainData(samples.size(), FEATURE_COUNT, cv::DataType<float>::type);
     cv::Mat trainClasses(samples.size(), 1, cv::DataType<float>::type);
     for (size_t i = 0; i < samples.size(); ++i) {
         std::string path = learnpath + samples[i].second;
         cv::Mat inp = cv::imread(path, 0), out, canny;
         if (inp.empty()) continue;
         cv::Canny(inp, canny, 100, 50, 3);
         std::vector< std::vector<cv::Point> > contours;
         cv::findContours(canny, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE, cv::Point(0, 0));
         int maxx = -1, maxy = -1, minx = 1e9, miny = 1e9;
         for (size_t j = 0; j < contours.size(); ++j) {
             cv::Rect r = cv::boundingRect(contours[j]);
             if (r.x + r.width > maxx) maxx = r.x + r.width;
             if (r.y + r.height > maxy) maxy = r.y + r.height;
             if (r.x < minx) minx = r.x;
             if (r.y < miny) miny = r.y;
         }
         cv::Rect bound(minx, miny, maxx - minx, maxy - miny);
         cv::resize(cv::Mat(inp, bound), out, cv::Size(10, 16), 0, 0, cv::INTER_CUBIC);
         for (int j = 0; j < FEATURE_COUNT; ++j) trainData.at<float>(i, j) = out.data[j];
         trainClasses.at<float>(i, 0) = samples[i].first;
     }
     oracle.train(trainData, trainClasses);
 }