Example #1
void load_weights(void) {
    // Build a LeNet-5-like network; C1 is the hardware convolution layer and
    // the S2 pooling layer is commented out in this variant.
    typedef network<mse, gradient_descent_levenberg_marquardt> CNN;
    CNN nn;
    convolutional_layer_hw<CNN, tan_h> C1(32, 32, 5, 1, 6);
    //average_pooling_layer<CNN, tan_h> S2(28, 28, 6, 2);
    // connection table between pooling outputs and C3 feature maps [Y.LeCun, 1998 Table.1]
#define O true
#define X false
    static const bool connection[] = {
        O, X, X, X, O, O, O, X, X, O, O, O, O, X, O, O,
        O, O, X, X, X, O, O, O, X, X, O, O, O, O, X, O,
        O, O, O, X, X, X, O, O, O, X, X, O, X, O, O, O,
        X, O, O, O, X, X, O, O, O, O, X, X, O, X, O, O,
        X, X, O, O, O, X, X, O, O, O, O, X, O, O, X, O,
        X, X, X, O, O, O, X, X, O, O, O, O, X, O, O, O
    };
#undef O
#undef X
    //convolutional_layer2_hw<CNN, tan_h> C3(14, 14, 5, 6, 16, connection_table(connection, 6, 16));
    convolutional_layer<CNN, tan_h> C3(14, 14, 5, 6, 16, connection_table(connection, 6, 16));
    average_pooling_layer<CNN, tan_h> S4(10, 10, 16, 2);
    convolutional_layer<CNN, tan_h> C5(5, 5, 5, 16, 120);
    fully_connected_layer<CNN, tan_h> F6(120, 10);
    nn.add(&C1);
    nn.add(&C3);
    nn.add(&S4);
    nn.add(&C5);
    nn.add(&F6);

    // Read the pre-trained weights from weights.bin and stream them into each layer in order.
    std::stringstream stream;
    ReadFloatsFromSDFile(stream, std::string("weights.bin"));

    stream >> C1 >> C3 >> S4 >> C5 >> F6;

    // C3.print_weights();

    // Load the MNIST test set and evaluate the network.
    std::vector<label_t> train_labels, test_labels;
    std::vector<vec_t> train_images, test_images;

    parse_mnist_labels("labels.bin", &test_labels);
    parse_mnist_images("images.bin", &test_images);

    nn.test(test_images, test_labels).print_detail(std::cout);
    return;
    //C1.print_weights();
}
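Note: ReadFloatsFromSDFile is not defined in this snippet. A minimal host-side sketch with the same call shape, assuming weights.bin is a flat array of 32-bit floats and that each layer's operator>> consumes whitespace-separated values (everything beyond the name and the two arguments used above is an assumption):

#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical helper: read a flat binary file of floats and emit the values
// as whitespace-separated text into 'stream' so that "stream >> layer" works.
void ReadFloatsFromSDFile(std::stringstream& stream, const std::string& path) {
    std::FILE* fp = std::fopen(path.c_str(), "rb");
    if (!fp) return;                      // leave the stream empty if the file is missing

    std::vector<float> buf(1024);
    std::size_t n;
    while ((n = std::fread(buf.data(), sizeof(float), buf.size(), fp)) > 0) {
        for (std::size_t i = 0; i < n; ++i)
            stream << buf[i] << ' ';
    }
    std::fclose(fp);
}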
Example #2
int main(int argc, const char** argv)
{
    /*DepthImage d(2, 2);
    
    d(0, 0) = 1.0f;
    d(1, 0) = 6.0f;
    d(0, 1) = 6.0f;
    d(1, 1) = 100.0f;

    d.save("C:\\code\\test.dat", true);
    d.load("C:\\code\\test.dat", true);
    d.save("C:\\code\\test2.dat", true);
    d.load("C:\\code\\test2.dat", true);*/

    ReconstructionParams reconParams;

    CNN cnn;
    cnn.initStandard();
    
    Bitmap testImage, reconstructedImage;
    LayerData testOutput;

    const string dataDir = "../data/";
    const string imageDir = "../testImages/";
    const string outputDir = "../testResults/";

    // Run the test image through the network and capture one layer's response.
    testImage = ml::LodePNG::load(imageDir + "imageA.png");
    cnn.filter(testImage, testOutput);

    // Invert the captured response back into image space and save the reconstruction.
    cnn.layer.invert(reconParams, testOutput, cnn.transform.meanValues, reconstructedImage);

    ml::LodePNG::save(reconstructedImage, outputDir + reconParams.toString() + ".png");

    // Save a per-filter visualization of the layer response.
    for (UINT filter = 0; filter < testOutput.images.size(); filter++)
    {
        const Bitmap bmp = testOutput.images[filter].makeVisualization(reconParams);
        ml::LodePNG::save(bmp, outputDir + util::zeroPad(filter, 2) + ".png");
    }

    return 0;
}
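Note: util::zeroPad is supplied by the project's utility library and is not shown here. A hypothetical stand-in with the call shape used above (zeroPad(filter, 2) yielding "00", "01", ...):

#include <string>

namespace util {
    // Hypothetical stand-in: format 'value' as decimal, left-padded with '0'
    // to at least 'width' characters.
    inline std::string zeroPad(unsigned value, unsigned width) {
        std::string s = std::to_string(value);
        while (s.size() < width) s.insert(s.begin(), '0');
        return s;
    }
}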
Example #3
File: main.cpp  Project: CCJY/tiny-cnn
///////////////////////////////////////////////////////////////////////////////
// learning convolutional neural networks (LeNet-5 like architecture)
void sample1_convnet(void) {
    // construct LeNet-5 architecture
    typedef network<mse, gradient_descent_levenberg_marquardt> CNN;
    CNN nn;
    convolutional_layer<CNN, tan_h> C1(32, 32, 5, 1, 6);
    average_pooling_layer<CNN, tan_h> S2(28, 28, 6, 2);
    // connection table [Y.Lecun, 1998 Table.1]
#define O true
#define X false
    static const bool connection[] = {
        O, X, X, X, O, O, O, X, X, O, O, O, O, X, O, O,
        O, O, X, X, X, O, O, O, X, X, O, O, O, O, X, O,
        O, O, O, X, X, X, O, O, O, X, X, O, X, O, O, O,
        X, O, O, O, X, X, O, O, O, O, X, X, O, X, O, O,
        X, X, O, O, O, X, X, O, O, O, O, X, O, O, X, O,
        X, X, X, O, O, O, X, X, O, O, O, O, X, O, O, O
    };
#undef O
#undef X
    convolutional_layer<CNN, tan_h> C3(14, 14, 5, 6, 16, connection_table(connection, 6, 16));
    average_pooling_layer<CNN, tan_h> S4(10, 10, 16, 2);
    convolutional_layer<CNN, tan_h> C5(5, 5, 5, 16, 120);
    fully_connected_layer<CNN, tan_h> F6(120, 10);

    assert(C1.param_size() == 156 && C1.connection_size() == 122304);
    assert(S2.param_size() == 12 && S2.connection_size() == 5880);
    assert(C3.param_size() == 1516 && C3.connection_size() == 151600);
    assert(S4.param_size() == 32 && S4.connection_size() == 2000);
    assert(C5.param_size() == 48120 && C5.connection_size() == 48120);

    nn.add(&C1);
    nn.add(&S2);
    nn.add(&C3);
    nn.add(&S4);
    nn.add(&C5);
    nn.add(&F6);

    std::cout << "load models..." << std::endl;

    // load MNIST dataset
    std::vector<label_t> train_labels, test_labels;
    std::vector<vec_t> train_images, test_images;

    parse_mnist_labels("train-labels.idx1-ubyte", &train_labels);
    parse_mnist_images("train-images.idx3-ubyte", &train_images);
    parse_mnist_labels("t10k-labels.idx1-ubyte", &test_labels);
    parse_mnist_images("t10k-images.idx3-ubyte", &test_images);

    std::cout << "start learning" << std::endl;

    boost::progress_display disp(train_images.size());
    boost::timer t;
    int minibatch_size = 10;

    nn.optimizer().alpha *= std::sqrt(minibatch_size);

    // create callback
    auto on_enumerate_epoch = [&](){
        std::cout << t.elapsed() << "s elapsed." << std::endl;

        tiny_cnn::result res = nn.test(test_images, test_labels);

        std::cout << nn.optimizer().alpha << "," << res.num_success << "/" << res.num_total << std::endl;

        nn.optimizer().alpha *= 0.85; // decay learning rate
        nn.optimizer().alpha = std::max(0.00001, nn.optimizer().alpha);

        disp.restart(train_images.size());
        t.restart();
    };

    auto on_enumerate_minibatch = [&](){ 
        disp += minibatch_size; 
    
        // weight visualization in imdebug
        /*static int n = 0;    
        n+=minibatch_size;
        if (n >= 1000) {
            image img;
            C3.weight_to_image(img);
            imdebug("lum b=8 w=%d h=%d %p", img.width(), img.height(), &img.data()[0]);
            n = 0;
        }*/
    };
    
    // training
    nn.train(train_images, train_labels, minibatch_size, 20, on_enumerate_minibatch, on_enumerate_epoch);

    std::cout << "end training." << std::endl;

    // test and show results
    nn.test(test_images, test_labels).print_detail(std::cout);

    // save networks
    std::ofstream ofs("LeNet-weights");
    ofs << C1 << S2 << C3 << S4 << C5 << F6;
}
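The "LeNet-weights" file written at the end of sample1_convnet() can be restored with the matching stream extraction operators, just as Example #1 does for weights.bin. A sketch of such a reload routine; the function name sample1_reload and the commented evaluation step are illustrative, while the layer construction mirrors the code above:

#include <fstream>

// Sketch: rebuild the same architecture and restore the weights that
// sample1_convnet() wrote to "LeNet-weights" (same layer order as the save).
void sample1_reload(void) {
    typedef network<mse, gradient_descent_levenberg_marquardt> CNN;
    CNN nn;
    convolutional_layer<CNN, tan_h> C1(32, 32, 5, 1, 6);
    average_pooling_layer<CNN, tan_h> S2(28, 28, 6, 2);
#define O true
#define X false
    static const bool connection[] = {
        O, X, X, X, O, O, O, X, X, O, O, O, O, X, O, O,
        O, O, X, X, X, O, O, O, X, X, O, O, O, O, X, O,
        O, O, O, X, X, X, O, O, O, X, X, O, X, O, O, O,
        X, O, O, O, X, X, O, O, O, O, X, X, O, X, O, O,
        X, X, O, O, O, X, X, O, O, O, O, X, O, O, X, O,
        X, X, X, O, O, O, X, X, O, O, O, O, X, O, O, O
    };
#undef O
#undef X
    convolutional_layer<CNN, tan_h> C3(14, 14, 5, 6, 16, connection_table(connection, 6, 16));
    average_pooling_layer<CNN, tan_h> S4(10, 10, 16, 2);
    convolutional_layer<CNN, tan_h> C5(5, 5, 5, 16, 120);
    fully_connected_layer<CNN, tan_h> F6(120, 10);

    nn.add(&C1); nn.add(&S2); nn.add(&C3);
    nn.add(&S4); nn.add(&C5); nn.add(&F6);

    // Restore the weights in the same order they were saved.
    std::ifstream ifs("LeNet-weights");
    ifs >> C1 >> S2 >> C3 >> S4 >> C5 >> F6;

    // The restored network can then be evaluated exactly as above, e.g.
    // nn.test(test_images, test_labels).print_detail(std::cout);
}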
Example #4
int main(int argc, char* argv[])
{
	CNN net;

	double time_cost;


	//-------- CNN Initializing --------
	//----------------------------------

	//Read parameters file
	net.readPara(parameter_file);


	//-------- Load Dataset ------------
	//----------------------------------

#ifdef _HANY_NET_WITH_LABEL_NAMES
	ifstream read_label(label_file);
	for(int c = 0; c < net.class_count; c++) {
		string new_label_name;
		read_label >> new_label_name;
		label_list.push_back(make_pair(c, new_label_name));
	}
#endif

#ifdef _HANY_NET_LOAD_MNIST
#ifdef _HANY_NET_PRINT_MSG
	cout << "Loading MNIST dataset..." << endl;
	time_cost = (double)getTickCount();
#endif

	loadMNIST("train-images.idx3-ubyte", "train-labels.idx1-ubyte", net.train_set);
	loadMNIST("t10k-images.idx3-ubyte", "t10k-labels.idx1-ubyte", net.test_set);

#ifdef _HANY_NET_PRINT_MSG
	time_cost = ((double)getTickCount() - time_cost) / getTickFrequency();
	cout << "Load samples done." << endl << "Time cost: " << time_cost << "s." << endl << endl;
#endif
#endif

#ifdef _HANY_NET_TRAIN_FROM_SCRATCH

#ifdef _HANY_NET_LOAD_SAMPLE_FROM_PIC
#ifdef _HANY_NET_PRINT_MSG
	cout << "Loading samples..." << endl;
	time_cost = (double)getTickCount();
#endif

	for(int c = 0; c < net.class_count; c++) {

		for(int i = 0; i < sample_num; i++) {
			string file_name = sample_file_pre + to_string(c) + "_" + to_string(i) + ".jpg";
			Mat img_read = imread(file_name, CV_LOAD_IMAGE_GRAYSCALE);
			if(img_read.data == NULL) {
				break;
			}
			Mat img_nor;
			resize(img_read, img_nor, Size(net.sample_width, net.sample_height));

			net.train_set.push_back(make_pair(img_nor, (uchar)(c)));
		}
	}

#ifdef _HANY_NET_PRINT_MSG
	time_cost = ((double)getTickCount() - time_cost) / getTickFrequency();
	cout << "Load samples done." << endl << "Time cost: " << time_cost << "s." << endl << endl;
#endif
#endif


#ifdef _HANY_NET_CAPTURE_FACE_FROM_CAMERA
#ifdef _HANY_NET_PRINT_MSG
	cout << "Capturing samples..." << endl;
	time_cost = (double)getTickCount();
#endif

	VideoCapture cap_in(0);
	if(!cap_in.isOpened()) {
		cout << "Cannot access camera. Press ANY key to exit." << endl;
		cin.get();
		exit(-1);
	}

	CascadeClassifier cascade_in;
	cascade_in.load(haar_file);

	Mat frame;
	int frame_count = 0;
	int capture_count = 0;
	int class_idx = 0;
	int class_count = 0;
	bool sample_suff = false;
	bool cap_sample = true;

	// Grab frames, keep only the largest detected face, and store every 5th
	// detection as a training sample; SPACE advances to the next class, ESC ends capture.
	while(cap_in.read(frame)) {
		capture_count++;

		vector<Rect> faces;
		Mat frame_gray, img_gray;
		cvtColor(frame, frame_gray, CV_BGR2GRAY);
		equalizeHist(frame_gray, img_gray);
		cascade_in.detectMultiScale(img_gray, faces, 1.1, 2, 0, Size(120, 120));

		int face_area = 0;
		int face_idx = 0;

		if(faces.size() > 0) {
			for(int f = 0; f < faces.size(); f++) {
				if(faces[f].area() > face_area) {
					face_area = faces[f].area();
					face_idx = f;
				}
			}

			rectangle(frame, faces[face_idx], Scalar(255, 0, 0), 3);

			if(frame_count % 5 == 0 && cap_sample && !sample_suff) {
				Mat face, face_nor;
				img_gray(faces[face_idx]).copyTo(face);

				resize(face, face_nor, Size(net.sample_width, net.sample_height));

				net.train_set.push_back(make_pair(face_nor, (uchar)class_idx));
				class_count++;
			}
		}

		putText(frame, "Class: " + to_string(class_idx), Point(50, 100), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2);
		putText(frame, "Sample: " + to_string(class_count), Point(50, 150), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2);

		if(sample_suff) {
			putText(frame, "Enough samples. Press SPACE.", Point(50, 50), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2);
		}else {
			putText(frame, "Capturing...", Point(50, 50), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2);
		}
		if(!cap_sample) {
			putText(frame, "Wait for another person. Press SPACE.", Point(50, 200), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2);
		}

		imshow(camera_window_name, frame);

		if(class_count >= sample_num) {
			sample_suff = true;
		}

		frame_count++;
		int key = waitKey(20);
		if(key == 27){
			cap_in.release();
			break;
		} else if(key == ' ') {
			if(cap_sample && sample_suff) {
				cap_sample = false;
				continue;
			}
			if(!cap_sample && sample_suff) {
				cap_sample = true;
				sample_suff = false;
				class_idx++;
				class_count = 0;
				continue;
			}
		}
	}

#ifdef _HANY_NET_PRINT_MSG
	time_cost = ((double)getTickCount() - time_cost) / getTickFrequency();
	cout << "Load samples done." << endl << "Time cost: " << time_cost << "s." << endl << endl;
#endif
#endif

#endif


	//-------- CNN Initializing --------
	//----------------------------------

#ifdef _HANY_NET_PRINT_MSG
	cout << "Initializing neural networks..." << endl;
	time_cost = (double)getTickCount();
#endif

	//Initialize CNN with knowledge of samples
	net.initCNN();

#ifdef _HANY_NET_PRINT_MSG
	time_cost = ((double)getTickCount() - time_cost) / getTickFrequency();
	cout << "Total number of samples: " << (int)(net.train_set.size() + net.test_set.size()) << endl;
	cout << "Initializing neural networks done." << endl << "Time cost: " << time_cost << "s." << endl << endl;
#endif


	//Load pre-trained CNN parameters from file and continue to train
//	net.uploadCNN(pretrained_cnn_file);

	//-------- CNN Training ----------
	//--------------------------------

#ifdef _HANY_NET_TRAIN_FROM_SCRATCH
#ifdef _HANY_NET_PRINT_MSG
	cout << "Start training CNN..." << endl;
	time_cost = (double)getTickCount();
#endif

	//Train CNN with train sample set
	net.trainCNN();

#ifdef _HANY_NET_PRINT_MSG
	time_cost = ((double)getTickCount() - time_cost) / getTickFrequency();
	cout << "CNN training done." << endl << "Time cost: " << time_cost << "s." << endl << endl;
#endif

	for(int i = 0; i < net.time_ff.size(); i++) {
		cout << "FeedForward stage " << i << ":  " << net.time_ff[i] << "s" << endl;
	}
	for(int i = 0; i < net.time_bp.size(); i++) {
		cout << "BackPropagation stage " << i << ":  " << net.time_bp[i] << "s" << endl;
	}

	//Draw stage loss graph
	Mat stage_loss_graph = Mat::zeros(600, 1100, CV_8UC3);
	Point2d pt1, pt2;
	pt1 = Point2d(50.0, 50.0);
	for(int stage = 0; stage < net.stage_loss.size(); stage++) {
		// Scale x so the curve stays inside the 1100-pixel-wide canvas; normalize loss to the first stage.
		pt2 = Point2d(50.0 + 1000.0 / net.stage_loss.size() * stage, 550.0 - 500.0 * net.stage_loss[stage] / net.stage_loss[0]);
		line(stage_loss_graph, pt1, pt2, Scalar(255, 255, 255));
		pt1 = pt2;
	}
	imshow("Stage Loss Graph", stage_loss_graph);
	imwrite("stage_loss_graph.jpg", stage_loss_graph);
	waitKey(10);

#endif


	//-------- Save Trained Network -----
	//-----------------------------------

#ifdef _HANY_NET_TRAIN_FROM_SCRATCH
#ifdef _HANY_NET_PRINT_MSG
	cout << "Dumping trained CNN parameters to file " << pretrained_cnn_file << "..." << endl;
#endif

	//Dump trained CNN parameters to file
	net.downloadCNN(trained_cnn_file);

#ifdef _HANY_NET_PRINT_MSG
	cout << "Dumping trained CNN parameters to file done." << endl << endl;
#endif
#endif


	//-------- Load Pre-trained Network -----
	//---------------------------------------

#ifndef _HANY_NET_TRAIN_FROM_SCRATCH
#ifdef _HANY_NET_PRINT_MSG
	cout << "Loading pre-trained CNN parameters from file " << pretrained_cnn_file << "..." << endl;
#endif

	//Load pre-trained CNN parameters from file
	net.uploadCNN(pretrained_cnn_file);

#ifdef _HANY_NET_PRINT_MSG
	cout << "Loading pre-trained CNN parameters from file done." << endl << endl;
#endif
#endif


	//-------- Predict New Samples-------
	//--------------------------------------

#ifdef _HANY_NET_PREDICT_MNIST
#ifdef _HANY_NET_PRINT_MSG
	cout << "Predicting MNIST test dataset..." << endl;
	time_cost = (double)getTickCount();
#endif

	//Calculate correctness ratio with test samples
	int total_correct_count = 0;
	for(int sample_idx = 0; sample_idx < net.test_set.size(); sample_idx++) {
		vector<Mat> input_sample;
		input_sample.push_back(net.test_set[sample_idx].first);
		vector<Mat> predict_result = net.predictCNN(input_sample);
		if((int)predict_result[0].ptr<uchar>(0)[0] == net.test_set[sample_idx].second) {
			total_correct_count++;
		}
	}
	double total_correct_ratio = (double)total_correct_count / net.test_set.size();

#ifdef _HANY_NET_PRINT_MSG
	time_cost = ((double)getTickCount() - time_cost) / getTickFrequency();
	cout << "MNIST testing done." << endl << "Time cost: " << time_cost << "s." << endl;
	cout << "Total correctness ratio: " << total_correct_ratio << endl << endl;
#endif
#endif

#ifdef _HANY_NET_PREDICT_IMAGE_SERIES
#ifdef _HANY_NET_PRINT_MSG
	cout << "Predicting from image series..." << endl;
#endif

//	VideoWriter wri(output_video_file, CV_FOURCC('M', 'J', 'P', 'G'), 25.0, Size(640, 480));

	for(int c = 0; c < net.class_count; c++) {

		for(int i = 0; i < sample_num; i++) {
			string file_name = sample_file_pre + to_string(c) + "_" + to_string(i) + ".jpg";
			Mat img_read = imread(file_name, CV_LOAD_IMAGE_GRAYSCALE);
			if(img_read.data == NULL) {
				break;
			}
			Mat img_nor, img_show;
			resize(img_read, img_show, Size(400, 400));
			resize(img_read, img_nor, Size(net.sample_width, net.sample_height));

			vector<Mat> input_sample;
			input_sample.push_back(img_nor);

			vector<Mat> predict_result = net.predictCNN(input_sample);

			int pred_rst = (int)predict_result[0].ptr<uchar>(0)[0];
			if(pred_rst < net.class_count)   // keep label_list[pred_rst] in bounds
				putText(img_show, label_list[pred_rst].second, Point(10, 40), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2);

			putText(img_show, to_string(c)+"-"+to_string(i), Point(img_show.cols-80, 40), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2);

			int frame_count = 25;
			while(--frame_count) {
//				wri.write(img_show);
			}
			imshow(camera_window_name, img_show);

			int key_get = waitKey(20);
			switch(key_get) {
			case 27:
//				wri.release();
				return 0;
			default:
				break;
			}
		}
	}

#endif


#ifdef _HANY_NET_PREDICT_VEDIO_SERIES
#ifdef _HANY_NET_PRINT_MSG
	cout << "Predicting from video series..." << endl;
#endif

	VideoWriter wri(output_video_file, CV_FOURCC('M', 'J', 'P', 'G'), 25.0, Size(640, 480));
	namedWindow(camera_window_name);

	CascadeClassifier cascade_out;
	cascade_out.load(haar_file);

	for(int c = 1; c <= net.class_count; c++) {
		string file_name = "path_to_face_videos\\" + to_string(c) + ".wmv";
		VideoCapture cap(file_name);
		if(!cap.isOpened())
			continue;

		Mat img_read;
		while(cap.read(img_read)) {
			Mat img_gray, nor_gray, img_show;
			img_read.copyTo(img_show);
			cvtColor(img_read, img_gray, CV_BGR2GRAY);

			vector<Rect> faces;
			equalizeHist(img_gray, img_gray);
			cascade_out.detectMultiScale(img_gray, faces, 1.1, 2, 0, Size(120, 120));

			for(int f = 0; f < faces.size(); f++) {
				rectangle(img_show, faces[f], Scalar(0, 255, 255), 3);

				resize(img_gray(faces[f]), nor_gray, Size(net.sample_width, net.sample_height));
				vector<Mat> input_sample;
				input_sample.push_back(nor_gray);

				vector<Mat> predict_result = net.predictCNN(input_sample);
				
				int pred_rst = (int)predict_result[0].ptr<uchar>(0)[0];
				if(pred_rst <= net.class_count)
					putText(img_show, to_string(pred_rst), Point(faces[f].x+faces[f].width, faces[f].y+faces[f].height), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2);
			}

			int frame_count = 2;
			while(--frame_count) {
				wri.write(img_show);
			}
			imshow(camera_window_name, img_show);

			int key_get = waitKey(20);
			switch(key_get) {
			case 27:
				wri.release();
				return 0;
			default:
				break;
			}
		}
	}
	wri.release();
#endif

#ifdef _HANY_NET_PREDICT_CAMERA
#ifdef _HANY_NET_PRINT_MSG
	cout << "Predicting from camera..." << endl;
#endif

	VideoCapture cap_out(0);
	if(!cap_out.isOpened()) {
		cout << "Cannot access camera." << endl;
		cin.get();
		exit(-1);
	}

	CascadeClassifier cascade_out;
	cascade_out.load(haar_file);

//	VideoWriter wri(output_video_file, CV_FOURCC('M', 'J', 'P', 'G'), 25.0, Size(640, 480));

	Mat src_frame;

	namedWindow(camera_window_name);

	Mat img_read;
	while(cap_out.read(img_read)) {
		Mat img_gray, nor_gray, img_show;
		img_read.copyTo(img_show);
		cvtColor(img_read, img_gray, CV_BGR2GRAY);

		vector<Rect> faces;
		equalizeHist(img_gray, img_gray);
		cascade_out.detectMultiScale(img_gray, faces, 1.1, 2, 0, Size(120, 120));

		for(int f = 0; f < faces.size(); f++) {
			rectangle(img_show, faces[f], Scalar(0, 255, 255), 3);

			resize(img_gray(faces[f]), nor_gray, Size(net.sample_width, net.sample_height));
			vector<Mat> input_sample;
			input_sample.push_back(nor_gray);

			vector<Mat> predict_result = net.predictCNN(input_sample);

			int pred_rst = (int)predict_result[0].ptr<uchar>(0)[0];
			if(pred_rst < net.class_count)   // keep label_list[pred_rst] in bounds
				putText(img_show, label_list[pred_rst].second, Point(faces[f].x+faces[f].width, faces[f].y+faces[f].height), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2);

		}

		int frame_count = 2;
		while(--frame_count) {
//			wri.write(img_show);
		}
		imshow(camera_window_name, img_show);

		int key_get = waitKey(20);
		if(key_get == 27) {
//			wri.release();
			cap_out.release();
			return 0;
		}
	}
#endif

	cout << "Press any key to quit..." << endl;
//	waitKey(0);
	cin.get();

	return 0;
}
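Note: loadMNIST is declared elsewhere in this project. For reference, a minimal sketch of what it might look like, assuming net.train_set and net.test_set are std::vector<std::pair<cv::Mat, uchar>> (as the push_back calls above suggest) and that the files use the standard big-endian MNIST IDX layout:

#include <cstdint>
#include <fstream>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>

// Read a 32-bit big-endian integer from an IDX header.
static uint32_t readBigEndian32(std::ifstream& in) {
	unsigned char b[4];
	in.read(reinterpret_cast<char*>(b), 4);
	return (uint32_t(b[0]) << 24) | (uint32_t(b[1]) << 16) | (uint32_t(b[2]) << 8) | uint32_t(b[3]);
}

// Hypothetical loadMNIST: parse the standard IDX image/label files and append
// (image, label) pairs to 'set' as 8-bit grayscale cv::Mat plus uchar label.
void loadMNIST(const std::string& image_file, const std::string& label_file,
               std::vector<std::pair<cv::Mat, uchar>>& set) {
	std::ifstream images(image_file, std::ios::binary);
	std::ifstream labels(label_file, std::ios::binary);
	if (!images || !labels) return;

	readBigEndian32(images);                      // image magic number (0x00000803)
	uint32_t count = readBigEndian32(images);
	uint32_t rows  = readBigEndian32(images);
	uint32_t cols  = readBigEndian32(images);
	readBigEndian32(labels);                      // label magic number (0x00000801)
	readBigEndian32(labels);                      // label count (same as 'count')

	for (uint32_t i = 0; i < count; i++) {
		cv::Mat img((int)rows, (int)cols, CV_8UC1);
		images.read(reinterpret_cast<char*>(img.data), rows * cols);
		uchar label = 0;
		labels.read(reinterpret_cast<char*>(&label), 1);
		set.push_back(std::make_pair(img, label));
	}
}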