void load_weights(void) { typedef network<mse, gradient_descent_levenberg_marquardt> CNN; CNN nn; convolutional_layer_hw<CNN, tan_h> C1(32, 32, 5, 1, 6); //average_pooling_layer<CNN, tan_h> S2(28, 28, 6, 2); #define O true #define X false static const bool connection[] = { O, X, X, X, O, O, O, X, X, O, O, O, O, X, O, O, O, O, X, X, X, O, O, O, X, X, O, O, O, O, X, O, O, O, O, X, X, X, O, O, O, X, X, O, X, O, O, O, X, O, O, O, X, X, O, O, O, O, X, X, O, X, O, O, X, X, O, O, O, X, X, O, O, O, O, X, O, O, X, O, X, X, X, O, O, O, X, X, O, O, O, O, X, O, O, O }; #undef O #undef X //convolutional_layer2_hw<CNN, tan_h> C3(14, 14, 5, 6, 16, connection_table(connection, 6, 16)); convolutional_layer<CNN, tan_h> C3(14, 14, 5, 6, 16, connection_table(connection, 6, 16)); average_pooling_layer<CNN, tan_h> S4(10, 10, 16, 2); convolutional_layer<CNN, tan_h> C5(5, 5, 5, 16, 120); fully_connected_layer<CNN, tan_h> F6(120, 10); nn.add(&C1); nn.add(&C3); nn.add(&S4); nn.add(&C5); nn.add(&F6); std::stringstream stream; ReadFloatsFromSDFile(stream, std::string("weights.bin")); stream >> C1 >> C3 >> S4 >> C5 >> F6; // C3.print_weights(); std::vector<label_t> train_labels, test_labels; std::vector<vec_t> train_images, test_images; parse_mnist_labels("labels.bin", &test_labels); parse_mnist_images("images.bin", &test_images); nn.test(test_images, test_labels).print_detail(std::cout); return; //C1.print_weights(); }
int main(int argc, const char** argv) { /*DepthImage d(2, 2); d(0, 0) = 1.0f; d(1, 0) = 6.0f; d(0, 1) = 6.0f; d(1, 1) = 100.0f; d.save("C:\\code\\test.dat", true); d.load("C:\\code\\test.dat", true); d.save("C:\\code\\test2.dat", true); d.load("C:\\code\\test2.dat", true);*/ ReconstructionParams reconParams; CNN cnn; cnn.initStandard(); Bitmap testImage, reconstructedImage; LayerData testOutput; const string dataDir = "../data/"; const string imageDir = "../testImages/"; const string outputDir = "../testResults/"; testImage = ml::LodePNG::load(imageDir + "imageA.png"); cnn.filter(testImage, testOutput); cnn.layer.invert(reconParams, testOutput, cnn.transform.meanValues, reconstructedImage); ml::LodePNG::save(reconstructedImage, outputDir + reconParams.toString() + ".png"); for (UINT filter = 0; filter < testOutput.images.size(); filter++) { const Bitmap bmp = testOutput.images[filter].makeVisualization(reconParams); ml::LodePNG::save(bmp, outputDir + util::zeroPad(filter, 2) + ".png"); } return 0; }
/////////////////////////////////////////////////////////////////////////////// // learning convolutional neural networks (LeNet-5 like architecture) void sample1_convnet(void) { // construct LeNet-5 architecture typedef network<mse, gradient_descent_levenberg_marquardt> CNN; CNN nn; convolutional_layer<CNN, tan_h> C1(32, 32, 5, 1, 6); average_pooling_layer<CNN, tan_h> S2(28, 28, 6, 2); // connection table [Y.Lecun, 1998 Table.1] #define O true #define X false static const bool connection[] = { O, X, X, X, O, O, O, X, X, O, O, O, O, X, O, O, O, O, X, X, X, O, O, O, X, X, O, O, O, O, X, O, O, O, O, X, X, X, O, O, O, X, X, O, X, O, O, O, X, O, O, O, X, X, O, O, O, O, X, X, O, X, O, O, X, X, O, O, O, X, X, O, O, O, O, X, O, O, X, O, X, X, X, O, O, O, X, X, O, O, O, O, X, O, O, O }; #undef O #undef X convolutional_layer<CNN, tan_h> C3(14, 14, 5, 6, 16, connection_table(connection, 6, 16)); average_pooling_layer<CNN, tan_h> S4(10, 10, 16, 2); convolutional_layer<CNN, tan_h> C5(5, 5, 5, 16, 120); fully_connected_layer<CNN, tan_h> F6(120, 10); assert(C1.param_size() == 156 && C1.connection_size() == 122304); assert(S2.param_size() == 12 && S2.connection_size() == 5880); assert(C3.param_size() == 1516 && C3.connection_size() == 151600); assert(S4.param_size() == 32 && S4.connection_size() == 2000); assert(C5.param_size() == 48120 && C5.connection_size() == 48120); nn.add(&C1); nn.add(&S2); nn.add(&C3); nn.add(&S4); nn.add(&C5); nn.add(&F6); std::cout << "load models..." << std::endl; // load MNIST dataset std::vector<label_t> train_labels, test_labels; std::vector<vec_t> train_images, test_images; parse_mnist_labels("train-labels.idx1-ubyte", &train_labels); parse_mnist_images("train-images.idx3-ubyte", &train_images); parse_mnist_labels("t10k-labels.idx1-ubyte", &test_labels); parse_mnist_images("t10k-images.idx3-ubyte", &test_images); std::cout << "start learning" << std::endl; boost::progress_display disp(train_images.size()); boost::timer t; int minibatch_size = 10; nn.optimizer().alpha *= std::sqrt(minibatch_size); // create callback auto on_enumerate_epoch = [&](){ std::cout << t.elapsed() << "s elapsed." << std::endl; tiny_cnn::result res = nn.test(test_images, test_labels); std::cout << nn.optimizer().alpha << "," << res.num_success << "/" << res.num_total << std::endl; nn.optimizer().alpha *= 0.85; // decay learning rate nn.optimizer().alpha = std::max(0.00001, nn.optimizer().alpha); disp.restart(train_images.size()); t.restart(); }; auto on_enumerate_minibatch = [&](){ disp += minibatch_size; // weight visualization in imdebug /*static int n = 0; n+=minibatch_size; if (n >= 1000) { image img; C3.weight_to_image(img); imdebug("lum b=8 w=%d h=%d %p", img.width(), img.height(), &img.data()[0]); n = 0; }*/ }; // training nn.train(train_images, train_labels, minibatch_size, 20, on_enumerate_minibatch, on_enumerate_epoch); std::cout << "end training." << std::endl; // test and show results nn.test(test_images, test_labels).print_detail(std::cout); // save networks std::ofstream ofs("LeNet-weights"); ofs << C1 << S2 << C3 << S4 << C5 << F6; }
int main(int argc, char* argv[]) { CNN net; double time_cost; //-------- CNN Initializing -------- //---------------------------------- //Read parameters file net.readPara(parameter_file); //-------- Load Dataset ------------ //---------------------------------- #ifdef _HANY_NET_WITH_LABEL_NAMES ifstream read_label(label_file); for(int c = 0; c < net.class_count; c++) { string new_label_name; read_label >> new_label_name; label_list.push_back(make_pair(c, new_label_name)); } #endif #ifdef _HANY_NET_LOAD_MNIST #ifdef _HANY_NET_PRINT_MSG cout << "Loading MNIST dataset..." << endl; time_cost = (double)getTickCount(); #endif loadMNIST("train-images.idx3-ubyte", "train-labels.idx1-ubyte", net.train_set); loadMNIST("t10k-images.idx3-ubyte", "t10k-labels.idx1-ubyte", net.test_set); #ifdef _HANY_NET_PRINT_MSG time_cost = ((double)getTickCount() - time_cost) / getTickFrequency(); cout << "Load samples done." << endl << "Time cost: " << time_cost << "s." << endl << endl; #endif #endif #ifdef _HANY_NET_TRAIN_FROM_SCRATCH #ifdef _HANY_NET_LOAD_SAMPLE_FROM_PIC #ifdef _HANY_NET_PRINT_MSG cout << "Loading samples..." << endl; time_cost = (double)getTickCount(); #endif for(int c = 0; c < net.class_count; c++) { for(int i = 0; i < sample_num; i++) { string file_name = sample_file_pre + to_string(c) + "_" + to_string(i) + ".jpg"; Mat img_read = imread(file_name, CV_LOAD_IMAGE_GRAYSCALE); if(img_read.data == NULL) { break; } Mat img_nor; resize(img_read, img_nor, Size(net.sample_width, net.sample_height)); net.train_set.push_back(make_pair(img_nor, (uchar)(c))); } } #ifdef _HANY_NET_PRINT_MSG time_cost = ((double)getTickCount() - time_cost) / getTickFrequency(); cout << "Load samples done." << endl << "Time cost: " << time_cost << "s." << endl << endl; #endif #endif #ifdef _HANY_NET_CAPTURE_FACE_FROM_CAMERA #ifdef _HANY_NET_PRINT_MSG cout << "Capturing samples..." << endl; time_cost = (double)getTickCount(); #endif VideoCapture cap_in(0); if(!cap_in.isOpened()) { cout << "Cannot access camera. Press ANY key to exit." << endl; cin.get(); exit(-1); } CascadeClassifier cascade_in; cascade_in.load(haar_file); Mat frame; int frame_count = 0; int capture_count = 0; int class_idx = 0; int class_count = 0; bool sample_suff = false; bool cap_sample = true; while(cap_in.read(frame)) { capture_count++; vector<Rect> faces; Mat frame_gray, img_gray; cvtColor(frame, frame_gray, CV_BGR2GRAY); equalizeHist(frame_gray, img_gray); cascade_in.detectMultiScale(img_gray, faces, 1.1, 2, 0, Size(120, 120)); int face_area = 0; int face_idx = 0; if(faces.size() > 0) { for(int f = 0; f < faces.size(); f++) { if(faces[f].area() > face_area) { face_area = faces[f].area(); face_idx = f; } } rectangle(frame, faces[face_idx], Scalar(255, 0, 0), 3); if(frame_count % 5 == 0 && cap_sample && !sample_suff) { Mat face, face_nor; img_gray(faces[face_idx]).copyTo(face); resize(face, face_nor, Size(net.sample_width, net.sample_height)); net.train_set.push_back(make_pair(face_nor, (uchar)class_idx)); class_count++; } } putText(frame, "Class: " + to_string(class_idx), Point(50, 100), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2); putText(frame, "Sample: " + to_string(class_count), Point(50, 150), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2); if(sample_suff) { putText(frame, "Enough samples. Press SPACE.", Point(50, 50), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2); }else { putText(frame, "Capturing...", Point(50, 50), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2); } if(!cap_sample) { putText(frame, "Wait for another person. Press SPACE.", Point(50, 200), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2); } imshow(camera_window_name, frame); if(class_count >= sample_num) { sample_suff = true; } frame_count++; int key = waitKey(20); if(key == 27){ cap_in.release(); break; } else if(key == ' ') { if(cap_sample && sample_suff) { cap_sample = false; continue; } if(!cap_sample && sample_suff) { cap_sample = true; sample_suff = false; class_idx++; class_count = 0; continue; } } } #ifdef _HANY_NET_PRINT_MSG time_cost = ((double)getTickCount() - time_cost) / getTickFrequency(); cout << "Load samples done." << endl << "Time cost: " << time_cost << "s." << endl << endl; #endif #endif #endif //-------- CNN Initializing -------- //---------------------------------- #ifdef _HANY_NET_PRINT_MSG cout << "Initializing neural networks..." << endl; time_cost = (double)getTickCount(); #endif //Initialize CNN with knowledge of samples net.initCNN(); #ifdef _HANY_NET_PRINT_MSG time_cost = ((double)getTickCount() - time_cost) / getTickFrequency(); cout << "Total number of samples: " << (int)(net.train_set.size() + net.test_set.size()) << endl; cout << "Initializing neural networks done." << endl << "Time cost: " << time_cost << "s." << endl << endl; #endif //Load pre-trained CNN parameters from file and continue to train // net.uploadCNN(pretrained_cnn_file); //-------- CNN Training ---------- //-------------------------------- #ifdef _HANY_NET_TRAIN_FROM_SCRATCH #ifdef _HANY_NET_PRINT_MSG cout << "Start training CNN..." << endl; time_cost = (double)getTickCount(); #endif //Train CNN with train sample set net.trainCNN(); #ifdef _HANY_NET_PRINT_MSG time_cost = ((double)getTickCount() - time_cost) / getTickFrequency(); cout << "CNN training done." << endl << "Time cost: " << time_cost << "s." << endl << endl; #endif for(int i = 0; i < net.time_ff.size(); i++) { cout << "FeedForward stage " << i << ": " << net.time_ff[i] << "s" << endl; } for(int i = 0; i < net.time_bp.size(); i++) { cout << "BackPropagation stage " << i << ": " << net.time_bp[i] << "s" << endl; } //Draw stage loss graph Mat stage_loss_graph = Mat::zeros(600, 1100, CV_8UC3); Point2d pt1, pt2; pt1 = Point2d(50.0, 50.0); for(int stage = 0; stage < net.stage_loss.size(); stage++) { pt2 = Point2d(50.0 + 1200.0 / net.stage_loss.size() * stage, 550.0 - 500.0 * net.stage_loss[stage] / net.stage_loss[0]); line(stage_loss_graph, pt1, pt2, Scalar(255, 255, 255)); pt1 = pt2; } imshow("Stage Loss Graph", stage_loss_graph); imwrite("stage_loss_graph.jpg", stage_loss_graph); waitKey(10); #endif //-------- Save Trained Network ----- //----------------------------------- #ifdef _HANY_NET_TRAIN_FROM_SCRATCH #ifdef _HANY_NET_PRINT_MSG cout << "Dumping trained CNN parameters to file " << pretrained_cnn_file << "..." << endl; #endif //Dump trained CNN parameters to file net.downloadCNN(trained_cnn_file); #ifdef _HANY_NET_PRINT_MSG cout << "Dumping trained CNN parameters to file done." << endl << endl; #endif #endif //-------- Load Pre-trained Network ----- //--------------------------------------- #ifndef _HANY_NET_TRAIN_FROM_SCRATCH #ifdef _HANY_NET_PRINT_MSG cout << "Loading pre-trained CNN parameters from file " << pretrained_cnn_file << "..." << endl; #endif //Load pre-trained CNN parameters from file net.uploadCNN(pretrained_cnn_file); #ifdef _HANY_NET_PRINT_MSG cout << "Loading pre-trained CNN parameters from file done." << endl << endl; #endif #endif //-------- Predict New Samples------- //-------------------------------------- #ifdef _HANY_NET_PREDICT_MNIST #ifdef _HANY_NET_PRINT_MSG cout << "Predicting MNIST test dataset..." << endl; time_cost = (double)getTickCount(); #endif //Calculate correctness ratio with test samples int total_correct_count = 0; for(int sample_idx = 0; sample_idx < net.test_set.size(); sample_idx++) { vector<Mat> input_sample; input_sample.push_back(net.test_set[sample_idx].first); vector<Mat> predict_result = net.predictCNN(input_sample); if((int)predict_result[0].ptr<uchar>(0)[0] == net.test_set[sample_idx].second) { total_correct_count++; } } double total_correct_ratio = (double)total_correct_count / net.test_set.size(); #ifdef _HANY_NET_PRINT_MSG time_cost = ((double)getTickCount() - time_cost) / getTickFrequency(); cout << "MNIST testing done." << endl << "Time cost: " << time_cost << "s." << endl; cout << "Total correctness ratio: " << total_correct_ratio << endl << endl; #endif #endif #ifdef _HANY_NET_PREDICT_IMAGE_SERIES #ifdef _HANY_NET_PRINT_MSG cout << "Predicting from image series..." << endl; #endif // VideoWriter wri(output_video_file, CV_FOURCC('M', 'J', 'P', 'G'), 25.0, Size(640, 480)); for(int c = 0; c < net.class_count; c++) { for(int i = 0; i < sample_num; i++) { string file_name = sample_file_pre + to_string(c) + "_" + to_string(i) + ".jpg"; Mat img_read = imread(file_name, CV_LOAD_IMAGE_GRAYSCALE); if(img_read.data == NULL) { break; } Mat img_nor, img_show; resize(img_read, img_show, Size(400, 400)); resize(img_read, img_nor, Size(net.sample_width, net.sample_height)); vector<Mat> input_sample; input_sample.push_back(img_nor); vector<Mat> predict_result = net.predictCNN(input_sample); int pred_rst = (int)predict_result[0].ptr<uchar>(0)[0]; if(pred_rst <= net.class_count) putText(img_show, label_list[pred_rst].second, Point(10, 40), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2); putText(img_show, to_string(c)+"-"+to_string(i), Point(img_show.cols-80, 40), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2); int frame_count = 25; while(--frame_count) { // wri.write(img_show); } imshow(camera_window_name, img_show); int key_get = waitKey(20); switch(key_get) { case 27: // wri.release(); return 0; default: break; } } } #endif #ifdef _HANY_NET_PREDICT_VEDIO_SERIES #ifdef _HANY_NET_PRINT_MSG cout << "Predicting from video series..." << endl; #endif VideoWriter wri(output_video_file, CV_FOURCC('M', 'J', 'P', 'G'), 25.0, Size(640, 480)); namedWindow(camera_window_name); CascadeClassifier cascade_out; cascade_out.load(haar_file); for(int c = 1; c <= net.class_count; c++) { string file_name = "path_to_face_videos\\" + to_string(c) + ".wmv"; VideoCapture cap(file_name); if(!cap.isOpened()) continue; Mat img_read; while(cap.read(img_read)) { Mat img_gray, nor_gray, img_show; img_read.copyTo(img_show); cvtColor(img_read, img_gray, CV_BGR2GRAY); vector<Rect> faces; equalizeHist(img_gray, img_gray); cascade_out.detectMultiScale(img_gray, faces, 1.1, 2, 0, Size(120, 120)); for(int f = 0; f < faces.size(); f++) { rectangle(img_show, faces[f], Scalar(0, 255, 255), 3); resize(img_gray(faces[f]), nor_gray, Size(net.sample_width, net.sample_height)); vector<Mat> input_sample; input_sample.push_back(nor_gray); vector<Mat> predict_result = net.predictCNN(input_sample); int pred_rst = (int)predict_result[0].ptr<uchar>(0)[0]; if(pred_rst <= net.class_count) putText(img_show, to_string(pred_rst), Point(faces[f].x+faces[f].width, faces[f].y+faces[f].height), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2); } int frame_count = 2; while(--frame_count) { wri.write(img_show); } imshow(camera_window_name, img_show); int key_get = waitKey(20); switch(key_get) { case 27: wri.release(); return 0; default: break; } } } wri.release(); #endif #ifdef _HANY_NET_PREDICT_CAMERA #ifdef _HANY_NET_PRINT_MSG cout << "Predicting from camera..." << endl; #endif VideoCapture cap_out(0); if(!cap_out.isOpened()) { cout << "Cannot access camera." << endl; cin.get(); exit(-1); } CascadeClassifier cascade_out; cascade_out.load(haar_file); // VideoWriter wri(output_video_file, CV_FOURCC('M', 'J', 'P', 'G'), 25.0, Size(640, 480)); Mat src_frame; namedWindow(camera_window_name); Mat img_read; while(cap_out.read(img_read)) { Mat img_gray, nor_gray, img_show; img_read.copyTo(img_show); cvtColor(img_read, img_gray, CV_BGR2GRAY); vector<Rect> faces; equalizeHist(img_gray, img_gray); cascade_out.detectMultiScale(img_gray, faces, 1.1, 2, 0, Size(120, 120)); for(int f = 0; f < faces.size(); f++) { rectangle(img_show, faces[f], Scalar(0, 255, 255), 3); resize(img_gray(faces[f]), nor_gray, Size(net.sample_width, net.sample_height)); vector<Mat> input_sample; input_sample.push_back(nor_gray); vector<Mat> predict_result = net.predictCNN(input_sample); int pred_rst = (int)predict_result[0].ptr<uchar>(0)[0]; if(pred_rst <= net.class_count) putText(img_show, label_list[pred_rst].second, Point(faces[f].x+faces[f].width, faces[f].y+faces[f].height), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 255), 2); } int frame_count = 2; while(--frame_count) { // wri.write(img_show); } imshow(camera_window_name, img_show); int key_get = waitKey(20); if(key_get == 27) { // wri.release(); cap_out.release(); return 0; } } #endif cout << "Press any key to quit..." << endl; // waitKey(0); cin.get(); return 0; }