// Non-maximum suppression over a score map.
//
// A pixel survives only when its score is strictly above `threshold` and
// is_max_in_neighbour() accepts it for the (w, h) neighbourhood; every other
// pixel is written as 0. Survivors above 0.85 * 255 saturate to 255, the
// remaining survivors are rescaled linearly so that `threshold` maps to 0 and
// 0.85 * 255 maps to 255.
//
// input     : score map
// threshold : minimum score, on the same scale as `input`
// w, h      : neighbourhood size forwarded to is_max_in_neighbour()
// returns   : a plane of the same size as `input`
SDoublePlane non_maximum_suppress(const SDoublePlane &input, double threshold, int w, int h)
{
    const int n_rows = input.rows();
    const int n_cols = input.cols();
    SDoublePlane output(n_rows, n_cols);

    for (int r = 0; r < n_rows; r++)
    {
        for (int c = 0; c < n_cols; c++)
        {
            // Cheap threshold test first; the neighbourhood scan only runs
            // for pixels that pass it.
            const bool survives = input[r][c] > threshold
                               && is_max_in_neighbour(input, r, c, w, h);
            if (!survives)
            {
                output[r][c] = 0;
                continue;
            }

            if (input[r][c] > 0.85 * 255)
                output[r][c] = 255;
            else
                output[r][c] = (input[r][c] - threshold) / (0.85 - threshold / 255);
        }
    }
    return output;
}
// Slides img_template over img_input and scores every placement at which the
// template lies completely inside the image.
//
// The score of a placement is the sum over all template pixels of
//     in * tp + (1 - in) * (1 - tp)
// which counts agreeing pixels when both planes hold 0/1 values
// (NOTE(review): presumably both inputs are binarised to 0/1 - confirm with
// the caller).
//
// Returns a HammingDistances whose hamming_matrix holds the score of the
// placement whose top-left corner is at [i][j], and whose
// max_hamming_distance is the largest score found (0.0 when the template
// does not fit anywhere). Cells without a valid placement keep whatever
// value the SDoublePlane constructor gave them.
HammingDistances find_hamming_distance(SDoublePlane &img_input, SDoublePlane &img_template){
    const int input_rows = img_input.rows();
    const int input_cols = img_input.cols();
    const int template_rows = img_template.rows();
    const int template_cols = img_template.cols();

    HammingDistances hm;
    double max_hamming = 0.0;
    SDoublePlane output(input_rows, input_cols);

    // `<=` on purpose: the placement whose last template row/column coincides
    // with the last image row/column is still fully inside the image. The
    // previous `<` bound silently skipped that final row and column.
    for (int i = 0; i + template_rows <= input_rows; ++i) {
        for (int j = 0; j + template_cols <= input_cols; ++j) {
            double sum = 0.0;
            for (int k = 0; k < template_rows; ++k) {
                for (int l = 0; l < template_cols; ++l) {
                    sum = sum + (img_input[i+k][j+l] * img_template[k][l]
                                 + (1 - img_input[i+k][j+l]) * (1 - img_template[k][l]));
                }
            }
            output[i][j] = sum;
            if (sum > max_hamming)
                max_hamming = sum;
        }
    }

    hm.hamming_matrix = output;
    hm.max_hamming_distance = max_hamming;
    return hm;
}
// Apply an edge detector to an image, returns the binary edge map // Pass thresh=0 to ignore binary map, else pass thresh [1-255] // white_value only applies when thresh!=0, pass 1 for 0-1 image, or 255 for 0-255 binary image // Returns edge_map and gradient_angle pair<SDoublePlane, SDoublePlane> find_edges(const SDoublePlane &input, double thresh=0, double white_value=1) { SDoublePlane G(input.rows(), input.cols()); SDoublePlane Rotation(input.rows(), input.cols()); SDoublePlane Gx, Gy; Gx = sobel_gradient_filter(input, true); Gy = sobel_gradient_filter(input, false); for (int i = 0; i < input.rows(); ++i) { for (int j = 0; j < input.cols(); ++j) { G[i][j] = sqrt(Gx[i][j]*Gx[i][j]+Gy[i][j]*Gy[i][j]); if (G[i][j] > 255) G[i][j] = 255; if ( abs(Gx[i][j]) < 0.0001) Rotation[i][j] = PI / 2.0; else Rotation[i][j] = atan(Gy[i][j] / Gx[i][j]); } } if (abs(thresh) > 0.0001) { for (int i = 0; i < G.rows(); ++i) for (int j = 0; j < G.cols(); ++j) G[i][j] = (G[i][j]>thresh?white_value:0); } return make_pair(G, Rotation); }
int get_notes_possitions(const SDoublePlane &input, const SDoublePlane &tmpl, double threshold, SDoublePlane &output, Type t, vector<DetectedSymbol> &symbols) { // non-maximum suppress size int sup_w, sup_h; //shawn calc hamming distance // get template image //SDoublePlane template_note = SImageIO::read_png_file("template1.png"); // get distance SDoublePlane hammdis_note = get_Hamming_distance(input, tmpl); if (t == NOTEHEAD) { write_image("scores4.png", hammdis_note); } //write_image("hamming_dist_note.png", hammdis_note); // print_image_value(hammdis_note); // cout << plane_max(hammdis_note) / 255 << endl; // non-maximum suppress SDoublePlane sup_note = non_maximum_suppress(hammdis_note, threshold*255, tmpl.cols()*0.5, tmpl.rows()-(int)(tmpl.rows()*0.4)); //write_image("sup_hamming_dist_note.png", sup_note); get_symbols(sup_note, symbols, t, tmpl.cols(), tmpl.rows()); return 0; }
// Scores every placement of binary_template on the plane D and thresholds
// the result.
//
// For each placement that lies completely inside D, the score is the sum of
// binary_template[k][l] * D[i+k][j+l]. Placements scoring below `threshold`
// are written as 255 (white), the others as 0. Cells with no valid placement
// keep whatever value the SDoublePlane constructor gave them.
//
// D               : plane to score against (taken by value, not modified)
// binary_template : template weights
// threshold       : scores strictly below this become 255
SDoublePlane calculate_F(SDoublePlane D, SDoublePlane &binary_template, double threshold)
{
    const int m = binary_template.rows();
    const int n = binary_template.cols();
    const int input_rows = D.rows();
    const int input_cols = D.cols();

    SDoublePlane output(input_rows, input_cols);

    // `<=` on purpose: the placement that ends exactly on the last row/column
    // is still fully inside D. The previous `<` bound skipped it.
    for (int i = 0; i + m <= input_rows; i++) {
        for (int j = 0; j + n <= input_cols; j++) {
            double sum = 0.0;
            for (int k = 0; k < m; k++) {
                for (int l = 0; l < n; l++) {
                    sum += binary_template[k][l] * D[i+k][j+l];
                }
            }
            // Low scores become white, everything else black.
            output[i][j] = (sum < threshold) ? 255 : 0;
        }
    }
    return output;
}
// Stereo labelling via a message table.
//
// 1. init_table() builds the message table from the two images.
// 2. bp_update() is applied to the table LOOP times.
// 3. For every pixel, the label whose five message entries (msg[0..4]) sum to
//    the lowest cost is selected; a label is only accepted when its cost is
//    below THRES, otherwise the pixel keeps the plane's initial value.
// 4. The chosen label is rescaled by 256 / LABEL_NUM.
//
// Returns a plane the size of left_image.
SDoublePlane mrf_stereo(const SDoublePlane &left_image, const SDoublePlane &right_image)
{
    const int n_rows = left_image.rows();
    const int n_cols = left_image.cols();
    SDoublePlane result(n_rows, n_cols);

    // Build the message table, then iterate the update.
    vector<vector<MsgPiece> > msgTable = init_table(left_image, right_image);
    for (int iter = 0; iter < LOOP; ++iter)
    {
        bp_update(msgTable);
    }

    for (int r = 0; r < n_rows; ++r)
    {
        for (int c = 0; c < n_cols; ++c)
        {
            const MsgPiece &cell = msgTable[r][c];

            // Pick the lowest-cost label for this pixel.
            double best = THRES;
            for (int label = 0; label < LABEL_NUM; ++label)
            {
                double cost = cell.msg[0][label];
                for (int slot = 1; slot < 5; ++slot)
                    cost += cell.msg[slot][label];

                if (cost < best)
                {
                    best = cost;
                    result[r][c] = label;
                }
            }

            // Rescale the label for output.
            result[r][c] = (result[r][c] * 256) / LABEL_NUM;
        }
    }
    return result;
}
// Resizes `input` by `ratio` (output size is rows*ratio x cols*ratio, rounded
// down). `ratio` must be positive.
//
// ratio > 0.5 : each output pixel is the mean of the four source pixels at
//               the corners of its source footprint.
// otherwise   : each output pixel is the mean of every source pixel inside
//               its source footprint (box filter).
//
// The footprint of output index i is [i/ratio, (i+1)/ratio); the small
// epsilon keeps the inclusive end index strictly inside that interval.
SDoublePlane scale_image(const SDoublePlane &input, double ratio)
{
    const int m2 = static_cast<int>(input.rows() * ratio);
    const int n2 = static_cast<int>(input.cols() * ratio);
    SDoublePlane output(m2, n2);

    if (ratio > 0.5) {
        for (int i = 0; i < m2; i++) {
            const int sk = static_cast<int>(i / ratio);
            const int ek = static_cast<int>((i + 1) / ratio - 0.00001);
            for (int j = 0; j < n2; j++) {
                const int sl = static_cast<int>(j / ratio);
                const int el = static_cast<int>((j + 1) / ratio - 0.00001);
                output[i][j] = (input[sk][sl] + input[sk][el]
                              + input[ek][sl] + input[ek][el]) / 4.0;
            }
        }
    } else {
        for (int i = 0; i < m2; i++) {
            const int sk = static_cast<int>(i / ratio);
            // (i + 1), not (i - 1): the footprint ends where the next output
            // pixel begins. With (i - 1) the end index fell before the start,
            // the averaging loops never ran and every pixel came out as 0.
            const int ek = static_cast<int>((i + 1) / ratio - 0.00001);
            for (int j = 0; j < n2; j++) {
                const int sl = static_cast<int>(j / ratio);
                const int el = static_cast<int>((j + 1) / ratio - 0.00001);

                double total = 0;
                for (int u = sk; u <= ek; u++) {
                    for (int v = sl; v <= el; v++) {
                        total += input[u][v];
                    }
                }
                output[i][j] = total / ((ek - sk + 1) * (el - sl + 1));
            }
        }
    }
    return output;
}
// Returns `input` flipped along both axes (a 180-degree rotation): the value
// at [i][j] ends up at [rows-1-i][cols-1-j].
//
// `input` is taken by value, so the swaps are done on the local copy, which
// is then returned.
SDoublePlane flipxy(SDoublePlane input)
{
    const int rows = input.rows();
    const int cols = input.cols();

    // Swap every cell of the top half with its point-mirror in the bottom half.
    for (int i = 0; i < rows / 2; i++) {
        for (int j = 0; j < cols; j++) {
            const double temp = input[rows - i - 1][cols - j - 1];
            input[rows - i - 1][cols - j - 1] = input[i][j];
            input[i][j] = temp;
        }
    }

    // With an odd row count the middle row mirrors onto itself, so it has to
    // be reversed in place; the loop above never touches it. Only half the
    // columns are visited, otherwise each pair would be swapped back.
    if (rows % 2 == 1) {
        const int mid = rows / 2;
        for (int j = 0; j < cols / 2; j++) {
            const double temp = input[mid][cols - j - 1];
            input[mid][cols - j - 1] = input[mid][j];
            input[mid][j] = temp;
        }
    }
    return input;
}
// Pads a kernel so that both of its dimensions are odd.
//
// A kernel that already has an odd number of rows and columns is returned
// unchanged. Otherwise each even dimension grows by one, and the added
// row/column repeats the last row/column of the input (the new corner repeats
// the input's last cell).
//
// Logs the input size to stdout and dumps the kernel through debug_png()
// before and, when padding happened, after the operation.
SDoublePlane normalise_kernel(const SDoublePlane& input)
{
    const int in_rows = input.rows();
    const int in_cols = input.cols();

    std::cout<<"Normalising kernel: "<<in_rows<<","<<in_cols<<std::endl;
    debug_png("before-normalise-kernel.png", input);

    const bool rows_odd = (in_rows % 2 == 1);
    const bool cols_odd = (in_cols % 2 == 1);
    if (rows_odd && cols_odd)
    {
        return input;
    }

    // `| 1` rounds an even size up to the next odd one and leaves odd sizes alone.
    SDoublePlane output(in_rows | 1, in_cols | 1);

    for (int r = 0; r < output.rows(); r++)
    {
        // Rows/columns beyond the input read from the input's last row/column.
        const int src_r = (r < in_rows) ? r : in_rows - 1;
        for (int c = 0; c < output.cols(); c++)
        {
            const int src_c = (c < in_cols) ? c : in_cols - 1;
            output[r][c] = input[src_r][src_c];
        }
    }

    debug_png("after-normalise-kernel.png", output);
    return output;
}
// Find symbols in the given image for the given template void find_symbols(HammingDistances hm, SDoublePlane input, SDoublePlane img_template, vector <DetectedSymbol> &symbols, Type symbol_type){ int template_rows = img_template.rows(); int template_cols = img_template.cols(); double max_hamming_distance = hm.max_hamming_distance; SDoublePlane matrix = hm.hamming_matrix; vector<LineLocation> allLinesLocVector; double confidence_threshold; if (symbol_type == NOTEHEAD){ confidence_threshold = 0.9; allLinesLocVector = find_line_location(input); } else confidence_threshold = 0.95; // Finding symbols for(int i =0;i<matrix.rows();i++){ for(int j=0;j<matrix.cols();j++){ double value = matrix [i][j]; if( value >= confidence_threshold * max_hamming_distance) { DetectedSymbol s; s.row = i; s.col = j; s.width = template_cols; s.height = template_rows; s.type = symbol_type; s.confidence = value; s.pitch = ' '; if(symbol_type == NOTEHEAD) set_symbol_marker(s, allLinesLocVector); else s.pitch = ' '; symbols.push_back(s); // Marking the pixels of the template so that they are not detected again for (int x=i; x < i+s.height; x++){ for (int y=j; y < j+s.width; y++){ matrix[x][y] = -100; } } } } } }
// compare two image values // returns true if they are similar, false otherwise bool compare_image_value(const SDoublePlane &image1, const SDoublePlane &image2) { if (image1.rows() != image2.rows() || image1.cols() != image2.cols()) return false; for (int i = 0; i < image1.rows(); ++i) { for (int j = 0; j < image1.cols(); ++j) { if (image1[i][j] != image2[i][j]) return false; } } return true; }
// Convolve an image with a general convolution kernel // SDoublePlane convolve_general(const SDoublePlane &input, const SDoublePlane &filter) { //Requires the dimension of the filter to be smaller than the input. //Switch the parameters otherwise. if ((input.rows() - filter.rows()) * (input.cols() - filter.cols()) < 0) { throw "Mismatched dimensions."; } if (input.rows() < filter.rows()) { return convolve_general(filter, input); } if (filter.rows()%2 == 0 || filter.cols()%2 == 0){ throw "Expected an odd number of rows and columns in the filter"; } //From here, we have ensured dimensions of input is larger than the filter. SDoublePlane output(input.rows(), input.cols()); int filter_rows_num = filter.rows(), filter_cols_num = filter.cols(), image_rows_num = input.rows(), image_cols_num = input.cols(); int start_row = filter.rows()/2, start_col = filter.cols()/2, end_row = input.rows() - start_row, end_col = input.cols() - start_col; for (int i = 0; i < image_rows_num; i++) { for (int j = 0; j < image_cols_num; j++) { int sum = 0; for (int p = -start_row; p <= start_row; p++) { for (int q = -start_col; q <= start_col; q++) { int x = i + p, y = j+q; if (0 > x || x >= image_rows_num || 0 > y || y >= image_cols_num) { if (y < 0) { y = -y; } else if (y >= image_cols_num) { y = 2*image_cols_num - y - 1; } if (x < 0) { x = -x; } else if (x >= image_rows_num) { x = 2*image_rows_num - x - 1; } } sum += input[x][y] * filter[filter_rows_num - p - start_row - 1] [filter_cols_num - q - start_col - 1]; } } output[i][j] = sum; } } return output; }
// Finding Line Location for Q4. As this was a open ended question, we found the number of lines using this approach. vector<LineLocation> find_line_location(SDoublePlane &input){ int rows = input.rows(); int cols = input.cols(); vector<LineLocation> allLinesLocVector; double sum; int lineCounter = 1; int j; for(int i=0;i<rows;i++) { sum =0; for(j=cols - 45;j<cols -40;j++){ sum += input[i][j]; } if(sum/5 < 130){ LineLocation lineLoc; lineLoc.row = i; lineLoc.col = j; set_line_tags(lineLoc, lineCounter); lineCounter++; allLinesLocVector.push_back(lineLoc); } } return allLinesLocVector; }
// Convolve an image with a separable convolution kernel // SDoublePlane convolve_separable(const SDoublePlane &input, const SDoublePlane &row_filter, const SDoublePlane &col_filter) { SDoublePlane output(input.rows(), input.cols()); output = convolve_general(input, row_filter); return convolve_general(output, col_filter); }
// Evaluates the pairwise cost at pixel (i, j).
//
// For every candidate label d in [0, DLIMIT) the cost is the sum of the
// squared differences between d and the values of the four neighbours of
// (i, j) in `disp`; the running total is kept in an int. The lowest cost is
// written to result[i][j], and the matching label is written to disp[i][j].
//
// The caller must make sure (i, j) is not on the border: all four neighbours
// are read without bounds checks.
//
// NOTE(review): `disp` is taken by value, so the label written into it looks
// like it is lost when the function returns, and `labels` is never used -
// confirm whether the label was meant to be stored in `labels`.
//
// Returns a plane the size of `disp`; only cell [i][j] is assigned here.
SDoublePlane compute_pairwise_cost(SDoublePlane disp, SDoublePlane &labels, int i, int j)
{
    // Neighbour offsets, visited in the order: left, right, below, above.
    const int row_step[4] = { 0, 0, 1, -1 };
    const int col_step[4] = { -1, 1, 0, 0 };

    SDoublePlane result(disp.rows(), disp.cols());

    int best_cost = INT_MAX;
    int best_label = DLIMIT;

    for (int d = 0; d < DLIMIT; d++)
    {
        int cost = 0;
        for (int nb = 0; nb < 4; nb++)
        {
            cost += pow(d - disp[i + row_step[nb]][j + col_step[nb]], 2);
        }

        if (cost < best_cost)
        {
            best_cost = cost;
            best_label = d;
        }
    }

    result[i][j] = best_cost;
    disp[i][j] = best_label;
    return result;
}
// Builds a distance map from a binary edge image.
//
// Cells equal to 1 in the input start at distance 0, all others at 10000
// (standing in for infinity). Distances are then relaxed with dmin() in four
// steps, each cell taking the smaller of its own value and a neighbour's
// value plus one:
//   1. forward sweep over rows/columns >= 1, using the left and upper cells;
//   2. backward sweep over rows/columns <= size - 2, using the lower and
//      right cells;
//   3. the bottom row, right to left, using the upper and right cells;
//   4. the rightmost column, bottom to top, using the lower and left cells.
//
// NOTE(review): the bottom row reads row size - 2 and the rightmost column
// reads column size - 2, so the input presumably has at least two rows and
// two columns - confirm with the caller.
SDoublePlane calculate_D(SDoublePlane &binary_image_blur_sobel)
{
    const int n_rows = binary_image_blur_sobel.rows();
    const int n_cols = binary_image_blur_sobel.cols();
    const int last_row = n_rows - 1;
    const int last_col = n_cols - 1;

    SDoublePlane D(n_rows, n_cols);

    // Seed: edges are 0, everything else is "infinitely" far away.
    for (int r = 0; r < n_rows; r++)
    {
        for (int c = 0; c < n_cols; c++)
        {
            D[r][c] = (binary_image_blur_sobel[r][c] == 1) ? 0 : 10000;
        }
    }

    // 1. Distance below and to the right of edges.
    for (int r = 1; r < n_rows; r++)
    {
        for (int c = 1; c < n_cols; c++)
        {
            D[r][c] = dmin(D[r][c], D[r][c-1]+1, D[r-1][c]+1);
        }
    }

    // 2. Distance above and to the left of edges.
    for (int r = last_row - 1; r >= 0; r--)
    {
        for (int c = last_col - 1; c >= 0; c--)
        {
            D[r][c] = dmin(D[r][c], D[r+1][c]+1, D[r][c+1]+1);
        }
    }

    // 3. Bottom row, skipped by the backward sweep.
    for (int c = last_col - 1; c >= 0; c--)
    {
        D[last_row][c] = dmin(D[last_row][c], D[last_row-1][c]+1, D[last_row][c+1]+1);
    }

    // 4. Rightmost column, skipped by the backward sweep.
    for (int r = last_row - 1; r >= 0; r--)
    {
        D[r][last_col] = dmin(D[r][last_col], D[r+1][last_col]+1, D[r][last_col-1]+1);
    }

    return D;
}
// Apply a sobel operator to an image, returns the result // _gx=true for horizontal gradient, false for vertical gradient SDoublePlane sobel_gradient_filter(const SDoublePlane &input, bool _gx) { SDoublePlane output(input.rows(), input.cols()); SDoublePlane row_filter(1, 3), col_filter(3, 1); if (_gx) { row_filter[0][0] = -1.0; row_filter[0][1] = 0.0; row_filter[0][2] = 1.0; col_filter[0][0] = 1.0/8.0; col_filter[1][0] = 2.0/8.0; col_filter[2][0] = 1.0/8.0; } else { row_filter[0][0] = 1.0/8.0; row_filter[0][1] = 2.0/8.0; row_filter[0][2] = 1.0/8.0; col_filter[0][0] = 1.0; col_filter[1][0] = 0.0; col_filter[2][0] = -1.0; } SDoublePlane sobel = convolve_separable(input, row_filter, col_filter); return sobel; }
void write_staves_image(const string &filename, const SDoublePlane &img, vector<Line> linesVector){ SDoublePlane output_planes[3]; for (int i = 0; i < 3; i++) output_planes[i] = img; int r = img.rows(); int c = img.cols(); int x1, y1, x2, y2; for (int i = 0; i < linesVector.size(); i++) { x1 = linesVector[i].x1; y1 = linesVector[i].y1; x2 = linesVector[i].x2; y2 = linesVector[i].y2; //cout<<"(x1,y1): ("<<x1<<","<<y1<<"), (x2,y2):("<<x2<<","<<y2<<")\n"; overlay_rectangle(output_planes[2], y1, x1, y2, x2, 255, 2); overlay_rectangle(output_planes[0], y1, x1, y2, x2, 0, 2); overlay_rectangle(output_planes[1], y1, x1, y2, x2, 0, 2); } SImageIO::write_png_file(filename.c_str(), output_planes[0], output_planes[1], output_planes[2]); }
// Apply an edge detector to an image, returns the binary edge map SDoublePlane find_edges(const SDoublePlane &input, double thresh = 0) { SDoublePlane output(input.rows(), input.cols()); // Implement an edge detector of your choice, e.g. // use your sobel gradient operator to compute the gradient magnitude and threshold return output; }
// Get Hamming distance map SDoublePlane get_Hamming_distance(const SDoublePlane &input, const SDoublePlane &target) { SDoublePlane output(input.rows(), input.cols()); // change to convolution function later for (int i = 0; i < input.rows(); i++) { for (int j = 0; j < input.cols(); j++) { double sum = 0; for (int u = 0; u < target.rows(); u++) { for (int v = 0; v < target.cols(); v++) { int k = i + u, l = j + v; if (k >= input.rows()) { k = input.rows() - 1 - (k - input.rows() + 1); } if (l >= input.cols()) { l = input.cols() - 1 - (l - input.cols() + 1); } double a = input[k][l] / 255; double b = target[u][v] / 255; sum += a * b; sum += (1 - a) * (1 - b); } } output[i][j] = sum / (target.rows() * target.cols()) * 255; } } return output; }
// Binary edge map from the two 3x3 sobel kernels.
//
// `input` is convolved with the horizontal and the vertical sobel kernel via
// convolve_general(); the gradient magnitude sqrt(gx^2 + gy^2) is then
// thresholded: values above 200 become 255, everything else becomes 0.
SDoublePlane direct_sobel(const SDoublePlane &input)
{
    const double horizontal[3][3] = { { -1, 0, 1 },
                                      { -2, 0, 2 },
                                      { -1, 0, 1 } };
    const double vertical[3][3]   = { { -1, -2, -1 },
                                      {  0,  0,  0 },
                                      {  1,  2,  1 } };

    SDoublePlane sobelHorFilter(3, 3);
    SDoublePlane sobelVerFilter(3, 3);
    for (int r = 0; r < 3; r++)
    {
        for (int c = 0; c < 3; c++)
        {
            sobelHorFilter[r][c] = horizontal[r][c];
            sobelVerFilter[r][c] = vertical[r][c];
        }
    }

    const SDoublePlane gx = convolve_general(input, sobelHorFilter);
    const SDoublePlane gy = convolve_general(input, sobelVerFilter);

    SDoublePlane output(input.rows(), input.cols());
    for (int i = 0; i < input.rows(); i++)
    {
        for (int j = 0; j < input.cols(); j++)
        {
            const double magnitude = sqrt(gx[i][j]*gx[i][j] + gy[i][j]*gy[i][j]);
            output[i][j] = (magnitude > 200) ? 255 : 0;
        }
    }
    return output;
}
int main(int argc, char *argv[]) { if(argc != 4 && argc != 3) { cerr << "usage: " << argv[0] << " image_file1 image_file2 [gt_file]" << endl; return 1; } string input_filename1 = argv[1], input_filename2 = argv[2]; string gt_filename; if(argc == 4) gt_filename = argv[3]; // read in images and gt SDoublePlane image1 = SImageIO::read_png_file(input_filename1.c_str()); SDoublePlane image2 = SImageIO::read_png_file(input_filename2.c_str()); SDoublePlane gt; if(gt_filename != "") { gt = SImageIO::read_png_file(gt_filename.c_str()); // gt maps are scaled by a factor of 3, undo this... for(int i=0; i<gt.rows(); i++) for(int j=0; j<gt.cols(); j++) gt[i][j] = gt[i][j] / 3.0; } // do stereo using mrf SDoublePlane disp3 = mrf_stereo(image1, image2); SImageIO::write_png_file("disp_mrf.png", disp3, disp3, disp3); // Measure error with respect to ground truth, if we have it... if(gt_filename != "") { double err=0; for(int i=0; i<gt.rows(); i++) for(int j=0; j<gt.cols(); j++) err += sqrt((disp3[i][j] - gt[i][j])*(disp3[i][j] - gt[i][j])); cout << "MRF stereo technique mean error = " << err/gt.rows()/gt.cols() << endl; } return 0; }
// Rescales the votes in column 0 of `acc` using the minimum and maximum
// reported by find_min_vote() / find_max_vote():
//     normalized[i][0] = (acc[i][0] - min) / (max - min)
//
// Only column 0 is written; any other column keeps the value the
// SDoublePlane constructor gave it. When min and max are equal there is no
// spread to normalise, and the votes are written as 0 instead of dividing by
// zero (which produced NaN).
SDoublePlane normalize_votes(const SDoublePlane &acc)
{
    SDoublePlane normalized(acc.rows(), acc.cols());

    const double lowest = find_min_vote(acc);
    const double highest = find_max_vote(acc);
    const double spread = highest - lowest;

    for (int i = 0; i < acc.rows(); i++) {
        if (spread != 0)
            normalized[i][0] = (acc[i][0] - lowest) / spread;
        else
            normalized[i][0] = 0;
    }
    return normalized;
}
// Builds the initial message table for the two images.
//
// The table has one MsgPiece per pixel of left_img, with all 5 x LABEL_NUM
// message entries set to 0. For pixels at least WIN away from every border,
// msg[0][k] is then filled with sum_squared(left_img, right_img, col, row, k)
// for each label k that does not exceed the pixel's column index.
vector<vector<MsgPiece> > init_table(const SDoublePlane &left_img, const SDoublePlane &right_img){
    // Template cell with every message zeroed.
    MsgPiece blank;
    for (int slot = 0; slot < 5; ++slot)
    {
        for (int label = 0; label < LABEL_NUM; ++label)
        {
            blank.msg[slot][label] = 0.0000;
        }
    }

    const int n_rows = left_img.rows();
    const int n_cols = left_img.cols();
    vector<vector<MsgPiece> > table(n_rows, vector<MsgPiece>(n_cols, blank));

    for (int row = WIN; row < n_rows - WIN; row++)
    {
        for (int col = WIN; col < n_cols - WIN; col++)
        {
            // Labels larger than the column index are skipped (col - k < 0).
            for (int k = 0; k < LABEL_NUM && k <= col; ++k)
            {
                table[row][col].msg[0][k] = sum_squared(left_img, right_img, col, row, k);
            }
        }
    }
    return table;
}
// Draws a rectangle on an image plane, using the specified gray level value and line width. // void overlay_rectangle(SDoublePlane &input, int _top, int _left, int _bottom, int _right, double graylevel, int width) { for(int w=-width/2; w<=width/2; w++) { int top = _top+w, left = _left+w, right=_right+w, bottom=_bottom+w; // if any of the coordinates are out-of-bounds, truncate them top = min( max( top, 0 ), input.rows()-1); bottom = min( max( bottom, 0 ), input.rows()-1); left = min( max( left, 0 ), input.cols()-1); right = min( max( right, 0 ), input.cols()-1); // draw top and bottom lines for(int j=left; j<=right; j++) input[top][j] = input[bottom][j] = graylevel; // draw left and right lines for(int i=top; i<=bottom; i++) input[i][left] = input[i][right] = graylevel; } }
// Window-based disparity search.
//
// For every pixel of input1 and every disparity d in [0, 50), the squared
// differences between the 3x3 window around the pixel in input1 and the
// window shifted d columns to the right in input2 are summed (only pixel
// pairs that lie inside the image take part). The disparity with the lowest
// sum below 30000 is stored; pixels for which no disparity qualifies keep
// the value the SDoublePlane constructor gave them.
//
// NOTE(review): bounds are checked against input1's size only, so input2 is
// assumed to be at least as large - confirm with the caller.
SDoublePlane disparity_map(const SDoublePlane &input1, const SDoublePlane &input2)
{
    const int rows = input1.rows();
    const int cols = input1.cols();
    SDoublePlane result(rows, cols);

    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < cols; j++)
        {
            int best = 30000;
            for (int d = 0; d < 50; d++)
            {
                int sum = 0;
                int samples = 0;
                for (int k = i - 1; k < i + 2; k++)
                {
                    if (k < 0 || k >= rows)
                        continue;
                    for (int l = j - 1; l < j + 2; l++)
                    {
                        // Both l and l + d must be valid columns. The old
                        // test only looked at l + d, so for j == 0 and d >= 1
                        // it read input1[k][-1].
                        if (l < 0 || l + d >= cols)
                            continue;
                        const double diff = input1[k][l] - input2[k][l + d];
                        sum = static_cast<int>(sum + diff * diff);
                        samples++;
                    }
                }

                // A window with no pixel inside the image would score 0 and
                // beat every real match; it carries no information, skip it.
                if (samples == 0)
                    continue;

                // Keep the disparity with the smallest sum.
                if (sum < best)
                {
                    best = sum;
                    result[i][j] = d;
                }
            }
        }
    }
    return result;
}
//draw lines on the image after hough transform SDoublePlane get_lines(const SDoublePlane &acc,const SDoublePlane &input,int rgb) { SDoublePlane lines = input; if(rgb==1){ for(int i=0;i<acc.rows();i++){ if(acc[i][0] == 255){ for(int j=0;j<input.cols();j++){ lines[i][j]=255; } } }} else{ for(int i=0;i<acc.rows();i++){ if(acc[i][0] == 255){ for(int j=0;j<input.cols();j++){ lines[i][j]=0; } } }} return lines; }
// Returns the complement of `input`: every value v becomes 255 - v.
SDoublePlane complement_image(const SDoublePlane &input)
{
    SDoublePlane output(input);
    const int n_rows = input.rows();
    const int n_cols = input.cols();

    for (int r = 0; r < n_rows; ++r)
    {
        for (int c = 0; c < n_cols; ++c)
        {
            const double value = output[r][c];
            output[r][c] = 255 - value;
        }
    }
    return output;
}
// Prints the plane to stdout: one text line per row, every value followed by
// a single space.
void print_image_value(const SDoublePlane &input)
{
    const int n_rows = input.rows();
    const int n_cols = input.cols();

    for (int r = 0; r < n_rows; ++r)
    {
        for (int c = 0; c < n_cols; ++c)
        {
            cout << input[r][c] << " ";
        }
        cout << endl;
    }
}
// Print an image to a file void printImg2File(string filename, SDoublePlane img) { ofstream outFile; outFile.open(filename.c_str()); for (int i = 0; i < img.rows(); i++) { for (int j = 0; j < img.cols(); j++) { outFile << img[i][j] << ","; } outFile << "\n"; } outFile.close(); }