int get_notes_possitions(const SDoublePlane &input, const SDoublePlane &tmpl, double threshold, SDoublePlane &output, Type t, vector<DetectedSymbol> &symbols) { // non-maximum suppress size int sup_w, sup_h; //shawn calc hamming distance // get template image //SDoublePlane template_note = SImageIO::read_png_file("template1.png"); // get distance SDoublePlane hammdis_note = get_Hamming_distance(input, tmpl); if (t == NOTEHEAD) { write_image("scores4.png", hammdis_note); } //write_image("hamming_dist_note.png", hammdis_note); // print_image_value(hammdis_note); // cout << plane_max(hammdis_note) / 255 << endl; // non-maximum suppress SDoublePlane sup_note = non_maximum_suppress(hammdis_note, threshold*255, tmpl.cols()*0.5, tmpl.rows()-(int)(tmpl.rows()*0.4)); //write_image("sup_hamming_dist_note.png", sup_note); get_symbols(sup_note, symbols, t, tmpl.cols(), tmpl.rows()); return 0; }
void set_disparity(int start, int end, vector<SDoublePlane> &D, SDoublePlane &V, vector<vector<SDoublePlane> > &m) { //disp_arg *A = (disp_arg*)obj; double disp_score; double min_score = INT_MAX; int label, t=1; double neighbors_sum; start = (start < 1)? 1:start; end = (end < (V.rows()-1))?end:(V.rows()-1); for (int i = start; i < end-1; i++) { for (int j = 1; j < D[0].cols() - 1; j++) { min_score = INT_MAX; label = DLIMIT; for (int d = 0; d < DLIMIT; d++) { //cout << i<<","<<j<<endl; neighbors_sum = get_messages_from_neighbors(m[d][t], i, j, ALL); disp_score = D[d][i][j] + neighbors_sum; if (disp_score < min_score) { min_score = disp_score; label = d; } } V[i][j] = label; } } }
// Finding Line Location for Q4. As this was a open ended question, we found the number of lines using this approach. vector<LineLocation> find_line_location(SDoublePlane &input){ int rows = input.rows(); int cols = input.cols(); vector<LineLocation> allLinesLocVector; double sum; int lineCounter = 1; int j; for(int i=0;i<rows;i++) { sum =0; for(j=cols - 45;j<cols -40;j++){ sum += input[i][j]; } if(sum/5 < 130){ LineLocation lineLoc; lineLoc.row = i; lineLoc.col = j; set_line_tags(lineLoc, lineCounter); lineCounter++; allLinesLocVector.push_back(lineLoc); } } return allLinesLocVector; }
// Convolve an image with a separable convolution kernel // SDoublePlane convolve_separable(const SDoublePlane &input, const SDoublePlane &row_filter, const SDoublePlane &col_filter) { SDoublePlane output(input.rows(), input.cols()); output = convolve_general(input, row_filter); return convolve_general(output, col_filter); }
void write_staves_image(const string &filename, const SDoublePlane &img, vector<Line> linesVector){ SDoublePlane output_planes[3]; for (int i = 0; i < 3; i++) output_planes[i] = img; int r = img.rows(); int c = img.cols(); int x1, y1, x2, y2; for (int i = 0; i < linesVector.size(); i++) { x1 = linesVector[i].x1; y1 = linesVector[i].y1; x2 = linesVector[i].x2; y2 = linesVector[i].y2; //cout<<"(x1,y1): ("<<x1<<","<<y1<<"), (x2,y2):("<<x2<<","<<y2<<")\n"; overlay_rectangle(output_planes[2], y1, x1, y2, x2, 255, 2); overlay_rectangle(output_planes[0], y1, x1, y2, x2, 0, 2); overlay_rectangle(output_planes[1], y1, x1, y2, x2, 0, 2); } SImageIO::write_png_file(filename.c_str(), output_planes[0], output_planes[1], output_planes[2]); }
// Apply a sobel operator to an image, returns the result // _gx=true for horizontal gradient, false for vertical gradient SDoublePlane sobel_gradient_filter(const SDoublePlane &input, bool _gx) { SDoublePlane output(input.rows(), input.cols()); SDoublePlane row_filter(1, 3), col_filter(3, 1); if (_gx) { row_filter[0][0] = -1.0; row_filter[0][1] = 0.0; row_filter[0][2] = 1.0; col_filter[0][0] = 1.0/8.0; col_filter[1][0] = 2.0/8.0; col_filter[2][0] = 1.0/8.0; } else { row_filter[0][0] = 1.0/8.0; row_filter[0][1] = 2.0/8.0; row_filter[0][2] = 1.0/8.0; col_filter[0][0] = 1.0; col_filter[1][0] = 0.0; col_filter[2][0] = -1.0; } SDoublePlane sobel = convolve_separable(input, row_filter, col_filter); return sobel; }
// Keep only the local maxima of `input` that exceed `threshold`.
//
// A pixel survives when its value is greater than `threshold` and
// is_max_in_neighbour(input, i, j, w, h) holds. Surviving pixels are mapped
// as follows:
//   - values above 0.85 * 255 become 255,
//   - other values are rescaled linearly with
//     (value - threshold) / (0.85 - threshold / 255).
// Every other pixel of the result is 0.
SDoublePlane non_maximum_suppress(const SDoublePlane &input, double threshold, int w, int h)
{
    const double saturation_level = 0.85 * 255;

    SDoublePlane output(input.rows(), input.cols());
    for (int row = 0; row < input.rows(); ++row)
    {
        for (int col = 0; col < input.cols(); ++col)
        {
            const double score = input[row][col];
            double kept = 0;

            if (score > threshold && is_max_in_neighbour(input, row, col, w, h))
            {
                if (score > saturation_level)
                {
                    kept = 255;
                }
                else
                {
                    kept = (score - threshold) / (0.85 - threshold / 255);
                }
            }

            output[row][col] = kept;
        }
    }
    return output;
}
// Score every template position against the plane D and binarise the result.
//
// For each top-left corner (i, j) with i < D.rows() - m and j < D.cols() - n
// (m x n being the size of `binary_template`) the score is
//     sum over (k, l) of binary_template[k][l] * D[i+k][j+l].
// Positions whose score is below `threshold` are set to 255 (white), all other
// visited positions to 0 (black). Cells outside the visited range are never
// written by this function.
SDoublePlane calculate_F(SDoublePlane D, SDoublePlane &binary_template, double threshold)
{
    const int m = binary_template.rows();
    const int n = binary_template.cols();
    const int input_rows = D.rows();
    const int input_cols = D.cols();

    SDoublePlane output(input_rows, input_cols);

    for (int top = 0; top < input_rows - m; ++top)
    {
        for (int left = 0; left < input_cols - n; ++left)
        {
            double score = 0.0;
            for (int k = 0; k < m; ++k)
            {
                for (int l = 0; l < n; ++l)
                {
                    score += binary_template[k][l] * D[top + k][left + l];
                }
            }

            // Low scores become white, everything else black.
            output[top][left] = (score < threshold) ? 255 : 0;
        }
    }
    return output;
}
// Compute, for the single pixel (i, j), the label d in [0, DLIMIT) whose
// summed squared difference to the four neighbours of `disp` (left, right,
// below, above) is smallest.
//
// The minimal cost is stored in result[i][j] of a freshly created plane with
// the same size as `disp`; no other cell of the result is written here. The
// winning label is written into the by-value parameter `disp`. The `labels`
// parameter is not used.
//
// The running cost is an int, so each squared term is truncated when it is
// added; this is kept exactly as in the original.
SDoublePlane compute_pairwise_cost(SDoublePlane disp, SDoublePlane &labels, int i, int j)
{
    SDoublePlane result(disp.rows(), disp.cols());

    const double neighbours[4] = {
        disp[i][j - 1],
        disp[i][j + 1],
        disp[i + 1][j],
        disp[i - 1][j]
    };

    int lowest_cost = INT_MAX;
    int lowest_label = DLIMIT;

    for (int d = 0; d < DLIMIT; ++d)
    {
        int cost = 0;
        for (int k = 0; k < 4; ++k)
        {
            cost += pow(d - neighbours[k], 2);
        }

        if (cost < lowest_cost)
        {
            lowest_cost = cost;
            lowest_label = d;
        }
    }

    result[i][j] = lowest_cost;
    disp[i][j] = lowest_label;
    return result;
}
// Slide `img_template` over `img_input` and compute a match score for each
// top-left corner (i, j) with i < input_rows - template_rows and
// j < input_cols - template_cols:
//     sum over (k, l) of  a * b + (1 - a) * (1 - b)
// where a = img_input[i+k][j+l] and b = img_template[k][l].
//
// Returns the score plane (same size as the input; positions outside the
// visited range are never written here) together with the largest score seen.
HammingDistances find_hamming_distance(SDoublePlane &img_input, SDoublePlane &img_template)
{
    const int input_rows = img_input.rows();
    const int input_cols = img_input.cols();
    const int template_rows = img_template.rows();
    const int template_cols = img_template.cols();

    SDoublePlane scores(input_rows, input_cols);
    double best_score = 0.0;

    for (int top = 0; top < input_rows - template_rows; ++top)
    {
        for (int left = 0; left < input_cols - template_cols; ++left)
        {
            double score = 0.0;
            for (int k = 0; k < template_rows; ++k)
            {
                for (int l = 0; l < template_cols; ++l)
                {
                    const double pixel = img_input[top + k][left + l];
                    const double pattern = img_template[k][l];
                    score = score + (pixel * pattern + (1 - pixel) * (1 - pattern));
                }
            }

            scores[top][left] = score;
            if (score > best_score)
            {
                best_score = score;
            }
        }
    }

    HammingDistances hm;
    hm.hamming_matrix = scores;
    hm.max_hamming_distance = best_score;
    return hm;
}
// Build a distance plane from a binary edge map using two raster sweeps.
//
// Pixels equal to 1 in the input start at distance 0, all others at 10000
// (used as "infinity"). A forward sweep (rows and columns from index 1)
// relaxes each cell against its left and upper neighbour; a backward sweep
// relaxes the remaining cells against the lower and right neighbour, followed
// by dedicated passes for the bottom row and the rightmost column. Each
// relaxation goes through the three-argument dmin() helper.
SDoublePlane calculate_D(SDoublePlane &binary_image_blur_sobel)
{
    const int input_rows = binary_image_blur_sobel.rows();
    const int input_cols = binary_image_blur_sobel.cols();
    const int last_row = input_rows - 1;
    const int last_col = input_cols - 1;

    SDoublePlane D(input_rows, input_cols);

    // Seed: edges are 0, everything else "infinity".
    for (int r = 0; r < input_rows; ++r)
    {
        for (int c = 0; c < input_cols; ++c)
        {
            D[r][c] = (binary_image_blur_sobel[r][c] == 1) ? 0 : 10000;
        }
    }

    // Forward sweep: distances below and to the right of edges.
    for (int r = 1; r < input_rows; ++r)
    {
        for (int c = 1; c < input_cols; ++c)
        {
            D[r][c] = dmin(D[r][c], D[r][c - 1] + 1, D[r - 1][c] + 1);
        }
    }

    // Backward sweep: distances above and to the left of edges.
    for (int r = last_row - 1; r >= 0; --r)
    {
        for (int c = last_col - 1; c >= 0; --c)
        {
            D[r][c] = dmin(D[r][c], D[r + 1][c] + 1, D[r][c + 1] + 1);
        }
    }

    // Bottom row, right to left.
    for (int c = last_col - 1; c >= 0; --c)
    {
        D[last_row][c] = dmin(D[last_row][c], D[last_row - 1][c] + 1, D[last_row][c + 1] + 1);
    }

    // Rightmost column, bottom to top.
    for (int r = last_row - 1; r >= 0; --r)
    {
        D[r][last_col] = dmin(D[r][last_col], D[r + 1][last_col] + 1, D[r][last_col - 1] + 1);
    }

    return D;
}
//using the normalized votes find the best row co-ordinates for staff lines SDoublePlane find_best_line_intercepts(const SDoublePlane &row_votes,const SDoublePlane &normed_votes,int best_space,double norm_threshold=0.55,int neighbour_threshold=4,int start=0) { SDoublePlane row_spacing=row_votes; if(start < row_votes.rows()){ SDoublePlane staff_lines(row_votes.rows(),1); int i=0; double best_value=0; int intercept_value=0; while(i<row_votes.rows()){ if(normed_votes[i][0] > norm_threshold){ best_value=normed_votes[i][0]; intercept_value=i; for(int j=1;j<neighbour_threshold;j++){ if(normed_votes[i+j][0] > best_value ){ best_value=normed_votes[i+j][0]; intercept_value=i+j; } } row_spacing=set_staff(row_spacing,best_space,intercept_value,start); i=intercept_value+(4*(best_space))+neighbour_threshold; start=intercept_value+(4*best_space)+neighbour_threshold; } i++; } } return row_spacing; }
// Apply an edge detector to an image, returns the binary edge map SDoublePlane find_edges(const SDoublePlane &input, double thresh = 0) { SDoublePlane output(input.rows(), input.cols()); // Implement an edge detector of your choice, e.g. // use your sobel gradient operator to compute the gradient magnitude and threshold return output; }
// Resample `input` by the factor `ratio` and return the new image, whose size
// is (rows * ratio) x (cols * ratio), truncated to integers.
//
// Each output pixel (i, j) corresponds to the source window
//     rows [i / ratio, (i + 1) / ratio)  x  cols [j / ratio, (j + 1) / ratio).
//   - ratio > 0.5 : the four corner pixels of that window are averaged.
//   - ratio <= 0.5: every pixel of that window is averaged (box filter).
//
// The small 0.00001 offset keeps the inclusive end index inside the window
// when (i + 1) / ratio lands exactly on an integer.
//
// Fix: the box-filter branch previously computed the window end from
// (i - 1) and (j - 1), which made the window empty and produced an all-zero
// image for every ratio <= 0.5.
SDoublePlane scale_image(const SDoublePlane &input, double ratio)
{
    const int m2 = input.rows() * ratio;
    const int n2 = input.cols() * ratio;
    SDoublePlane output(m2, n2);

    if (ratio > 0.5)
    {
        for (int i = 0; i < m2; i++)
        {
            const int sk = i / ratio;
            const int ek = (i + 1) / ratio - 0.00001;
            for (int j = 0; j < n2; j++)
            {
                const int sl = j / ratio;
                const int el = (j + 1) / ratio - 0.00001;

                output[i][j] = (input[sk][sl] + input[sk][el] + input[ek][sl] + input[ek][el]) / 4.0;
            }
        }
    }
    else
    {
        for (int i = 0; i < m2; i++)
        {
            const int sk = i / ratio;
            const int ek = (i + 1) / ratio - 0.00001;
            for (int j = 0; j < n2; j++)
            {
                const int sl = j / ratio;
                const int el = (j + 1) / ratio - 0.00001;

                double total = 0;
                for (int u = sk; u <= ek; u++)
                {
                    for (int v = sl; v <= el; v++)
                    {
                        total += input[u][v];
                    }
                }
                output[i][j] = total / ((ek - sk + 1) * (el - sl + 1));
            }
        }
    }
    return output;
}
// Rescale the first column of `acc` linearly so that the value returned by
// find_min_vote() maps to 0 and the value returned by find_max_vote() maps
// to 1.
//
// Only column 0 of the result is written; any further columns are left as the
// SDoublePlane constructor created them.
//
// When all votes are equal (max == min) every entry is set to 0 instead of
// dividing by zero.
SDoublePlane normalize_votes(const SDoublePlane &acc)
{
	SDoublePlane normalized(acc.rows(), acc.cols());

	const double min = find_min_vote(acc);
	const double max = find_max_vote(acc);
	const double range = max - min;

	for (int i = 0; i < acc.rows(); i++)
	{
		if (range != 0)
		{
			normalized[i][0] = (acc[i][0] - min) / range;
		}
		else
		{
			normalized[i][0] = 0;
		}
	}
	return normalized;
}
// Dump every pixel of `input` to standard output: one text line per image row,
// each value followed by a single space.
void print_image_value(const SDoublePlane &input)
{
    const int row_count = input.rows();
    const int col_count = input.cols();

    for (int r = 0; r < row_count; ++r)
    {
        for (int c = 0; c < col_count; ++c)
        {
            cout << input[r][c] << " ";
        }
        cout << endl;
    }
}
// Return a copy of `input` in which every pixel v is replaced by 255 - v.
SDoublePlane complement_image(const SDoublePlane &input)
{
    SDoublePlane inverted(input);

    const int row_count = input.rows();
    const int col_count = input.cols();
    for (int r = 0; r < row_count; ++r)
    {
        for (int c = 0; c < col_count; ++c)
        {
            inverted[r][c] = 255 - inverted[r][c];
        }
    }
    return inverted;
}
// Print an image to a file void printImg2File(string filename, SDoublePlane img) { ofstream outFile; outFile.open(filename.c_str()); for (int i = 0; i < img.rows(); i++) { for (int j = 0; j < img.cols(); j++) { outFile << img[i][j] << ","; } outFile << "\n"; } outFile.close(); }
// The rest of these functions are incomplete. These are just suggestions to // get you started -- feel free to add extra functions, change function // parameters, etc. // // // Print the value of the image // void print_image_value1(const SDoublePlane &input) { int sum=0; for (int i = 0; i < input.cols(); ++i) { sum=0; for (int j = 0; j < input.rows(); ++j) { sum=sum+input[j][i]; //cout << input[i][j] << " "; } //cout << endl; cout<<sum<<"|||"<<i<<endl; } }
// Find symbols in the given image for the given template void find_symbols(HammingDistances hm, SDoublePlane input, SDoublePlane img_template, vector <DetectedSymbol> &symbols, Type symbol_type){ int template_rows = img_template.rows(); int template_cols = img_template.cols(); double max_hamming_distance = hm.max_hamming_distance; SDoublePlane matrix = hm.hamming_matrix; vector<LineLocation> allLinesLocVector; double confidence_threshold; if (symbol_type == NOTEHEAD){ confidence_threshold = 0.9; allLinesLocVector = find_line_location(input); } else confidence_threshold = 0.95; // Finding symbols for(int i =0;i<matrix.rows();i++){ for(int j=0;j<matrix.cols();j++){ double value = matrix [i][j]; if( value >= confidence_threshold * max_hamming_distance) { DetectedSymbol s; s.row = i; s.col = j; s.width = template_cols; s.height = template_rows; s.type = symbol_type; s.confidence = value; s.pitch = ' '; if(symbol_type == NOTEHEAD) set_symbol_marker(s, allLinesLocVector); else s.pitch = ' '; symbols.push_back(s); // Marking the pixels of the template so that they are not detected again for (int x=i; x < i+s.height; x++){ for (int y=j; y < j+s.width; y++){ matrix[x][y] = -100; } } } } } }
// Return a copy of `input` rescaled linearly so that the value reported by
// image_min() maps to 0 and the value reported by image_max() maps to 255.
//
// When image_max() and image_min() report the same value there is no range to
// stretch; every pixel is then set to 0 instead of dividing by zero.
SDoublePlane normalize_image(const SDoublePlane &input)
{
    SDoublePlane output(input);

    const double max = image_max(output);
    const double min = image_min(output);
    const double range = max - min;

    for (int i = 0; i < input.rows(); ++i)
    {
        for (int j = 0; j < input.cols(); ++j)
        {
            if (range != 0)
            {
                output[i][j] = (output[i][j] - min) / range * 255;
            }
            else
            {
                output[i][j] = 0;
            }
        }
    }
    return output;
}
// Resize image SDoublePlane resize_image(SDoublePlane &input, double newScaleRatio) { if (newScaleRatio == 1) return input; int rows = input.rows(); int cols = input.cols(); int newWidth = newScaleRatio * cols; int newHeight = newScaleRatio * rows ; SDoublePlane output(newHeight, newWidth); set<int> mySetRows; while( mySetRows.size() < newHeight ){ mySetRows.insert( rand()% rows +1 ); } set<int> mySetCols; while( mySetCols.size() < newWidth ){ mySetCols.insert( rand()% cols+1 ); } int ii =0; for(int i = 1; i <rows;i++){ if(mySetRows.find(i) == mySetRows.end()){ continue; } int jj=0; for(int j = 1; j<cols;j++){ if(mySetCols.find(j) == mySetCols.end()) { continue; } if(i<rows-1 && j<cols-1){ output[ii][jj] = (input[i][j] + input [i+1][j] + input[i][j+1] + input[i+1][j+1])/4; } jj++; } ii++; } //write_detection_image("Resized_pic.png", output); return output; }
// Hough Transform // Inspired by Lecture from Prof. William Hoff, Colorado School of Mines, Engineering Division // https://www.youtube.com/watch?v=o-n7NoLArcs and http://goo.gl/TxbGWi // SDoublePlane runHoughTransform(SDoublePlane &img){ //printImg2File("sobelPNG.txt",img); int r = img.rows(); int c = img.cols(); double hough_height; // Initialize Accumulator matrix if (r>c) hough_height = r / sqrt(2); else hough_height = c / sqrt(2); int maxDist = round(hough_height * 2); int theta = 180; double rho,t; int iRho; SDoublePlane H(maxDist, theta); for (int i = 0; i < maxDist; i++) for (int j = 0; j < theta; j++) H[i][j] = 0; double center_x = c/2; double center_y = r/2; for (int i = 0; i < r; i++) { for (int j = 0; j < c; j++) { if (img[i][j] > 250){ // Fill accumulator array for (int iTheta = 0 ; iTheta < theta ; iTheta++){ // Getting angle in radians t = iTheta * M_PI / 180; // Calculate distance from origin for this angle rho = ( j - center_x) * cos (t) + ( i - center_y) * sin(t); iRho = int(round(rho + hough_height)); H[iRho][iTheta]++; } } } } return H; }
// Return the smallest pixel value of `input`, but never more than 0: the
// search starts from 0, so an image whose pixels are all positive yields 0.
double image_min(const SDoublePlane &input)
{
    double lowest = 0;

    const int row_count = input.rows();
    const int col_count = input.cols();
    for (int r = 0; r < row_count; ++r)
    {
        for (int c = 0; c < col_count; ++c)
        {
            const double value = input[r][c];
            if (value < lowest)
            {
                lowest = value;
            }
        }
    }
    return lowest;
}
// Return the largest pixel value of `input`, but never less than 0: the
// search starts from 0, so an image whose pixels are all negative yields 0.
double image_max(const SDoublePlane &input)
{
    double highest = 0;

    const int row_count = input.rows();
    const int col_count = input.cols();
    for (int r = 0; r < row_count; ++r)
    {
        for (int c = 0; c < col_count; ++c)
        {
            const double value = input[r][c];
            if (value > highest)
            {
                highest = value;
            }
        }
    }
    return highest;
}
// Turn `input` into a displayable black/white image: pixels below 0.3 become
// 0, all other pixels become 255.
SDoublePlane display_binary(SDoublePlane &input)
{
    const int row_count = input.rows();
    const int col_count = input.cols();

    SDoublePlane shown(row_count, col_count);
    for (int r = 0; r < row_count; ++r)
    {
        for (int c = 0; c < col_count; ++c)
        {
            shown[r][c] = (input[r][c] < 0.3) ? 0 : 255;
        }
    }
    return shown;
}
// Binarise `input`: pixels below 1 become 0, all other pixels become 1.
SDoublePlane convert_blur_to_binary(SDoublePlane &input)
{
    const int row_count = input.rows();
    const int col_count = input.cols();

    SDoublePlane binary(row_count, col_count);
    for (int r = 0; r < row_count; ++r)
    {
        for (int c = 0; c < col_count; ++c)
        {
            binary[r][c] = (input[r][c] < 1) ? 0 : 1;
        }
    }
    return binary;
}
// Compute, for every pixel, a distance to the edge pixels of `edge_map` by
// running a Dijkstra-style relaxation from all edge pixels at once.
//
// Pixels with edge_map > 0.1 are sources (distance 0); every other pixel
// starts at -1, meaning "not reached yet". Moving to a horizontal or vertical
// neighbour costs 1, to a diagonal neighbour 1.414. Queue entries are
// (linear pixel index, distance) pairs ordered by compare_priority_queue.
SDoublePlane compute_distance_matrix(SDoublePlane &edge_map)
{
    SDoublePlane D(edge_map.rows(), edge_map.cols());

    typedef pair<int, double> QueueEntry;
    priority_queue< QueueEntry, vector<QueueEntry>, compare_priority_queue > Q;

    const int n_col = D.cols();
    const int n_row = D.rows();

    // Seed the queue with every edge pixel.
    for (int r = 0; r < n_row; ++r)
    {
        for (int c = 0; c < n_col; ++c)
        {
            if (edge_map[r][c] > 0.1)
            {
                D[r][c] = 0;
                Q.push(make_pair(r * n_col + c, 0.0));
            }
            else
            {
                D[r][c] = -1;
            }
        }
    }

    while (!Q.empty())
    {
        const QueueEntry current = Q.top();
        Q.pop();

        const int row = current.first / n_col;
        const int col = current.first % n_col;

        for (int dr = -1; dr <= 1; ++dr)
        {
            for (int dc = -1; dc <= 1; ++dc)
            {
                const int nr = row + dr;
                const int nc = col + dc;

                const bool outside = nr < 0 || nr >= n_row || nc < 0 || nc >= n_col;
                if (outside || (dr == 0 && dc == 0))
                {
                    continue;
                }

                const double step = (dr * dc == 0 ? 1 : 1.414);

                // Relax when the neighbour is still unreached (-1) or can be
                // reached more cheaply through the current pixel.
                if (abs(D[nr][nc] + 1) < 0.0001 || D[nr][nc] > D[row][col] + step)
                {
                    D[nr][nc] = D[row][col] + step;
                    Q.push(make_pair(nr * n_col + nc, D[nr][nc]));
                }
            }
        }
    }
    return D;
}
// Convert `input` to pure black and white: a pixel becomes 255 when it is at
// least 100.0 or exactly 1, and 0 otherwise.
SDoublePlane convert_BW(SDoublePlane &input)
{
    const double threshold = 100.0;
    const int row_count = input.rows();
    const int col_count = input.cols();

    SDoublePlane black_white(row_count, col_count);
    for (int r = 0; r < row_count; ++r)
    {
        for (int c = 0; c < col_count; ++c)
        {
            const bool is_white = input[r][c] >= threshold || input[r][c] == 1;
            if (is_white)
            {
                black_white[r][c] = 255;
            }
            else
            {
                black_white[r][c] = 0;
            }
        }
    }
    return black_white;
}
//from the row co-ordinates/best space matrix find the space parameter with high votes int find_best_spacing(const SDoublePlane &row_spacing) { long max=0,sum=0; int best_space=0; for(int i=2;i<row_spacing.cols();i++){ sum=0; for(int j=0;j<row_spacing.rows();j++){ sum=sum+row_spacing[j][i]; } if(sum > max){ max=sum; best_space=i; } } return best_space; }