void faster_corner_detect_10(const IplImage* I, mrpt::vision::TSimpleFeatureList & corners, int barrier, uint8_t octave, std::vector<size_t> * out_feats_index_by_row) { MRPT_UNUSED_PARAM(octave); corners.reserve(corners.size()+500); //corners.mark_kdtree_as_outdated(); size_t *ptr_feat_index_by_row; if (out_feats_index_by_row) { out_feats_index_by_row->resize(I->height); ptr_feat_index_by_row = &(*out_feats_index_by_row)[0]; } else { ptr_feat_index_by_row = NULL; } // 3 first rows have no features: if (ptr_feat_index_by_row) { *ptr_feat_index_by_row++ = corners.size(); *ptr_feat_index_by_row++ = corners.size(); *ptr_feat_index_by_row++ = corners.size(); } const int w = I->width; const int stride = 3*I->widthStep; // 3*w; // The compiler refuses to reserve a register for this register const __m128i barriers = _mm_set1_epi8((uint8_t)barrier); int xend = I->width - 3; xend -= (I->width-3) % 16; for(int y=3; y < I->height - 3; y++) { if (ptr_feat_index_by_row) // save index by row: *ptr_feat_index_by_row++=corners.size(); for(int x=3; x < 16; x++) if(is_corner_10<Less>((const uint8_t*)I->imageData+I->widthStep*y+x, I->widthStep, barrier) || is_corner_10<Greater>((const uint8_t*)I->imageData+I->widthStep*y+x, I->widthStep, barrier)) corners.push_back_fast(x, y); for(int x=16; x < xend; x+=16) { const uint8_t* p = (const uint8_t*)I->imageData+I->widthStep*y+x; __m128i lo, hi; { const __m128i here = load_si128<Aligned>((const __m128i*)(p)); lo = _mm_subs_epu8(here, barriers); hi = _mm_adds_epu8(barriers, here); } unsigned int ans_b, ans_e; { __m128i top = load_si128<Aligned>((const __m128i*)(p-stride)); __m128i bottom = load_si128<Aligned>((const __m128i*)(p+stride)); CHECK_BARRIER(lo, hi, top, ans_b); CHECK_BARRIER(lo, hi, bottom, ans_e); if (!(ans_b | ans_e)) continue; } unsigned int ans_m, ans_p, possible; { __m128i ul = _mm_loadu_si128((const __m128i*)(p-2-2*w)); __m128i lr = _mm_loadu_si128((const __m128i*)(p+2+2*w)); CHECK_BARRIER(lo, hi, ul, ans_m); CHECK_BARRIER(lo, hi, lr, ans_p); possible = (ans_m & ans_b) | (ans_e & ans_p); if (!possible) continue; } unsigned int ans_o, ans_n; { __m128i ll = _mm_loadu_si128((const __m128i*)(p-2+2*w)); __m128i ur = _mm_loadu_si128((const __m128i*)(p+2-2*w)); CHECK_BARRIER(lo, hi, ll, ans_o); CHECK_BARRIER(lo, hi, ur, ans_n); possible &= ans_o | (ans_b & ans_n); possible &= ans_n | (ans_e & ans_o); if (!possible) continue; } unsigned int ans_h, ans_k; { __m128i left = _mm_loadu_si128((const __m128i*)(p-3)); __m128i right = _mm_loadu_si128((const __m128i*)(p+3)); CHECK_BARRIER(lo, hi, left, ans_h); CHECK_BARRIER(lo, hi, right, ans_k); possible &= ans_h | (ans_n & ans_k & ans_p); possible &= ans_k | (ans_m & ans_h & ans_o); if (!possible) continue; } unsigned int ans_a, ans_c; { __m128i a = _mm_loadu_si128((const __m128i*)(p-1-stride)); __m128i c = _mm_insert_epi16(_mm_srli_si128(a,2), *(const unsigned short*)(p+15-stride), 7); //__m128i c = _mm_loadu_si128((const __m128i*)(p+1-stride)); CHECK_BARRIER(lo, hi, a, ans_a); CHECK_BARRIER(lo, hi, c, ans_c); possible &= ans_a | (ans_e & ans_p); possible &= ans_c | (ans_o & ans_e); if (!possible) continue; } unsigned int ans_d, ans_f; { __m128i d = _mm_loadu_si128((const __m128i*)(p-1+stride)); __m128i f = _mm_insert_epi16(_mm_srli_si128(d,2), *(const unsigned short*)(p+15+stride), 7); //__m128i f = _mm_loadu_si128((const __m128i*)(p+1+stride)); CHECK_BARRIER(lo, hi, d, ans_d); CHECK_BARRIER(lo, hi, f, ans_f); const unsigned int ans_abc = ans_a & ans_b & ans_c; possible &= ans_d | (ans_abc & ans_n); possible &= ans_f | (ans_m & ans_abc); if (!possible) continue; } unsigned int ans_g, ans_i; { __m128i g = _mm_loadu_si128((const __m128i*)(p-3-w)); __m128i ii = _mm_loadu_si128((const __m128i*)(p-3+w)); CHECK_BARRIER(lo, hi, g, ans_g); CHECK_BARRIER(lo, hi, ii, ans_i); possible &= ans_g | (ans_f & ans_p & ans_k); possible &= ans_i | (ans_c & ans_n & ans_k); if (!possible) continue; } unsigned int ans_j, ans_l; { __m128i jj = _mm_loadu_si128((const __m128i*)(p+3-w)); __m128i l = _mm_loadu_si128((const __m128i*)(p+3+w)); CHECK_BARRIER(lo, hi, jj, ans_j); CHECK_BARRIER(lo, hi, l, ans_l); const unsigned int ans_ghi = ans_g & ans_h & ans_i; possible &= ans_j | (ans_d & ans_o & ans_ghi); possible &= ans_l | (ans_m & ans_a & ans_ghi); if (!possible) continue; } possible |= (possible >> 16); //if(possible & 0x0f) //Does this make it faster? { if(possible & (1<< 0)) corners.push_back_fast(x + 0, y); if(possible & (1<< 1)) corners.push_back_fast(x + 1, y); if(possible & (1<< 2)) corners.push_back_fast(x + 2, y); if(possible & (1<< 3)) corners.push_back_fast(x + 3, y); if(possible & (1<< 4)) corners.push_back_fast(x + 4, y); if(possible & (1<< 5)) corners.push_back_fast(x + 5, y); if(possible & (1<< 6)) corners.push_back_fast(x + 6, y); if(possible & (1<< 7)) corners.push_back_fast(x + 7, y); } //if(possible & 0xf0) //Does this mak( , fast)r? { if(possible & (1<< 8)) corners.push_back_fast(x + 8, y); if(possible & (1<< 9)) corners.push_back_fast(x + 9, y); if(possible & (1<<10)) corners.push_back_fast(x +10, y); if(possible & (1<<11)) corners.push_back_fast(x +11, y); if(possible & (1<<12)) corners.push_back_fast(x +12, y); if(possible & (1<<13)) corners.push_back_fast(x +13, y); if(possible & (1<<14)) corners.push_back_fast(x +14, y); if(possible & (1<<15)) corners.push_back_fast(x +15, y); } } for(int x=xend; x < I->width - 3; x++) if(is_corner_10<Less>((const uint8_t*)I->imageData+I->widthStep*y+x, I->widthStep, barrier) || is_corner_10<Greater>((const uint8_t*)I->imageData+I->widthStep*y+x, I->widthStep, barrier)) corners.push_back_fast(x, y); } // 3 last rows have no features: if (ptr_feat_index_by_row) { *ptr_feat_index_by_row++ = corners.size(); *ptr_feat_index_by_row++ = corners.size(); *ptr_feat_index_by_row++ = corners.size(); } }
void faster_corner_detect_9(const IplImage* I, mrpt::vision::TSimpleFeatureList & corners, int barrier, uint8_t octave, std::vector<size_t> * out_feats_index_by_row) { corners.reserve(corners.size()+500); //corners.mark_kdtree_as_outdated(); size_t *ptr_feat_index_by_row; if (out_feats_index_by_row) { out_feats_index_by_row->resize(I->height); ptr_feat_index_by_row = &(*out_feats_index_by_row)[0]; } else { ptr_feat_index_by_row = NULL; } const int w = I->width; const int stride = 3*I->widthStep; // 3*w; // The compiler refuses to reserve a register for this register const __m128i barriers = _mm_set1_epi8((uint8_t)barrier); int xend = I->width - 3; xend -= (I->width-3) % 16; // 3 first rows have no features: if (ptr_feat_index_by_row) { *ptr_feat_index_by_row++ = corners.size(); *ptr_feat_index_by_row++ = corners.size(); *ptr_feat_index_by_row++ = corners.size(); } for(int y=3; y < I->height - 3; y++) { if (ptr_feat_index_by_row) // save index by row: *ptr_feat_index_by_row++=corners.size(); for(int x=3; x < 16; x++) if(is_corner_9<Less>( (const uint8_t*)I->imageData+I->widthStep*y+x, I->widthStep, barrier) || is_corner_9<Greater>((const uint8_t*)I->imageData+I->widthStep*y+x, I->widthStep, barrier)) corners.push_back_fast(x<<octave, y<<octave); for(int x=16; x < xend; x+=16) { const uint8_t* p = (const uint8_t*)I->imageData+I->widthStep*y+x; //(const uint8_t*)I->imageData+I->widthStep*y+x; __m128i lo, hi; { const __m128i here = load_si128<Aligned>((const __m128i*)(p)); lo = _mm_subs_epu8(here, barriers); hi = _mm_adds_epu8(barriers, here); } unsigned int ans_0, ans_8, possible; { __m128i top = load_si128<Aligned>((const __m128i*)(p-stride)); __m128i bottom = load_si128<Aligned>((const __m128i*)(p+stride)); CHECK_BARRIER(lo, hi, top, ans_0); CHECK_BARRIER(lo, hi, bottom, ans_8); possible = ans_0 | ans_8; if (!possible) continue; } unsigned int ans_15, ans_1; { __m128i a = _mm_loadu_si128((const __m128i*)(p-1-stride)); __m128i c = _mm_insert_epi16(_mm_srli_si128(a,2), *(const unsigned short*)(p+15-stride), 7); CHECK_BARRIER(lo, hi, a, ans_15); CHECK_BARRIER(lo, hi, c, ans_1); possible &= ans_8 | (ans_15 & ans_1); if (!possible) continue; } unsigned int ans_9, ans_7; { __m128i d = _mm_loadu_si128((const __m128i*)(p-1+stride)); __m128i f = _mm_insert_epi16(_mm_srli_si128(d,2), *(const unsigned short*)(p+15+stride), 7); CHECK_BARRIER(lo, hi, d, ans_9); CHECK_BARRIER(lo, hi, f, ans_7); possible &= ans_9 | (ans_0 & ans_1); possible &= ans_7 | (ans_15 & ans_0); if (!possible) continue; } unsigned int ans_12, ans_4; { __m128i left = _mm_loadu_si128((const __m128i*)(p-3)); __m128i right = _mm_loadu_si128((const __m128i*)(p+3)); CHECK_BARRIER(lo, hi, left, ans_12); CHECK_BARRIER(lo, hi, right, ans_4); possible &= ans_12 | (ans_4 & (ans_1 | ans_7)); possible &= ans_4 | (ans_12 & (ans_9 | ans_15)); if (!possible) continue; } unsigned int ans_14, ans_6; { __m128i ul = _mm_loadu_si128((const __m128i*)(p-2-2*w)); __m128i lr = _mm_loadu_si128((const __m128i*)(p+2+2*w)); CHECK_BARRIER(lo, hi, ul, ans_14); CHECK_BARRIER(lo, hi, lr, ans_6); { const unsigned int ans_6_7 = ans_6 & ans_7; possible &= ans_14 | (ans_6_7 & (ans_4 | (ans_8 & ans_9))); possible &= ans_1 | (ans_6_7) | ans_12; } { const unsigned int ans_14_15 = ans_14 & ans_15; possible &= ans_6 | (ans_14_15 & (ans_12 | (ans_0 & ans_1))); possible &= ans_9 | (ans_14_15) | ans_4; } if (!possible) continue; } unsigned int ans_10, ans_2; { __m128i ll = _mm_loadu_si128((const __m128i*)(p-2+2*w)); __m128i ur = _mm_loadu_si128((const __m128i*)(p+2-2*w)); CHECK_BARRIER(lo, hi, ll, ans_10); CHECK_BARRIER(lo, hi, ur, ans_2); { const unsigned int ans_1_2 = ans_1 & ans_2; possible &= ans_10 | (ans_1_2 & ((ans_0 & ans_15) | ans_4)); possible &= ans_12 | (ans_1_2) | (ans_6 & ans_7); } { const unsigned int ans_9_10 = ans_9 & ans_10; possible &= ans_2 | (ans_9_10 & ((ans_7 & ans_8) | ans_12)); possible &= ans_4 | (ans_9_10) | (ans_14 & ans_15); } possible &= ans_8 | ans_14 | ans_2; possible &= ans_0 | ans_10 | ans_6; if (!possible) continue; } unsigned int ans_13, ans_5; { __m128i g = _mm_loadu_si128((const __m128i*)(p-3-w)); __m128i l = _mm_loadu_si128((const __m128i*)(p+3+w)); CHECK_BARRIER(lo, hi, g, ans_13); CHECK_BARRIER(lo, hi, l, ans_5); const unsigned int ans_15_0 = ans_15 & ans_0; const unsigned int ans_7_8 = ans_7 & ans_8; { const unsigned int ans_12_13 = ans_12 & ans_13; possible &= ans_5 | (ans_12_13 & ans_14 & ((ans_15_0) | ans_10)); possible &= ans_7 | (ans_1 & ans_2) | (ans_12_13); possible &= ans_2 | (ans_12_13) | (ans_7_8); } { const unsigned int ans_4_5 = ans_4 & ans_5; const unsigned int ans_9_10 = ans_9 & ans_10; possible &= ans_13 | (ans_4_5 & ans_6 & ((ans_7_8) | ans_2)); possible &= ans_15 | (ans_4_5) | (ans_9_10); possible &= ans_10 | (ans_4_5) | (ans_15_0); possible &= ans_15 | (ans_9_10) | (ans_4_5); } possible &= ans_8 | (ans_13 & ans_14) | ans_2; possible &= ans_0 | (ans_5 & ans_6) | ans_10; if (!possible) continue; } unsigned int ans_11, ans_3; { __m128i ii = _mm_loadu_si128((const __m128i*)(p-3+w)); __m128i jj = _mm_loadu_si128((const __m128i*)(p+3-w)); CHECK_BARRIER(lo, hi, ii, ans_11); CHECK_BARRIER(lo, hi, jj, ans_3); { const unsigned int ans_2_3 = ans_2 & ans_3; possible &= ans_11 | (ans_2_3 & ans_4 & ((ans_0 & ans_1) | (ans_5 & ans_6))); possible &= ans_13 | (ans_7 & ans_8) | (ans_2_3); possible &= ans_8 | (ans_2_3) | (ans_13 & ans_14); } { const unsigned int ans_11_12 = ans_11 & ans_12; possible &= ans_3 | (ans_10 & ans_11_12 & ((ans_8 & ans_9) | (ans_13 & ans_14))); possible &= ans_1 | (ans_11_12) | (ans_6 & ans_7); possible &= ans_6 | (ans_0 & ans_1) | (ans_11_12); } { const unsigned int ans_3_4 = ans_3 & ans_4; possible &= ans_9 | (ans_3_4) | (ans_14 & ans_15); possible &= ans_14 | (ans_8 & ans_9) | (ans_3_4); } { const unsigned int ans_10_11 = ans_10 & ans_11; possible &= ans_5 | (ans_15 & ans_0) | (ans_10_11); possible &= ans_0 | (ans_10_11) | (ans_5 & ans_6); } if (!possible) continue; } possible |= (possible >> 16); //if(possible & 0x0f) //Does this make it faster? { if(possible & (1<< 0)) corners.push_back_fast((x + 0)<<octave, y<<octave); if(possible & (1<< 1)) corners.push_back_fast((x + 1)<<octave, y<<octave); if(possible & (1<< 2)) corners.push_back_fast((x + 2)<<octave, y<<octave); if(possible & (1<< 3)) corners.push_back_fast((x + 3)<<octave, y<<octave); if(possible & (1<< 4)) corners.push_back_fast((x + 4)<<octave, y<<octave); if(possible & (1<< 5)) corners.push_back_fast((x + 5)<<octave, y<<octave); if(possible & (1<< 6)) corners.push_back_fast((x + 6)<<octave, y<<octave); if(possible & (1<< 7)) corners.push_back_fast((x + 7)<<octave, y<<octave); } //if(possible & 0xf0) //Does this mak( , fast)r? { if(possible & (1<< 8)) corners.push_back_fast((x + 8)<<octave, y<<octave); if(possible & (1<< 9)) corners.push_back_fast((x + 9)<<octave, y<<octave); if(possible & (1<<10)) corners.push_back_fast((x +10)<<octave, y<<octave); if(possible & (1<<11)) corners.push_back_fast((x +11)<<octave, y<<octave); if(possible & (1<<12)) corners.push_back_fast((x +12)<<octave, y<<octave); if(possible & (1<<13)) corners.push_back_fast((x +13)<<octave, y<<octave); if(possible & (1<<14)) corners.push_back_fast((x +14)<<octave, y<<octave); if(possible & (1<<15)) corners.push_back_fast((x +15)<<octave, y<<octave); } } for(int x=xend; x < I->width - 3; x++) if(is_corner_9<Less>((const uint8_t*)I->imageData+I->widthStep*y+x, I->widthStep, barrier) || is_corner_9<Greater>((const uint8_t*)I->imageData+I->widthStep*y+x, I->widthStep, barrier)) corners.push_back_fast(x<<octave, y<<octave); } // 3 last rows have no features: if (ptr_feat_index_by_row) { *ptr_feat_index_by_row++ = corners.size(); *ptr_feat_index_by_row++ = corners.size(); *ptr_feat_index_by_row++ = corners.size(); } }