// Build an 8-bit image from a float image.
//
// A new image with the same width and height as `fim` is obtained from
// image_u8_create(). Every sample is multiplied by 255 and truncated to
// an int before being stored; no clamping is performed, so the input is
// presumably expected to lie in [0, 1] (TODO confirm against callers).
//
// Returns the image produced by image_u8_create().
image_u8_t *image_u8_create_from_f32(image_f32_t *fim)
{
    const int rows = fim->height;
    const int cols = fim->width;

    image_u8_t *out = image_u8_create(cols, rows);

    for (int row = 0; row < rows; row++) {
        for (int col = 0; col < cols; col++) {
            float sample = fim->buf[row*fim->stride + col];
            int scaled = (int) (255 * sample);

            // The two images may use different strides, so each is
            // indexed with its own.
            out->buf[row*out->stride + col] = scaled;
        }
    }

    return out;
}
image_u8_t *image_u8_rotate(const image_u8_t *in, double rad, uint8_t pad) { int iwidth = in->width, iheight = in->height; rad = -rad; // interpret y as being "down" float c = cos(rad), s = sin(rad); float p[][2] = { { 0, 0}, { iwidth, 0 }, { iwidth, iheight }, { 0, iheight} }; float xmin = HUGE, xmax = -HUGE, ymin = HUGE, ymax = -HUGE; float icx = iwidth / 2.0, icy = iheight / 2.0; for (int i = 0; i < 4; i++) { float px = p[i][0] - icx; float py = p[i][1] - icy; float nx = px*c - py*s; float ny = px*s + py*c; xmin = fmin(xmin, nx); xmax = fmax(xmax, nx); ymin = fmin(ymin, ny); ymax = fmax(ymax, ny); } int owidth = ceil(xmax-xmin), oheight = ceil(ymax - ymin); image_u8_t *out = image_u8_create(owidth, oheight); // iterate over output pixels. for (int oy = 0; oy < oheight; oy++) { for (int ox = 0; ox < owidth; ox++) { // work backwards from destination coordinates... // sample pixel centers. float sx = ox - owidth / 2.0 + .5; float sy = oy - oheight / 2.0 + .5; // project into input-image space int ix = floor(sx*c + sy*s + icx); int iy = floor(-sx*s + sy*c + icy); if (ix >= 0 && iy >= 0 && ix < iwidth && iy < iheight) out->buf[oy*out->stride+ox] = in->buf[iy*in->stride + ix]; else out->buf[oy*out->stride+ox] = pad; } } return out; }
image_u8_t *image_u8_copy(const image_u8_t *in) { uint8_t *buf = malloc(in->height*in->stride*sizeof(uint8_t)); memcpy(buf, in->buf, in->height*in->stride*sizeof(uint8_t)); // const initializer image_u8_t tmp = { .width = in->width, .height = in->height, .stride = in->stride, .buf = buf }; image_u8_t *copy = calloc(1, sizeof(image_u8_t)); memcpy(copy, &tmp, sizeof(image_u8_t)); return copy; } void image_u8_destroy(image_u8_t *im) { if (!im) return; free(im->buf); free(im); } //////////////////////////////////////////////////////////// // PNM file i/o image_u8_t *image_u8_create_from_pnm(const char *path) { pnm_t *pnm = pnm_create_from_file(path); if (pnm == NULL) return NULL; image_u8_t *im = NULL; switch (pnm->format) { case PNM_FORMAT_GRAY: { im = image_u8_create(pnm->width, pnm->height); for (int y = 0; y < im->height; y++) memcpy(&im->buf[y*im->stride], &pnm->buf[y*im->width], im->width); break; } case PNM_FORMAT_RGB: { im = image_u8_create(pnm->width, pnm->height); // Gray conversion for RGB is gray = (r + g + g + b)/4 for (int y = 0; y < im->height; y++) { for (int x = 0; x < im->width; x++) { uint8_t gray = (pnm->buf[y*im->width*3 + 3*x+0] + // r pnm->buf[y*im->width*3 + 3*x+1] + // g pnm->buf[y*im->width*3 + 3*x+1] + // g pnm->buf[y*im->width*3 + 3*x+2]) // b / 4; im->buf[y*im->stride + x] = gray; } } break; } } pnm_destroy(pnm); return im; }
// basically the same as threshold(), but assumes the input image is a // bayer image. It collects statistics separately for each 2x2 block // of pixels. image_u8_t *threshold_bayer(apriltag_detector_t *td, image_u8_t *im) { int w = im->width, h = im->height, s = im->stride; image_u8_t *threshim = image_u8_create(w, h); assert(threshim->stride == s); int tilesz = 32; assert((tilesz & 1) == 0); // must be multiple of 2 int tw = w/tilesz + 1; int th = h/tilesz + 1; uint8_t *im_max[4], *im_min[4]; for (int i = 0; i < 4; i++) { im_max[i] = (uint8_t *)calloc(tw*th, sizeof(uint8_t)); im_min[i] = (uint8_t *)calloc(tw*th, sizeof(uint8_t)); } for (int ty = 0; ty < th; ty++) { for (int tx = 0; tx < tw; tx++) { uint8_t max[4] = { 0, 0, 0, 0}; uint8_t min[4] = { 255, 255, 255, 255 }; for (int dy = 0; dy < tilesz; dy++) { if (ty*tilesz+dy >= h) continue; for (int dx = 0; dx < tilesz; dx++) { if (tx*tilesz+dx >= w) continue; // which bayer element is this pixel? int idx = (2*(dy&1) + (dx&1)); uint8_t v = im->buf[(ty*tilesz+dy)*s + tx*tilesz + dx]; if (v < min[idx]) min[idx] = v; if (v > max[idx]) max[idx] = v; } } for (int i = 0; i < 4; i++) { im_max[i][ty*tw+tx] = max[i]; im_min[i][ty*tw+tx] = min[i]; } } } for (int ty = 0; ty < th; ty++) { for (int tx = 0; tx < tw; tx++) { uint8_t max[4] = { 0, 0, 0, 0}; uint8_t min[4] = { 255, 255, 255, 255 }; for (int dy = -1; dy <= 1; dy++) { if (ty+dy < 0 || ty+dy >= th) continue; for (int dx = -1; dx <= 1; dx++) { if (tx+dx < 0 || tx+dx >= tw) continue; for (int i = 0; i < 4; i++) { uint8_t m = im_max[i][(ty+dy)*tw+tx+dx]; if (m > max[i]) max[i] = m; m = im_min[i][(ty+dy)*tw+tx+dx]; if (m < min[i]) min[i] = m; } } } // XXX CONSTANT // if (max - min < 30) // continue; // argument for biasing towards dark; specular highlights // can be substantially brighter than white tag parts uint8_t thresh[4]; for (int i = 0; i < 4; i++) { thresh[i] = min[i] + (max[i] - min[i]) / 2; } for (int dy = 0; dy < tilesz; dy++) { int y = ty*tilesz + dy; if (y 
>= h) continue; for (int dx = 0; dx < tilesz; dx++) { int x = tx*tilesz + dx; if (x >= w) continue; // which bayer element is this pixel? int idx = (2*(y&1) + (x&1)); uint8_t v = im->buf[y*s+x]; threshim->buf[y*s+x] = v > thresh[idx]; } } } } for (int i = 0; i < 4; i++) { free(im_min[i]); free(im_max[i]); } timeprofile_stamp(td->tp, "threshold"); return threshim; }
// Adaptive binarization of a grayscale image.
//
// Conceptually, each pixel is compared against the midpoint of the
// minimum and maximum intensity found in a window around it; where the
// window has too little contrast (max - min below
// td->qtp.min_white_black_diff) no binarization is attempted.
//
// Evaluating a window per pixel would be needlessly expensive, so the
// extrema are computed once per 16x16 tile. To avoid artifacts when a
// high-contrast feature sits near a tile border (where stepping into the
// next tile would change max/min abruptly), the extrema used for a tile
// are taken over its full 3x3 tile neighborhood. The sampling areas of
// nearby pixels therefore overlap by at least one tile.
//
// The windows only need to be large enough to capture edge transitions;
// a tag does not have to fit inside a tile.
//
// Returns a new image from image_u8_create(): pixels in tiles with enough
// contrast are written as 1 (above threshold) or 0; pixels in
// low-contrast tiles are not written and keep whatever value
// image_u8_create() gave them. A "threshold" stamp is recorded on td->tp.
image_u8_t *threshold(apriltag_detector_t *td, image_u8_t *im)
{
    const int width = im->width;
    const int height = im->height;
    const int stride = im->stride;

    image_u8_t *threshim = image_u8_create(width, height);
    assert(threshim->stride == stride);

    const int tilesz = 16;
    const int tiles_x = width/tilesz + 1;
    const int tiles_y = height/tilesz + 1;

    uint8_t *tile_max = (uint8_t *)calloc(tiles_x*tiles_y, sizeof(uint8_t));
    uint8_t *tile_min = (uint8_t *)calloc(tiles_x*tiles_y, sizeof(uint8_t));

    // Pass 1: min/max statistics of every tile.
    for (int ty = 0; ty < tiles_y; ty++) {
        const int y_begin = ty*tilesz;
        const int y_end = (y_begin + tilesz < height) ? y_begin + tilesz : height;

        for (int tx = 0; tx < tiles_x; tx++) {
            const int x_begin = tx*tilesz;
            const int x_end = (x_begin + tilesz < width) ? x_begin + tilesz : width;

            uint8_t hi = 0, lo = 255;

            for (int y = y_begin; y < y_end; y++) {
                for (int x = x_begin; x < x_end; x++) {
                    uint8_t v = im->buf[y*stride + x];
                    if (v < lo)
                        lo = v;
                    if (v > hi)
                        hi = v;
                }
            }

            tile_max[ty*tiles_x + tx] = hi;
            tile_min[ty*tiles_x + tx] = lo;
        }
    }

    // Pass 2: a 3x3 max/min "blur" over the tile statistics reduces
    // artifacts from abrupt threshold changes; the blurred extrema are
    // then used to binarize the pixels of the center tile.
    for (int ty = 0; ty < tiles_y; ty++) {
        const int ny_first = (ty > 0) ? ty - 1 : 0;
        const int ny_last = (ty + 1 < tiles_y) ? ty + 1 : tiles_y - 1;
        const int y_begin = ty*tilesz;
        const int y_end = (y_begin + tilesz < height) ? y_begin + tilesz : height;

        for (int tx = 0; tx < tiles_x; tx++) {
            const int nx_first = (tx > 0) ? tx - 1 : 0;
            const int nx_last = (tx + 1 < tiles_x) ? tx + 1 : tiles_x - 1;

            uint8_t hi = 0, lo = 255;

            for (int ny = ny_first; ny <= ny_last; ny++) {
                for (int nx = nx_first; nx <= nx_last; nx++) {
                    uint8_t m = tile_max[ny*tiles_x + nx];
                    if (m > hi)
                        hi = m;

                    m = tile_min[ny*tiles_x + nx];
                    if (m < lo)
                        lo = m;
                }
            }

            // XXX Tunable: skip contrast-free regions entirely.
            if (hi - lo < td->qtp.min_white_black_diff)
                continue;

            // Midpoint threshold.
            // (argument for biasing towards dark: specular highlights
            // can be substantially brighter than white tag parts.)
            uint8_t thresh = lo + (hi - lo) / 2;

            const int x_begin = tx*tilesz;
            const int x_end = (x_begin + tilesz < width) ? x_begin + tilesz : width;

            for (int y = y_begin; y < y_end; y++) {
                for (int x = x_begin; x < x_end; x++) {
                    uint8_t v = im->buf[y*stride + x];
                    threshim->buf[y*stride + x] = v > thresh;
                }
            }
        }
    }

    free(tile_min);
    free(tile_max);

    timeprofile_stamp(td->tp, "threshold");

    return threshim;
}
// Find candidate quads in an image.
//
// Steps:
//   1. Binarize the image with threshold() and build an edge image that
//      labels black-next-to-white pixels (0xc0) and white-next-to-black
//      pixels (0x3f).
//   2. Group same-labelled edge pixels into connected components with a
//      union-find, then collect, for every pair of adjacent components
//      with opposite labels, the points along their shared boundary.
//   3. Hand the resulting clusters to do_quad_task() via the detector's
//      worker pool, which appends to the `quads` array.
//
// When td->debug is set, several debug files are written to the current
// directory (debug_threshold.pnm, debug_edge.pnm, debug_segmentation.pnm,
// debug_lines.ps).
//
// td: detector; td->qtp, td->tp, td->wp, td->nthreads and td->debug are
//     read here.
// im: input image.
//
// Returns a zarray of struct quad. The array is not destroyed here, so
// the caller is presumably responsible for it (confirm against callers).
zarray_t *apriltag_quad_thresh(apriltag_detector_t *td, image_u8_t *im)
{
    ////////////////////////////////////////////////////////
    // step 1. threshold the image, creating the edge image.

    int w = im->width, h = im->height, s = im->stride;

    image_u8_t *threshim = threshold(td, im);
    assert(threshim->stride == s);

    image_u8_t *edgeim = image_u8_create(w, h);

    {
        image_u8_t *sumim = image_u8_create(w, h);

        // apply a horizontal sum kernel of width 3
        for (int y = 0; y < h; y++) {
            for (int x = 1; x+1 < w; x++) {
                sumim->buf[y*s + x] =
                    threshim->buf[y*s + x - 1] +
                    threshim->buf[y*s + x + 0] +
                    threshim->buf[y*s + x + 1];
            }
        }
        timeprofile_stamp(td->tp, "sumim");

        // deglitch: flip isolated pixels, i.e. a black pixel whose other
        // eight neighbors are all white, or a white pixel whose other
        // eight neighbors are all black. sumim is kept consistent with
        // the modified threshim.
        if (td->qtp.deglitch) {
            for (int y = 1; y+1 < h; y++) {
                for (int x = 1; x+1 < w; x++) {
                    // black pixel surrounded by white
                    if (threshim->buf[y*s + x] == 0 &&
                        sumim->buf[y*s + x - s] + sumim->buf[y*s + x] + sumim->buf[y*s + x + s] == 8) {
                        threshim->buf[y*s + x] = 1;
                        sumim->buf[y*s + x - 1]++;
                        sumim->buf[y*s + x + 0]++;
                        sumim->buf[y*s + x + 1]++;
                    }

                    // white pixel surrounded by black
                    if (threshim->buf[y*s + x] == 1 &&
                        sumim->buf[y*s + x - s] + sumim->buf[y*s + x] + sumim->buf[y*s + x + s] == 1) {
                        threshim->buf[y*s + x] = 0;
                        sumim->buf[y*s + x - 1]--;
                        sumim->buf[y*s + x + 0]--;
                        sumim->buf[y*s + x + 1]--;
                    }
                }
            }

            timeprofile_stamp(td->tp, "deglitch");
        }

        // apply a vertical sum kernel of width 3; check if any
        // over-threshold pixels are adjacent to an under-threshold
        // pixel.
        //
        // There are two types of edges: white pixels neighboring a
        // black pixel, and black pixels neighboring a white pixel. We
        // label these separately. (Values 0xc0 and 0x3f are picked
        // such that they add to 255 (see below) and so that they can be
        // viewed as pixel intensities for visualization purposes.)
        //
        // symmetry of detection. We don't want to use JUST "black
        // near white" (or JUST "white near black"), because that
        // biases the detection towards one side of the edge. This
        // measurably reduces detection performance.
        //
        // On large tags, we could treat "neighbor" pixels the same
        // way. But on very small tags, there may be other edges very
        // near the tag edge. Since each of these edges is effectively
        // two pixels thick (the white pixel near the black pixel, and
        // the black pixel near the white pixel), it becomes likely
        // that these two nearby edges will actually touch.
        //
        // A partial solution to this problem is to define edges to be
        // adjacent white-near-black and black-near-white pixels.
        for (int y = 1; y+1 < h; y++) {
            for (int x = 1; x+1 < w; x++) {
                if (threshim->buf[y*s + x] == 0) {
                    // edge: black pixel next to white pixel
                    if (sumim->buf[y*s + x - s] + sumim->buf[y*s + x] + sumim->buf[y*s + x + s] > 0)
                        edgeim->buf[y*s + x] = 0xc0;
                } else {
                    // edge: white pixel next to black pixel. When both
                    // edge types are on, we get less bias towards one
                    // side of the edge.
                    if (sumim->buf[y*s + x - s] + sumim->buf[y*s + x] + sumim->buf[y*s + x + s] < 9)
                        edgeim->buf[y*s + x] = 0x3f;
                }
            }
        }

        if (td->debug) {
            // scale the 0/1 threshold image to 0/255 for viewing
            for (int y = 0; y < h; y++) {
                for (int x = 0; x < w; x++) {
                    threshim->buf[y*s + x] *= 255;
                }
            }

            image_u8_write_pnm(threshim, "debug_threshold.pnm");
            image_u8_write_pnm(edgeim, "debug_edge.pnm");
        }

        image_u8_destroy(threshim);
        image_u8_destroy(sumim);
    }

    timeprofile_stamp(td->tp, "edges");

    ////////////////////////////////////////////////////////
    // step 2. find connected components.

    unionfind_t *uf = unionfind_create(w * h);

    for (int y = 1; y < h - 1; y++) {
        for (int x = 1; x < w - 1; x++) {
            uint8_t v = edgeim->buf[y*s + x];
            if (v == 0)
                continue;

            // (dx,dy) pairs for 8 connectivity:
            //          (REFERENCE) (1, 0)
            // (-1, 1)    (0, 1)    (1, 1)
            //
            // i.e., the minimum value of dx should be:
            //   y=0:   1
            //   y=1:  -1
            for (int dy = 0; dy <= 1; dy++) {
                for (int dx = 1-2*dy; dx <= 1; dx++) {
                    if (edgeim->buf[(y+dy)*s + (x+dx)] == v) {
                        // union-find ids use width w, not stride s
                        unionfind_connect(uf, y*w + x, (y+dy)*w + x + dx);
                    }
                }
            }
        }
    }

    timeprofile_stamp(td->tp, "unionfind");

    // Maps a pair of component representatives (packed into 64 bits) to
    // the array of boundary points between those two components.
    zhash_t *clustermap = zhash_create(sizeof(uint64_t), sizeof(zarray_t*),
                                       zhash_uint64_hash, zhash_uint64_equals);

    for (int y = 1; y < h-1; y++) {
        for (int x = 1; x < w-1; x++) {
            uint8_t v0 = edgeim->buf[y*s + x];
            if (v0 == 0)
                continue;

            uint64_t rep0 = unionfind_get_representative(uf, y*w + x);

            // 4 connectivity (2 neighbors to check):
            //   n = 1 -> (dx, dy) = (0, 1);  n = 2 -> (dx, dy) = (1, 0)
            for (int n = 1; n <= 2; n++) {
                int dy = n & 1;
                int dx = (n & 2) >> 1;

                uint8_t v1 = edgeim->buf[(y+dy)*s + x + dx];

                // only opposite edge labels (0xc0 + 0x3f == 255) pair up
                if (v0 + v1 != 255)
                    continue;

                uint64_t rep1 = unionfind_get_representative(uf, (y+dy)*w + x+dx);

                // order-independent key: larger id in the high 32 bits
                uint64_t clusterid;
                if (rep0 < rep1)
                    clusterid = (rep1 << 32) + rep0;
                else
                    clusterid = (rep0 << 32) + rep1;

                zarray_t *cluster = NULL;
                if (!zhash_get(clustermap, &clusterid, &cluster)) {
                    cluster = zarray_create(sizeof(struct pt));
                    zhash_put(clustermap, &clusterid, &cluster, NULL, NULL);
                }

                // NB: We will add some points multiple times to a
                // given cluster. I don't know an efficient way to
                // avoid that here; we remove them later on when we
                // sort points by pt_compare_theta.
                struct pt p0 = { .x = x, .y = y };
                zarray_add(cluster, &p0);

                struct pt p1 = { .x = x+dx, .y = y+dy };
                zarray_add(cluster, &p1);
            }
        }
    }

    // make segmentation image.
    if (td->debug) {
        image_u8_t *d = image_u8_create(w, h);
        assert(d->stride == s);

        // one random gray level per component representative (0 = unset)
        uint8_t *colors = (uint8_t*) calloc(w*h, 1);

        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++) {
                uint32_t v = unionfind_get_representative(uf, y*w+x);
                uint32_t set_size = unionfind_get_set_size(uf, y*w+x);
                if (set_size < td->qtp.min_cluster_pixels)
                    continue;

                uint8_t color = colors[v];

                if (color == 0) {
                    const int bias = 20;
                    color = bias + (random() % (255-bias));
                    colors[v] = color;
                }

                // mix == 1.0 shows only the component color; lower it to
                // blend in the source image.
                float mix = 1.0;
                d->buf[y*d->stride + x] = mix*color + (1-mix)*im->buf[y*im->stride + x];
            }
        }

        free(colors);

        image_u8_write_pnm(d, "debug_segmentation.pnm");
        image_u8_destroy(d);
    }

    timeprofile_stamp(td->tp, "make clusters");

    ////////////////////////////////////////////////////////
    // step 3. process each connected component.

    zarray_t *clusters = zhash_values(clustermap);
    zhash_destroy(clustermap);

    zarray_t *quads = zarray_create(sizeof(struct quad));

    int sz = zarray_size(clusters);
    int chunksize = 1 + sz / (APRILTAG_TASKS_PER_THREAD_TARGET * td->nthreads);

    // The task descriptors live on this stack frame; workerpool_run() is
    // assumed to return only after every task has finished (TODO confirm).
    struct quad_task tasks[sz / chunksize + 1];

    int ntasks = 0;
    for (int i = 0; i < sz; i += chunksize) {
        tasks[ntasks].td = td;
        tasks[ntasks].cidx0 = i;
        tasks[ntasks].cidx1 = imin(sz, i + chunksize);
        tasks[ntasks].h = h;
        tasks[ntasks].w = w;
        tasks[ntasks].quads = quads;
        tasks[ntasks].clusters = clusters;
        tasks[ntasks].im = im;

        workerpool_add_task(td->wp, do_quad_task, &tasks[ntasks]);
        ntasks++;
    }

    workerpool_run(td->wp);

    timeprofile_stamp(td->tp, "fit quads to clusters");

    if (td->debug) {
        FILE *f = fopen("debug_lines.ps", "w");

        if (f == NULL) {
            // Debug output is best-effort: report and carry on rather
            // than dereferencing a NULL stream.
            fprintf(stderr, "apriltag_quad_thresh: could not open debug_lines.ps for writing\n");
        } else {
            fprintf(f, "%%!PS\n\n");

            // Darkened copy of the input so the colored quad outlines
            // stand out against the background.
            image_u8_t *im2 = image_u8_copy(im);
            image_u8_darken(im2);
            image_u8_darken(im2);

            // assume letter, which is 612x792 points.
            double scale = fmin(612.0/im->width, 792.0/im2->height);
            fprintf(f, "%.15f %.15f scale\n", scale, scale);
            fprintf(f, "0 %d translate\n", im2->height);
            fprintf(f, "1 -1 scale\n");

            postscript_image(f, im2);
            image_u8_destroy(im2);

            for (int i = 0; i < zarray_size(quads); i++) {
                struct quad *q;
                zarray_get_volatile(quads, i, &q);

                float rgb[3];
                int bias = 100;

                for (int c = 0; c < 3; c++)
                    rgb[c] = bias + (random() % (255-bias));

                fprintf(f, "%f %f %f setrgbcolor\n", rgb[0]/255.0f, rgb[1]/255.0f, rgb[2]/255.0f);
                fprintf(f, "%.15f %.15f moveto %.15f %.15f lineto %.15f %.15f lineto %.15f %.15f lineto %.15f %.15f lineto stroke\n",
                        q->p[0][0], q->p[0][1],
                        q->p[1][0], q->p[1][1],
                        q->p[2][0], q->p[2][1],
                        q->p[3][0], q->p[3][1],
                        q->p[0][0], q->p[0][1]);
            }

            fclose(f);
        }
    }

    unionfind_destroy(uf);

    // each cluster is its own zarray and must be released individually
    for (int i = 0; i < zarray_size(clusters); i++) {
        zarray_t *cluster;
        zarray_get(clusters, i, &cluster);
        zarray_destroy(cluster);
    }

    zarray_destroy(clusters);

    image_u8_destroy(edgeim);

    return quads;
}