/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //detect and save detected boxes double *detect(IplImage *IM,MODEL *MO,double thresh,int *D_NUMS,double *A_SCORE) { //for time measurement clock_t t1,t2,t3; //initialize scale information for hierachical detection double *scales=ini_scales(MO->MI,IM,IM->width,IM->height); //initialize feature-size matrix int *featsize=ini_featsize(MO->MI); //calculate feature pyramid t1=clock(); double **feature=calc_f_pyramid(IM,MO->MI,featsize,scales); t2=clock(); //detect boundary boxes double *boxes = get_boxes(feature,scales,featsize,MO,D_NUMS,A_SCORE,thresh); t3=clock(); s_free(scales); //release scale-information s_free(featsize); //release feat size information free_features(feature,MO->MI); return boxes; }
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //detect and save detected boxes FLOAT *detect(IplImage *IM,MODEL *MO,FLOAT thresh,int *D_NUMS,FLOAT *A_SCORE) { /* for measurement */ struct timeval tv; struct timeval tv_ini_scales_start, tv_ini_scales_end; float time_ini_scales; struct timeval tv_ini_feat_size_start, tv_ini_feat_size_end; float time_ini_feat_size; struct timeval tv_get_boxes_start, tv_get_boxes_end; float time_get_boxes; struct timeval tv_calc_f_pyramid_start, tv_calc_f_pyramid_end; float time_calc_f_pyramid = 0; //for time measurement clock_t t1,t2,t3; //initialize scale information for hierachical detection gettimeofday(&tv_ini_scales_start, NULL); FLOAT *scales=ini_scales(MO->MI,IM,IM->width,IM->height); gettimeofday(&tv_ini_scales_end, NULL); //initialize feature-size matrix gettimeofday(&tv_ini_feat_size_start, NULL); int *featsize=ini_featsize(MO->MI); gettimeofday(&tv_ini_feat_size_end, NULL); //calculate feature pyramid t1=clock(); gettimeofday(&tv_calc_f_pyramid_start, NULL); FLOAT **feature=calc_f_pyramid(IM,MO->MI,featsize,scales); gettimeofday(&tv_calc_f_pyramid_end, NULL); tvsub(&tv_calc_f_pyramid_end, &tv_calc_f_pyramid_start, &tv); time_kernel += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; time_calc_f_pyramid += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; printf("\n"); printf("calc_f_pyramid %f[ms]\n", time_calc_f_pyramid); t2=clock(); //detect boundary boxes gettimeofday(&tv_get_boxes_start, NULL); FLOAT *boxes = get_boxes(feature,scales,featsize,MO,D_NUMS,A_SCORE,thresh); gettimeofday(&tv_get_boxes_end, NULL); t3=clock(); #if 1 // tvsub(&tv_ini_scales_end, &tv_ini_scales_start, &tv); // time_ini_scales = tv.tv_sec * 1000.0 + 
(float)tv.tv_usec / 1000.0; // tvsub(&tv_ini_feat_size_end, &tv_ini_feat_size_start, &tv); // time_ini_feat_size = tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; // tvsub(&tv_calc_f_pyramid_end, &tv_calc_f_pyramid_start, &tv); // time_calc_f_pyramid = tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; // tvsub(&tv_get_boxes_end, &tv_get_boxes_start, &tv); // time_get_boxes = tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; // printf("ini_scales : %f\n", time_ini_scales); // printf("ini_feat_size : %f\n", time_ini_feat_size); // printf("calc_f_pyramid : %f\n", time_calc_f_pyramid); // printf("get_boxes : %f\n", time_get_boxes); printf("\n"); #endif s_free(scales); //release scale-information s_free(featsize); //release feat size information free_features(feature,MO->MI); return boxes; }
// Run the detection-output stage: decode prior boxes, filter them per class by
// confidence, apply per-class NMS, keep the best detections overall and write
// them to the output tensor.
//
// Inputs (layouts as stated by the original author's comments):
//   input 0 - location   [b, num_prior*4, 1, 1]
//   input 1 - confidence [b, num_prior*num_classes, 1, 1]
//   input 2 - priorbox   [b, 2, num_prior*4, 1]
// Output:
//   output 0 - [1, num_detected, 6, 1]; each row is
//              {class_idx, score, x0, y0, x1, y1}.
//
// Only batch index 0 is processed. Class 0 is skipped as background.
// A negative nms_top_k / keep_top_k means "no limit".
//
// Returns false when the node's op is not a DetectionOutput or the priorbox
// tensor has fewer than three dimensions; true otherwise.
bool Run(Node *node)
{
    const Tensor *loc_tensor = node->GetInputTensor(0);
    const Tensor *conf_tensor = node->GetInputTensor(1);
    const Tensor *priorbox_tensor = node->GetInputTensor(2);
    Tensor *output_tensor = node->GetOutputTensor(0);

    // dynamic_cast yields nullptr on a type mismatch; do not dereference it.
    DetectionOutput *detect_op = dynamic_cast<DetectionOutput *>(node->GetOp());
    if (detect_op == nullptr)
        return false;
    DetectionOutputParam *param_ = detect_op->GetParam();

    float *location = (float *)get_tensor_mem(loc_tensor);
    float *confidence = (float *)get_tensor_mem(conf_tensor);
    float *priorbox = (float *)get_tensor_mem(priorbox_tensor);

    // The prior count is read from dimension 2, so the shape needs rank >= 3.
    const std::vector<int>& dims = priorbox_tensor->GetShape().GetDim();
    if (dims.size() < 3)
        return false;

    const int num_priorx4 = dims[2];
    const int num_prior = num_priorx4 / 4;
    const int num_classes = param_->num_classes;

    // Only batch 0 is supported; the offsets are kept explicit so the
    // per-batch strides stay visible.
    const int b = 0;
    float *loc_ptr = location + b * num_priorx4;
    float *conf_ptr = confidence + b * num_prior * num_classes;
    float *prior_ptr = priorbox + b * num_priorx4 * 2;

    std::vector<Box> boxes(num_prior);
    get_boxes(boxes, num_prior, loc_ptr, prior_ptr);

    const auto by_score_desc = [](const Box& a, const Box& b) { return a.score > b.score; };

    // Survivors of every class, appended in ascending class order.
    std::vector<Box> bbox_rects;

    // start from 1 to ignore background class
    for (int cls = 1; cls < num_classes; cls++)
    {
        // collect the priors whose score for this class passes the threshold
        std::vector<Box> class_box;
        for (int j = 0; j < num_prior; j++)
        {
            const float score = conf_ptr[j * num_classes + cls];
            if (score > param_->confidence_threshold)
            {
                Box candidate = boxes[j];
                candidate.score = score;
                candidate.class_idx = cls;
                class_box.push_back(candidate);
            }
        }

        std::sort(class_box.begin(), class_box.end(), by_score_desc);

        // keep nms_top_k; a negative value must not reach resize(), where it
        // would be converted to a huge unsigned size
        if (param_->nms_top_k >= 0 && param_->nms_top_k < static_cast<int>(class_box.size()))
        {
            class_box.resize(param_->nms_top_k);
        }

        // apply nms
        std::vector<int> picked;
        nms_sorted_bboxes(class_box, picked, param_->nms_threshold);

        // select
        for (const int idx : picked)
        {
            bbox_rects.push_back(class_box[idx]);
        }
    }

    // global sort inplace
    std::sort(bbox_rects.begin(), bbox_rects.end(), by_score_desc);

    // keep_top_k, with the same guard against negative limits
    if (param_->keep_top_k >= 0 && param_->keep_top_k < static_cast<int>(bbox_rects.size()))
    {
        bbox_rects.resize(param_->keep_top_k);
    }

    // output [b,num,6,1]
    const int values_per_box = 6;
    const int num_detected = static_cast<int>(bbox_rects.size());
    const int total_size = static_cast<int>(num_detected * values_per_box * sizeof(float));

    // alloc mem and attach it to the output tensor together with its deleter
    void *mem_addr = mem_alloc(total_size);
    set_tensor_mem(output_tensor, mem_addr, total_size, mem_free);
    float *output = (float *)get_tensor_mem(output_tensor);

    TShape &out_shape = output_tensor->GetShape();
    std::vector<int> outdim = {1, num_detected, values_per_box, 1};
    out_shape.SetDim(outdim);

    for (int i = 0; i < num_detected; i++)
    {
        const Box& r = bbox_rects[i];
        float *outptr = output + i * values_per_box;
        outptr[0] = r.class_idx;
        outptr[1] = r.score;
        outptr[2] = r.x0;
        outptr[3] = r.y0;
        outptr[4] = r.x1;
        outptr[5] = r.y1;
    }

    return true;
}
void extract_boxes(char *cfgfile, char *weightfile) { network net = parse_network_cfg(cfgfile); if(weightfile){ load_weights(&net, weightfile); } set_batch_network(&net, 1); fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); srand(time(0)); char *val_images = "/home/pjreddie/data/voc/test/train.txt"; list *plist = get_paths(val_images); char **paths = (char **)list_to_array(plist); layer l = net.layers[net.n - 1]; int num_boxes = l.side; int num = l.n; int classes = l.classes; int j; box *boxes = calloc(num_boxes*num_boxes*num, sizeof(box)); float **probs = calloc(num_boxes*num_boxes*num, sizeof(float *)); for(j = 0; j < num_boxes*num_boxes*num; ++j) probs[j] = calloc(classes+1, sizeof(float *)); int N = plist->size; int i=0; int k; int count = 0; float iou_thresh = .3; for (i = 0; i < N; ++i) { fprintf(stderr, "%5d %5d\n", i, count); char *path = paths[i]; image orig = load_image_color(path, 0, 0); image resized = resize_image(orig, net.w, net.h); float *X = resized.data; float *predictions = network_predict(net, X); get_boxes(predictions+1+classes, num, num_boxes, 5+classes, boxes); get_probs(predictions, num*num_boxes*num_boxes, classes, 5+classes, probs); char *labelpath = find_replace(path, "images", "labels"); labelpath = find_replace(labelpath, "JPEGImages", "labels"); labelpath = find_replace(labelpath, ".jpg", ".txt"); labelpath = find_replace(labelpath, ".JPEG", ".txt"); int num_labels = 0; box_label *truth = read_boxes(labelpath, &num_labels); FILE *label = stdin; for(k = 0; k < num_boxes*num_boxes*num; ++k){ int overlaps = 0; for (j = 0; j < num_labels; ++j) { box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; float iou = box_iou(boxes[k], t); if (iou > iou_thresh){ if (!overlaps) { char buff[256]; sprintf(buff, "/data/extracted/labels/%d.txt", count); label = fopen(buff, "w"); overlaps = 1; } fprintf(label, "%d %f\n", truth[j].id, iou); } } if (overlaps) { char buff[256]; sprintf(buff, 
"/data/extracted/imgs/%d", count++); int dx = (boxes[k].x - boxes[k].w/2) * orig.w; int dy = (boxes[k].y - boxes[k].h/2) * orig.h; int w = boxes[k].w * orig.w; int h = boxes[k].h * orig.h; image cropped = crop_image(orig, dx, dy, w, h); image sized = resize_image(cropped, 224, 224); #ifdef OPENCV save_image_jpg(sized, buff); #endif free_image(sized); free_image(cropped); fclose(label); } } free(truth); free_image(orig); free_image(resized); } }
void validate_recall(char *cfgfile, char *weightfile) { network net = parse_network_cfg(cfgfile); if(weightfile){ load_weights(&net, weightfile); } set_batch_network(&net, 1); fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); srand(time(0)); char *val_images = "/home/pjreddie/data/voc/test/2007_test.txt"; list *plist = get_paths(val_images); char **paths = (char **)list_to_array(plist); layer l = net.layers[net.n - 1]; int num_boxes = l.side; int num = l.n; int classes = l.classes; int j; box *boxes = calloc(num_boxes*num_boxes*num, sizeof(box)); float **probs = calloc(num_boxes*num_boxes*num, sizeof(float *)); for(j = 0; j < num_boxes*num_boxes*num; ++j) probs[j] = calloc(classes+1, sizeof(float *)); int N = plist->size; int i=0; int k; float iou_thresh = .5; float thresh = .1; int total = 0; int correct = 0; float avg_iou = 0; int nms = 1; int proposals = 0; int save = 1; for (i = 0; i < N; ++i) { char *path = paths[i]; image orig = load_image_color(path, 0, 0); image resized = resize_image(orig, net.w, net.h); float *X = resized.data; float *predictions = network_predict(net, X); get_boxes(predictions+1+classes, num, num_boxes, 5+classes, boxes); get_probs(predictions, num*num_boxes*num_boxes, classes, 5+classes, probs); if (nms) do_nms(boxes, probs, num*num_boxes*num_boxes, (classes>0) ? 
classes : 1, iou_thresh); char *labelpath = find_replace(path, "images", "labels"); labelpath = find_replace(labelpath, "JPEGImages", "labels"); labelpath = find_replace(labelpath, ".jpg", ".txt"); labelpath = find_replace(labelpath, ".JPEG", ".txt"); int num_labels = 0; box_label *truth = read_boxes(labelpath, &num_labels); for(k = 0; k < num_boxes*num_boxes*num; ++k){ if(probs[k][0] > thresh){ ++proposals; if(save){ char buff[256]; sprintf(buff, "/data/extracted/nms_preds/%d", proposals); int dx = (boxes[k].x - boxes[k].w/2) * orig.w; int dy = (boxes[k].y - boxes[k].h/2) * orig.h; int w = boxes[k].w * orig.w; int h = boxes[k].h * orig.h; image cropped = crop_image(orig, dx, dy, w, h); image sized = resize_image(cropped, 224, 224); #ifdef OPENCV save_image_jpg(sized, buff); #endif free_image(sized); free_image(cropped); sprintf(buff, "/data/extracted/nms_pred_boxes/%d.txt", proposals); char *im_id = basecfg(path); FILE *fp = fopen(buff, "w"); fprintf(fp, "%s %d %d %d %d\n", im_id, dx, dy, dx+w, dy+h); fclose(fp); free(im_id); } } } for (j = 0; j < num_labels; ++j) { ++total; box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; float best_iou = 0; for(k = 0; k < num_boxes*num_boxes*num; ++k){ float iou = box_iou(boxes[k], t); if(probs[k][0] > thresh && iou > best_iou){ best_iou = iou; } } avg_iou += best_iou; if(best_iou > iou_thresh){ ++correct; } } free(truth); free_image(orig); free_image(resized); fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); } }