/*
 * Fills m_seqRects from the contours of the given images and then merges the
 * rectangles in up to three passes with a growing merge distance
 * (dMinSize/8, dMinSize/4, dMinSize/2).
 *
 * After FindContours() and after every MergeRects() call the sequence is
 * re-scored with Energy() and re-sorted with CompareEnergy; the later passes
 * are gated on the iEnergy of the element that ends up first in the sorted
 * sequence and on how many rectangles are left.
 *
 * Returns immediately when FindContours() produced no rectangles.
 */
void CvFaceElement::FindRects(IplImage* img, IplImage* thresh, int nLayers, int dMinSize)
{
    FindContours(img, thresh, nLayers, dMinSize / 4);
    if (m_seqRects->total == 0)
        return;

    Energy();
    cvSeqSort(m_seqRects, CompareEnergy, NULL);

    // Pass 1: finest merge distance, only while the list is still short.
    if (m_seqRects->total < 32)
    {
        MergeRects(dMinSize / 8);
        Energy();
        cvSeqSort(m_seqRects, CompareEnergy, NULL);
    }

    // Pass 2: medium merge distance.
    CvTrackingRect* pHead = (CvTrackingRect*)cvGetSeqElem(m_seqRects, 0);
    const bool bMergeQuarter =
        (pHead->iEnergy > 100 && m_seqRects->total < 32) || (m_seqRects->total < 16);
    if (bMergeQuarter)
    {
        MergeRects(dMinSize / 4);
        Energy();
        cvSeqSort(m_seqRects, CompareEnergy, NULL);
    }

    // Pass 3: coarsest merge distance; the head element may have changed after the re-sort.
    pHead = (CvTrackingRect*)cvGetSeqElem(m_seqRects, 0);
    const bool bMergeHalf =
        (pHead->iEnergy > 100 && m_seqRects->total < 16) ||
        (pHead->iEnergy > 200 && m_seqRects->total < 32);
    if (bMergeHalf)
    {
        MergeRects(dMinSize / 2);
        Energy();
        cvSeqSort(m_seqRects, CompareEnergy, NULL);
    }
}// void CvFaceElement::FindRects(IplImage* img, IplImage* thresh, int nLayers, int dMinSize)
/*
 * Thresholds imgGray at a series of grey levels, runs cvFindContours() on each
 * binary image and hands every contour list to AddContours2Rect(), which is
 * expected to fill m_seqRects. The resulting sequence is finally sorted with
 * CompareContourRect.
 *
 * The level range and step start as 0..255 and 255/m_iNumLayers and may be
 * adjusted by ThresholdingParam(), which receives them together with the image.
 *
 * Side effects: reallocates m_imgThresh, and recreates m_mstgContours,
 * m_mstgRects, m_seqContours[] and m_seqRects. The function returns early
 * (leaving the members in whatever state was reached) when an allocation
 * fails or when the configured number of layers is not positive.
 */
void FaceDetection::FindContours(IplImage* imgGray)
{
    ReallocImage(&m_imgThresh, cvGetSize(imgGray), 1);
    if (NULL == m_imgThresh)
        return;

    int iNumLayers = m_iNumLayers;
    if (iNumLayers <= 0)
        return; // avoids the division by zero below

    int iMinLevel = 0, iMaxLevel = 255, iStep = 255 / iNumLayers;
    ThresholdingParam(imgGray, iNumLayers, iMinLevel, iMaxLevel, iStep);
    if (iStep < 1)
        iStep = 1; // a non-positive step would never reach iMaxLevel

    // init
    cvReleaseMemStorage(&m_mstgContours);
    m_mstgContours = cvCreateMemStorage();
    if (NULL == m_mstgContours)
        return;
    memset(m_seqContours, 0, sizeof(CvSeq*) * MAX_LAYERS);

    cvReleaseMemStorage(&m_mstgRects);
    m_mstgRects = cvCreateMemStorage();
    if (NULL == m_mstgRects)
        return;
    m_seqRects = cvCreateSeq(0, sizeof(CvSeq), sizeof(CvContourRect), m_mstgRects);
    if (NULL == m_seqRects)
        return;

    // find contours; i is bounded by MAX_LAYERS because m_seqContours has
    // exactly that many slots (see the memset above)
    for (int l = iMinLevel, i = 0; l < iMaxLevel && i < MAX_LAYERS; l += iStep, i++)
    {
        cvThreshold(imgGray, m_imgThresh, (double)l, (double)255, CV_THRESH_BINARY);
        if (cvFindContours(m_imgThresh, m_mstgContours, &m_seqContours[i],
                           sizeof(CvContour), CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE))
            AddContours2Rect(m_seqContours[i], l, i);
    }

    // sort rects
    cvSeqSort(m_seqRects, CompareContourRect, NULL);
}// void FaceDetection::FindContours(IplImage* imgGray)
/*
 * Runs the detection stage of SIFT on img: builds the Gaussian and DoG
 * pyramids, finds scale-space extrema and copies them, sorted with
 * FeatureCmp, into the member array feat.
 *
 * @param img  input image; must not be NULL
 * @return     number of features written to feat, or -1 when img is NULL
 *
 * Side effects: (re)creates the meanFilter input/output buffers sized from
 * the image, overwrites the members gauss_pyr and feat, and prints progress
 * banners to stdout.
 * NOTE(review): a previously allocated feat array is overwritten without
 * being freed here - confirm who owns it between calls.
 */
int SiftGPU::DoSift( IplImage* img )
{
    printf("\n ----------- DoSift START --------------- \n");

    /* check arguments before img is dereferenced for the buffer sizes */
    if( ! img )
    {
        printf( "NULL pointer error, %s, line %d", __FILE__, __LINE__ );
        return -1;
    }

    IplImage* init_img;
    IplImage*** dog_pyr;
    CvMemStorage* storage;
    CvSeq* features;
    int octvs, i, n = 0;

    meanFilter->CreateBuffersIn(4*img->width*img->height*sizeof(float),5);
    meanFilter->CreateBuffersOut(4*img->width*img->height*sizeof(float),3);

    /* build scale space pyramid;
       smallest dimension of top level is ~4 pixels */
    init_img = CreateInitialImg( img, img_dbl, sigma );
    octvs = log( (float)MIN( init_img->width, init_img->height ) ) / log((float)2) - 2;
    gauss_pyr = BuildGaussPyr( init_img, octvs, intvls, sigma );
    dog_pyr = BuildDogPyr( gauss_pyr, octvs, intvls );

    storage = cvCreateMemStorage( 0 );
    features = ScaleSpaceExtrema( dog_pyr, octvs, intvls, contr_thr, curv_thr, storage );

    /* sort features by decreasing scale and move from CvSeq to array */
    cvSeqSort( features, (CvCmpFunc)FeatureCmp, NULL );
    n = features->total;
    feat = (feature*)calloc( n, sizeof(feature) );
    feat = (feature*)cvCvtSeqToArray( features, feat, CV_WHOLE_SEQ );

    /* the per-feature detection data is released and cleared before returning */
    for( i = 0; i < n; i++ )
    {
        free( feat[i].feature_data );
        feat[i].feature_data = NULL;
    }

    cvReleaseMemStorage( &storage );
    cvReleaseImage( &init_img );
    ReleasePyr( &gauss_pyr, octvs, intvls + 3 );
    ReleasePyr( &dog_pyr, octvs, intvls + 2 );

    printf("Found: %d \n", n);
    printf("\n ----------- DoSift End --------------- \n");

    return n;
}
int SearchInMem( IplImage* RGBA, IplImage* depth, IplImage* mask, char* DBFolderName, char* categoryFolders[], int numCategories, char* outputFileNames[], float* outputScores, int num){ char cat_dir_path[FLEN]; // calculate features for the query image FEATURE feature; MakeFeatureInMem(RGBA, depth, mask, &feature); //temporary storage for files CvMemStorage* storage = cvCreateMemStorage(0); //store comp scores CvSeq* scores = cvCreateSeq(0, sizeof(CvSeq), sizeof(FileWithScore), storage); // loop through the files in the category folders in DB. for (int i=0; i<numCategories; i++){ strcpy(cat_dir_path, DBFolderName); strcat(cat_dir_path,"/"); strcat(cat_dir_path, categoryFolders[i]); SearchFolder(cat_dir_path, &feature, scores, CompareImage); } //sort images by scores cvSeqSort(scores, CompFileWithScore, NULL); //release memories for scores & strings for (int i=0; i<scores->total; i++){ FileWithScore* sc = (FileWithScore*) cvGetSeqElem(scores, i); if (i<num){ outputFileNames[i] = (char*)malloc(strlen(sc->img_file)); strcpy(outputFileNames[i], sc->img_file); outputScores[i] = sc->score; } //free(sc->img_file); } for (int i=scores->total; i<num; i++){ outputFileNames[i] = NULL; outputScores[i] = -1.0; } cvClearSeq(scores); cvReleaseMemStorage( &storage ); ReleaseFeature(&feature); return 0; }
/**
   Detects SIFT features in an image using caller-supplied parameter values.
   The detected features are written to a newly allocated array whose address
   is stored in \a *feat.

   @param img the image in which to detect features
   @param feat receives a pointer to the allocated array of detected features
   @param intvls the number of intervals sampled per octave of scale space
   @param sigma the amount of Gaussian smoothing applied to each image level
     before building the scale space representation for an octave
   @param contr_thr a threshold on the value of the scale space function
     \f$\left|D(\hat{x})\right|\f$, where \f$\hat{x}\f$ is a vector specifying
     feature location and scale, used to reject unstable features; assumes
     pixel values in the range [0, 1]
   @param curv_thr threshold on a feature's ratio of principle curvatures
     used to reject features that are too edge-like
   @param img_dbl should be 1 if image doubling prior to scale space
     construction is desired or 0 if not
   @param descr_width the width, \f$n\f$, of the \f$n \times n\f$ array of
     orientation histograms used to compute a feature's descriptor
   @param descr_hist_bins the number of orientations in each of the
     histograms in the array used to compute a feature's descriptor

   @return Returns the number of keypoints stored in \a *feat. NULL arguments
     are reported through fatal_error().

   @see sift_keypoints()
*/
int _sift_features( IplImage* img, struct feature** feat, int intvls,
                    double sigma, double contr_thr, int curv_thr,
                    int img_dbl, int descr_width, int descr_hist_bins )
{
    IplImage* base_img;
    IplImage*** gpyr;
    IplImage*** dpyr;
    CvMemStorage* mem;
    CvSeq* found;
    struct feature* cur;
    int n_oct, min_dim, k, count = 0;

    /* check arguments */
    if( ! img )
        fatal_error( "NULL pointer error, %s, line %d",  __FILE__, __LINE__ );
    if( ! feat )
        fatal_error( "NULL pointer error, %s, line %d",  __FILE__, __LINE__ );

    /* build scale space pyramid; smallest dimension of top level is ~4 pixels */
    base_img = create_init_img( img, img_dbl, sigma );
    min_dim = MIN( base_img->width, base_img->height );
    n_oct = log( min_dim ) / log(2) - 2;
    gpyr = build_gauss_pyr( base_img, n_oct, intvls, sigma );
    dpyr = build_dog_pyr( gpyr, n_oct, intvls );

    /* extremum detection */
    mem = cvCreateMemStorage( 0 );
    found = scale_space_extrema( dpyr, n_oct, intvls, contr_thr, curv_thr, mem );

    /* compute the scale of every feature */
    calc_feature_scales( found, sigma, intvls );

    /* if the initial image was doubled in size, adjust the features for it */
    if( img_dbl )
        adjust_for_img_dbl( found );

    /* compute feature orientations and magnitudes */
    calc_feature_oris( found, gpyr );

    /* compute the feature descriptors */
    compute_descriptors( found, gpyr, descr_width, descr_hist_bins );

    /* sort features by decreasing scale and move from CvSeq to array */
    cvSeqSort( found, (CvCmpFunc)feature_cmp, NULL );
    count = found->total;
    *feat = calloc( count, sizeof(struct feature) );
    *feat = cvCvtSeqToArray( found, *feat, CV_WHOLE_SEQ );

    /* feature_data is released once the descriptors have been computed */
    for( k = 0; k < count; k++ )
    {
        cur = *feat + k;
        free( cur->feature_data );
        cur->feature_data = NULL;
    }

    cvReleaseMemStorage( &mem );
    cvReleaseImage( &base_img );
    release_pyr( &gpyr, n_oct, intvls + 3 );
    release_pyr( &dpyr, n_oct, intvls + 2 );
    return count;
}
/**
   Finds SIFT features in an image using user-specified parameter values.  All
   detected features are stored in the array pointed to by \a feat.

   @param img the image in which to detect features
   @param feat a pointer to an array in which to store detected features
   @param intvls the number of intervals sampled per octave of scale space
   @param sigma the amount of Gaussian smoothing applied to each image level
     before building the scale space representation for an octave
   @param contr_thr a threshold on the value of the scale space function
     \f$\left|D(\hat{x})\right|\f$, where \f$\hat{x}\f$ is a vector specifying
     feature location and scale, used to reject unstable features; assumes
     pixel values in the range [0, 1]
   @param curv_thr threshold on a feature's ratio of principle curvatures
     used to reject features that are too edge-like
   @param img_dbl should be 1 if image doubling prior to scale space
     construction is desired or 0 if not
   @param descr_width the width, \f$n\f$, of the \f$n \times n\f$ array of
     orientation histograms used to compute a feature's descriptor
   @param descr_hist_bins the number of orientations in each of the
     histograms in the array used to compute a feature's descriptor

   @return Returns the number of keypoints stored in \a feat or -1 on failure
     (a NULL \a img or \a feat)
   @see sift_keypoints()
*/
int _sift_features( IplImage* img, struct feature** feat, int intvls,
                    double sigma, double contr_thr, int curv_thr,
                    int img_dbl, int descr_width, int descr_hist_bins )
{
    IplImage *init_img;
    IplImage ***gauss_pyr, ***dog_pyr;
    CvMemStorage *storage;
    CvSeq *features;
    int octvs, i, n = 0;

    /* check arguments: both pointers are dereferenced below */
    if( ! img  ||  ! feat )
        return -1;

    /* build scale space pyramid; smallest dimension of top level is ~4 pixels */
    init_img = create_init_img( img, img_dbl, sigma );
    octvs = log( MIN( init_img->width, init_img->height ) ) / log(2) - 2;
    gauss_pyr = build_gauss_pyr( init_img, octvs, intvls, sigma );
    dog_pyr = build_dog_pyr( gauss_pyr, octvs, intvls );

    storage = cvCreateMemStorage( 0 );
    features = scale_space_extrema( dog_pyr, octvs, intvls, contr_thr,
                                    curv_thr, storage );
    calc_feature_scales( features, sigma, intvls );
    if( img_dbl )
        adjust_for_img_dbl( features );
    calc_feature_oris( features, gauss_pyr );
    compute_descriptors( features, gauss_pyr, descr_width, descr_hist_bins );

    /* sort features by decreasing scale and move from CvSeq to array */
    cvSeqSort( features, (CvCmpFunc)feature_cmp, NULL );
    n = features->total;
    *feat = calloc( n, sizeof(struct feature) );
    *feat = cvCvtSeqToArray( features, *feat, CV_WHOLE_SEQ );

    /* the per-feature detection data is released and cleared before returning */
    for( i = 0; i < n; i++ )
    {
        free( (*feat)[i].feature_data );
        (*feat)[i].feature_data = NULL;
    }

    cvReleaseMemStorage( &storage );
    cvReleaseImage( &init_img );
    release_pyr( &gauss_pyr, octvs, intvls + 3 );
    release_pyr( &dog_pyr, octvs, intvls + 2 );
    return n;
}
/**
   Runs the complete SIFT pipeline on an image with explicit parameter values
   and hands the detected features back as a freshly allocated array through
   \a feat.

   @param img the image in which to detect features
   @param feat a pointer to an array in which to store detected features
   @param intvls the number of intervals sampled per octave of scale space
   @param sigma the amount of Gaussian smoothing applied to each image level
     before building the scale space representation for an octave
   @param contr_thr a threshold on the value of the scale space function
     \f$\left|D(\hat{x})\right|\f$, where \f$\hat{x}\f$ is a vector specifying
     feature location and scale, used to reject unstable features; assumes
     pixel values in the range [0, 1]
   @param curv_thr threshold on a feature's ratio of principle curvatures
     used to reject features that are too edge-like
   @param img_dbl should be 1 if image doubling prior to scale space
     construction is desired or 0 if not
   @param descr_width the width, \f$n\f$, of the \f$n \times n\f$ array of
     orientation histograms used to compute a feature's descriptor
   @param descr_hist_bins the number of orientations in each of the
     histograms in the array used to compute a feature's descriptor

   @return Returns the number of keypoints stored in \a feat. A NULL \a img or
     \a feat is reported through fatal_error().
   @see sift_keypoints()
*/
int _sift_features( IplImage* img, struct feature** feat, int intvls,
                    double sigma, double contr_thr, int curv_thr,
                    int img_dbl, int descr_width, int descr_hist_bins )
{
    IplImage* start_img;          /* initial image produced by create_init_img() */
    IplImage*** pyr_gauss;        /* Gaussian pyramid */
    IplImage*** pyr_dog;          /* difference-of-Gaussians pyramid */
    CvMemStorage* seq_storage;    /* backing storage for the feature sequence */
    CvSeq* feat_seq;              /* sequence of detected features */
    int num_octaves, idx, total = 0;

    /* check arguments */
    if( ! img )
        fatal_error( "NULL pointer error, %s, line %d",  __FILE__, __LINE__ );
    if( ! feat )
        fatal_error( "NULL pointer error, %s, line %d",  __FILE__, __LINE__ );

    /* Step 1: build the scale space, i.e. the DoG pyramid.
       build scale space pyramid; smallest dimension of top level is ~4 pixels.
       Per the original notes, the initial image is a smoothed grayscale
       version of the input, doubled in size when img_dbl is set. */
    start_img = create_init_img( img, img_dbl, sigma );

    /* number of octaves, derived from the smaller image dimension */
    num_octaves = log( MIN( start_img->width, start_img->height ) ) / log(2) - 2;

    /* each octave of the Gaussian pyramid holds intvls+3 images and each
       octave of the DoG pyramid intvls+2 (the counts passed to release_pyr) */
    pyr_gauss = build_gauss_pyr( start_img, num_octaves, intvls, sigma );
    pyr_dog = build_dog_pyr( pyr_gauss, num_octaves, intvls );

    /* Step 2: detect, localize and filter extrema in scale space */
    seq_storage = cvCreateMemStorage( 0 );   /* default block size */
    feat_seq = scale_space_extrema( pyr_dog, num_octaves, intvls, contr_thr,
                                    curv_thr, seq_storage );

    /* assign a scale to every detected feature */
    calc_feature_scales( feat_seq, sigma, intvls );

    /* only when the image was doubled: adjust the features for the doubling */
    if( img_dbl )
        adjust_for_img_dbl( feat_seq );

    /* Step 3: orientation assignment */
    calc_feature_oris( feat_seq, pyr_gauss );

    /* Step 4: descriptor computation */
    compute_descriptors( feat_seq, pyr_gauss, descr_width, descr_hist_bins );

    /* sort features by decreasing scale and move from CvSeq to array;
       feature_cmp is the comparison callback used for the sort */
    cvSeqSort( feat_seq, (CvCmpFunc)feature_cmp, NULL );
    total = feat_seq->total;
    *feat = calloc( total, sizeof(struct feature) );
    *feat = cvCvtSeqToArray( feat_seq, *feat, CV_WHOLE_SEQ );

    /* feature_data is no longer needed once detection has finished, so it is
       released and cleared in every output element */
    idx = 0;
    while( idx < total )
    {
        free( (*feat)[idx].feature_data );
        (*feat)[idx].feature_data = NULL;
        idx++;
    }

    /* release all temporary data */
    cvReleaseMemStorage( &seq_storage );
    cvReleaseImage( &start_img );
    release_pyr( &pyr_gauss, num_octaves, intvls + 3 );
    release_pyr( &pyr_dog, num_octaves, intvls + 2 );

    return total;
}
int main(int argc, char *argv[]) { /* initialisation of the parameters */ LOOV_params *param=alloc_init_LOOV_params(); param = parse_arg(param, argc, argv); /* initialisation of the boxes sequences */ CvMemStorage* storage_box = cvCreateMemStorage(0); CvSeq* seq_box = cvCreateSeq( 0, sizeof(CvSeq), sizeof(box*), storage_box); // list of boxes that are shown in the current frame CvMemStorage* storage_box_final = cvCreateMemStorage(0); CvSeq* seq_box_final = cvCreateSeq( 0, sizeof(CvSeq), sizeof(box*), storage_box_final); // boxes list that no longer appear if (param->videoName==NULL) { fprintf(stderr,"enter video name after parameter -v\n"); exit(0); } CvCapture* capture = cvCaptureFromFile(param->videoName); // read video if (!capture) { printf("error on video %s\n",param->videoName); exit(1); } cvSetCaptureProperty(capture, CV_CAP_PROP_POS_FRAMES, param->startFrame); // get video property IplImage* frame_temp = cvQueryFrame( capture ); // get the first frame /* computed parameters depending on the image size */ int video_depth=1; for (int i=0;i<frame_temp->depth;i++) video_depth=video_depth*2; // find the max threshold param->max_thr = video_depth-1; param->it_connected_caractere = round_me((float)frame_temp->width*param->aspect_ratio*param->it_connected_caractere); param->y_min_size_text = round_me((float)frame_temp->height*param->y_min_size_text); param->x_min_size_text = round_me((float)frame_temp->width*param->aspect_ratio*param->x_min_size_text); /* read mask image, to process only a part of the images */ IplImage* frame=cvCreateImage(cvSize(frame_temp->width*param->aspect_ratio, frame_temp->height), frame_temp->depth, frame_temp->nChannels); cvResize(frame_temp, frame, CV_INTER_CUBIC); IplImage* im_mask=0; if (param->path_im_mask!=NULL) { im_mask=cvLoadImage(param->path_im_mask, CV_LOAD_IMAGE_GRAYSCALE); if ((frame->width!=im_mask->width) || (frame->height!=im_mask->height)){ IplImage* im_mask_resize = cvCreateImage(cvSize(frame->width, 
frame->height),im_mask->depth, 1); // resize mask to the images video size cvResize(im_mask, im_mask_resize, CV_INTER_CUBIC); cvReleaseImage(&im_mask); im_mask = cvCloneImage(im_mask_resize); cvReleaseImage(&im_mask_resize); } } printf("processing of frames from %d to %d\n", param->startFrame, param->startFrame+param->nbFrame); IplImage* frame_BW=cvCreateImage(cvSize(frame_temp->width*param->aspect_ratio, frame_temp->height), frame_temp->depth, 1); IplImage* frame_BW_temp=cvCreateImage(cvSize(frame_temp->width, frame_temp->height), frame_temp->depth, 1); int frameNum=param->startFrame; while((frameNum<param->startFrame+param->nbFrame) && (frame_temp = cvQueryFrame( capture ))) { // capture the current frame and put it in frame_temp frameNum++; if( frame_temp ) { cvCvtColor(frame_temp, frame_BW_temp, CV_RGB2GRAY); // convert frame from color to gray cvResize(frame_temp, frame, CV_INTER_CUBIC); // resize for aspect ratio cvResize(frame_BW_temp, frame_BW, CV_INTER_CUBIC); cvCvtColor(frame, frame_BW, CV_RGB2GRAY); IplImage* im = cvCloneImage(frame_BW); im = sobel_double_H(im, param); // find edge of characters if (param->path_im_mask!=NULL) cvAnd(im,im_mask,im, NULL); // apply mask if it exists im = connected_caractere(im, param); // connect edges of a same line im = delete_horizontal_bar(im, param); // filter noise on the resulting image im = delete_vertical_bar(im, param); // filter noise on the resulting image if (param->path_im_mask!=NULL) cvAnd(im,im_mask,im, NULL); // apply mask if it exists spatial_detection_box(im, seq_box, frameNum, frame_BW, frame, frame, im_mask, param); // Detect boxes spatial position temporal_detection_box(seq_box, seq_box_final, frameNum, frame_BW, im_mask, param); // Temporal tracking of the boxes cvReleaseImage(&im); } } cvReleaseImage(&frame_BW); cvReleaseImage(&im_mask); /* finish the transcriptin of the boxes in seq_box */ for (int i=0;i<seq_box->total;i++){ box* pt_search_box = *(box**)cvGetSeqElem(seq_box, i); if 
(pt_search_box->stop_frame - pt_search_box->start_frame > param->min_duration_box) { cvSeqPush(seq_box_final, &pt_search_box); // copy boxes in seq_box_final cvSeqSort(pt_search_box->seq_thr_t, cmp_thr, 0); int* thr_med = (int*)cvGetSeqElem( pt_search_box->seq_thr_t, (int)(pt_search_box->nb_img_detect_avg_t/2) ); set_threshold_OCR_Image(pt_search_box->im_average_mask_t,*thr_med); transcription_box(pt_search_box, param); // process transcription of the boxes if (param->print_text == 1){ // print transcription printf("box_%d img_avg ymin=%d ymax=%d xmin=%d xmax=%d " ,pt_search_box->num ,round_me(pt_search_box->ymin_avg), round_me(pt_search_box->xmin_avg), round_me(pt_search_box->ymax_avg), round_me(pt_search_box->xmax_avg)); print_transcription_image(get_img_OCR_Image(pt_search_box->im_average_mask_t), round_me(pt_search_box->thr_med), param); } } else free_box(pt_search_box); } /* Write transcription in output_path+".OCR" file */ char * file_txt_temp=sprintf_alloc("%s.OCR", param->output_path); FILE * file_txt = fopen(file_txt_temp, "w"); free(file_txt_temp); cvSeqSort( seq_box_final, cmp_box_by_frame, 0); for (int i=0;i<seq_box_final->total;i++){ file_print_box(file_txt, *(box**)cvGetSeqElem(seq_box_final, i), param); // } fclose(file_txt); /* free memory */ for (int i=0;i<seq_box_final->total;i++){ free_box(*(box**)cvGetSeqElem(seq_box_final, i)); } cvClearSeq(seq_box); cvReleaseMemStorage( &storage_box ); cvReleaseImage(&im_mask); cvClearSeq(seq_box_final); cvReleaseMemStorage( &storage_box_final ); cvReleaseCapture( &capture ); return 0; }