main(int argc, char **argv) { l_int32 i, w, h, d; l_float32 time; PIX *pixs, *pixf, *pixd; PIXA *pixa; char *filein, *fileout; static char mainName[] = "edgetest"; if (argc != 3) exit(ERROR_INT(" Syntax: edgetest filein fileout", mainName, 1)); filein = argv[1]; fileout = argv[2]; if ((pixs = pixRead(filein)) == NULL) exit(ERROR_INT("pix not made", mainName, 1)); pixGetDimensions(pixs, &w, &h, &d); if (d != 8) exit(ERROR_INT("pix not 8 bpp", mainName, 1)); /* Speed: about 12 Mpix/GHz/sec */ startTimer(); pixf = pixSobelEdgeFilter(pixs, L_HORIZONTAL_EDGES); pixd = pixThresholdToBinary(pixf, 60); pixInvert(pixd, pixd); time = stopTimer(); fprintf(stderr, "Time = %7.3f sec\n", time); fprintf(stderr, "MPix/sec: %7.3f\n", 0.000001 * w * h / time); pixDisplay(pixs, 0, 0); pixInvert(pixf, pixf); pixDisplay(pixf, 480, 0); pixDisplay(pixd, 960, 0); pixWrite(fileout, pixf, IFF_PNG); pixDestroy(&pixd); /* Threshold at different values */ pixInvert(pixf, pixf); for (i = 10; i <= 120; i += 10) { pixd = pixThresholdToBinary(pixf, i); pixInvert(pixd, pixd); pixDisplayWrite(pixd, 1); pixDestroy(&pixd); } pixDestroy(&pixf); /* Display tiled */ pixa = pixaReadFiles("/tmp", "junk_write_display"); pixd = pixaDisplayTiledAndScaled(pixa, 8, 400, 3, 0, 25, 2); pixWrite("/tmp/junktiles.jpg", pixd, IFF_JFIF_JPEG); pixDestroy(&pixd); pixaDestroy(&pixa); pixDestroy(&pixs); exit(0); }
/*
 * JNI entry point: runs pixSobelEdgeFilter() on the PIX whose address is
 * carried in nativePix, using orientFlag as the orientation argument, and
 * hands the resulting PIX pointer back to Java as a jlong.
 *
 * env and clazz are not used.  The result of pixSobelEdgeFilter() is
 * returned as-is, so whatever it yields on failure is passed through.
 * NOTE(review): the Java side presumably owns and must release the
 * returned PIX -- confirm against the calling class.
 */
jlong Java_com_googlecode_leptonica_android_Edge_nativePixSobelEdgeFilter(JNIEnv *env, jclass clazz, jlong nativePix, jint orientFlag)
{
    PIX *source = (PIX *) nativePix;
    l_int32 orientation = (l_int32) orientFlag;
    PIX *filtered;

    filtered = pixSobelEdgeFilter(source, orientation);

    return (jlong) filtered;
}
/*
 * Run OCR on the ticker band (the rows from 92% of the height downwards)
 * of a video frame.
 *
 * ctx    - hardsubx context; lum_thresh is read here and ctx is passed on
 *          to the OCR helper.
 * frame  - source frame; data[0] is read as 3 consecutive bytes (R, G, B)
 *          per pixel with a row stride of linesize[0].
 * width  - frame width in pixels.
 * height - frame height in pixels.
 * index  - frame number, used only to name the debug images.
 *
 * Returns whatever get_ocr_text_simple() returns for the luminance mask,
 * or NULL if the working images could not be allocated.
 * NOTE(review): ownership of the returned string follows
 * get_ocr_text_simple() -- confirm who frees it.
 *
 * Side effect: writes ./lum_imNNNN.jpg and ./imNNNN.jpg to the current
 * directory on every call.
 */
char* _process_frame_tickertext(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
{
	PIX *im = NULL;
	PIX *lum_im = NULL;
	PIX *feat_im = NULL;
	PIX *gray_im = NULL;
	PIX *sobel_im = NULL;
	PIX *dilated_im = NULL;
	PIX *edge_im = NULL;
	char *subtitle_text = NULL;
	char write_path[100];
	/* First row of the ticker band; shared by both loops below. */
	int band_top = (92 * height) / 100;
	int i, j;

	im = pixCreate(width, height, 32);
	lum_im = pixCreate(width, height, 32);
	feat_im = pixCreate(width, height, 32);
	if (im == NULL || lum_im == NULL || feat_im == NULL)
		goto cleanup;

	/* Copy the band into im and build a white/black luminance mask. */
	for (i = band_top; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			int p = j * 3 + i * frame->linesize[0];
			int r = frame->data[0][p];
			int g = frame->data[0][p + 1];
			int b = frame->data[0][p + 2];
			float L, A, B;

			pixSetRGBPixel(im, j, i, r, g, b);
			rgb_to_lab((float)r, (float)g, (float)b, &L, &A, &B);
			if (L > ctx->lum_thresh)
				pixSetRGBPixel(lum_im, j, i, 255, 255, 255);
			else
				pixSetRGBPixel(lum_im, j, i, 0, 0, 0);
		}
	}

	/*
	 * Handle the edge image.  Each stage returns a new PIX, so every
	 * intermediate gets its own pointer and is released at cleanup.
	 */
	gray_im = pixConvertRGBToGray(im, 0.0, 0.0, 0.0);
	sobel_im = pixSobelEdgeFilter(gray_im, L_VERTICAL_EDGES);
	dilated_im = pixDilateGray(sobel_im, 21, 11);
	edge_im = pixThresholdToBinary(dilated_im, 50);

	/*
	 * Feature mask: bright pixels where the thresholded edge image is 0.
	 * feat_im is not consumed below (OCR runs on lum_im).
	 */
	for (i = band_top; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			unsigned int p1 = 0, p3 = 0;

			pixGetPixel(edge_im, j, i, &p1);
			pixGetPixel(lum_im, j, i, &p3);
			if (p1 == 0 && p3 > 0)
				pixSetRGBPixel(feat_im, j, i, 255, 255, 255);
			else
				pixSetRGBPixel(feat_im, j, i, 0, 0, 0);
		}
	}

	/* Tesseract OCR for the ticker text here */
	subtitle_text = get_ocr_text_simple(ctx, lum_im);

	/* Debug output of the mask and the copied band. */
	snprintf(write_path, sizeof write_path, "./lum_im%04d.jpg", index);
	pixWrite(write_path, lum_im, IFF_JFIF_JPEG);
	snprintf(write_path, sizeof write_path, "./im%04d.jpg", index);
	pixWrite(write_path, im, IFF_JFIF_JPEG);

cleanup:
	pixDestroy(&lum_im);
	pixDestroy(&im);
	pixDestroy(&gray_im);
	pixDestroy(&sobel_im);
	pixDestroy(&dilated_im);
	pixDestroy(&edge_im);
	pixDestroy(&feat_im);

	return subtitle_text;
}
/*
 * Debug helper: builds the same intermediate images the colour-based
 * subtitle detection uses (hue-filtered copy, vertical-edge mask, Sauvola
 * binarisation, dilated hue image, combined feature mask) and then frees
 * them.  No OCR or display call is made on the result here.
 *
 * ctx       - hardsubx context; only ctx->hue is read.
 * frame     - source frame; data[0] is read as 3 consecutive bytes
 *             (R, G, B) per pixel with a row stride of linesize[0].
 * width     - frame width in pixels.
 * height    - frame height in pixels.
 * timestamp - unused.
 */
void _display_frame(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int timestamp)
{
	PIX *im = NULL;
	PIX *hue_im = NULL;
	PIX *feat_im = NULL;
	PIX *gray_im = NULL;
	PIX *sobel_im = NULL;
	PIX *dilated_im = NULL;
	PIX *edge_im = NULL;
	PIX *hue_gray_im = NULL;
	PIX *edge_im_2 = NULL;
	PIX *pixd = NULL;
	int i, j;

	(void)timestamp;

	im = pixCreate(width, height, 32);
	hue_im = pixCreate(width, height, 32);
	feat_im = pixCreate(width, height, 32);
	if (im == NULL || hue_im == NULL || feat_im == NULL)
		goto cleanup;

	/* Copy the frame into im; copy only near-target-hue pixels into hue_im. */
	for (i = 0; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			int p = j * 3 + i * frame->linesize[0];
			int r = frame->data[0][p];
			int g = frame->data[0][p + 1];
			int b = frame->data[0][p + 2];
			float H, S, V;
			double hue_delta;

			pixSetRGBPixel(im, j, i, r, g, b);
			rgb_to_hsv((float)r, (float)g, (float)b, &H, &S, &V);

			/*
			 * Compare the difference as floating point instead of
			 * routing it through the integer abs().
			 */
			hue_delta = H - ctx->hue;
			if (hue_delta > -20 && hue_delta < 20)
			{
				pixSetRGBPixel(hue_im, j, i, r, g, b);
			}
		}
	}

	/*
	 * Vertical-edge mask.  Each stage returns a new PIX, so every
	 * intermediate gets its own pointer and is released at cleanup.
	 */
	gray_im = pixConvertRGBToGray(im, 0.0, 0.0, 0.0);
	sobel_im = pixSobelEdgeFilter(gray_im, L_VERTICAL_EDGES);
	dilated_im = pixDilateGray(sobel_im, 21, 1);
	edge_im = pixThresholdToBinary(dilated_im, 50);

	/*
	 * One gray conversion of hue_im feeds both the Sauvola binarisation
	 * (which allocates pixd itself) and the 5x5 gray dilation.
	 */
	hue_gray_im = pixConvertRGBToGray(hue_im, 0.0, 0.0, 0.0);
	pixSauvolaBinarize(hue_gray_im, 15, 0.3, 1, NULL, NULL, NULL, &pixd);
	edge_im_2 = pixDilateGray(hue_gray_im, 5, 5);

	/* Combine the three masks over the rows from 3*(height/4) downwards. */
	for (i = 3 * (height / 4); i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			unsigned int p1 = 0, p2 = 0, p4 = 0;

			pixGetPixel(edge_im, j, i, &p1);
			pixGetPixel(pixd, j, i, &p2);
			pixGetPixel(edge_im_2, j, i, &p4);
			if (p1 == 0 && p2 == 0 && p4 > 0)
			{
				pixSetRGBPixel(feat_im, j, i, 255, 255, 255);
			}
		}
	}

cleanup:
	pixDestroy(&im);
	pixDestroy(&hue_im);
	pixDestroy(&gray_im);
	pixDestroy(&sobel_im);
	pixDestroy(&dilated_im);
	pixDestroy(&edge_im);
	pixDestroy(&feat_im);
	pixDestroy(&hue_gray_im);
	pixDestroy(&edge_im_2);
	pixDestroy(&pixd);
}
/*
 * Run OCR on the bottom quarter of a video frame using a luminance mask.
 *
 * ctx    - hardsubx context; lum_thresh, detect_italics, ocr_mode and
 *          conf_thresh are read.  When detect_italics is set, ocr_mode is
 *          overwritten with HARDSUBX_OCRMODE_WORD.
 * frame  - source frame; data[0] is read as 3 consecutive bytes (R, G, B)
 *          per pixel with a row stride of linesize[0].
 * width  - frame width in pixels.
 * height - frame height in pixels.
 * index  - frame number; unused.
 *
 * Returns the string produced by the OCR helper selected by ocr_mode
 * (the *_threshold variant when conf_thresh > 0), or NULL if the working
 * images could not be allocated.  An unknown ocr_mode calls fatal().
 * NOTE(review): ownership of the returned string follows the OCR helpers
 * -- confirm who frees it.
 */
char* _process_frame_white_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
{
	PIX *im = NULL;
	PIX *lum_im = NULL;
	PIX *feat_im = NULL;
	PIX *gray_im = NULL;
	PIX *sobel_im = NULL;
	PIX *dilated_im = NULL;
	PIX *edge_im = NULL;
	char *subtitle_text = NULL;
	/* First row of the subtitle band; shared by both loops below. */
	int band_top = (3 * height) / 4;
	int i, j;

	(void)index;

	im = pixCreate(width, height, 32);
	lum_im = pixCreate(width, height, 32);
	feat_im = pixCreate(width, height, 32);
	if (im == NULL || lum_im == NULL || feat_im == NULL)
		goto cleanup;

	/* Copy the band into im and build a white/black luminance mask. */
	for (i = band_top; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			int p = j * 3 + i * frame->linesize[0];
			int r = frame->data[0][p];
			int g = frame->data[0][p + 1];
			int b = frame->data[0][p + 2];
			float L, A, B;

			pixSetRGBPixel(im, j, i, r, g, b);
			rgb_to_lab((float)r, (float)g, (float)b, &L, &A, &B);
			if (L > ctx->lum_thresh)
				pixSetRGBPixel(lum_im, j, i, 255, 255, 255);
			else
				pixSetRGBPixel(lum_im, j, i, 0, 0, 0);
		}
	}

	/*
	 * Handle the edge image.  Each stage returns a new PIX, so every
	 * intermediate gets its own pointer and is released at cleanup.
	 */
	gray_im = pixConvertRGBToGray(im, 0.0, 0.0, 0.0);
	sobel_im = pixSobelEdgeFilter(gray_im, L_VERTICAL_EDGES);
	dilated_im = pixDilateGray(sobel_im, 21, 11);
	edge_im = pixThresholdToBinary(dilated_im, 50);

	/*
	 * Feature mask: bright pixels where the thresholded edge image is 0.
	 * feat_im is not consumed below (OCR runs on lum_im).
	 */
	for (i = band_top; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			unsigned int p1 = 0, p3 = 0;

			pixGetPixel(edge_im, j, i, &p1);
			pixGetPixel(lum_im, j, i, &p3);
			if (p1 == 0 && p3 > 0)
				pixSetRGBPixel(feat_im, j, i, 255, 255, 255);
			else
				pixSetRGBPixel(feat_im, j, i, 0, 0, 0);
		}
	}

	if (ctx->detect_italics)
	{
		ctx->ocr_mode = HARDSUBX_OCRMODE_WORD;
	}

	// TESSERACT OCR FOR THE FRAME HERE
	switch (ctx->ocr_mode)
	{
		case HARDSUBX_OCRMODE_WORD:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_wordwise_threshold(ctx, lum_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_wordwise(ctx, lum_im);
			break;
		case HARDSUBX_OCRMODE_LETTER:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_letterwise_threshold(ctx, lum_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_letterwise(ctx, lum_im);
			break;
		case HARDSUBX_OCRMODE_FRAME:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_simple_threshold(ctx, lum_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_simple(ctx, lum_im);
			break;
		default:
			fatal(EXIT_MALFORMED_PARAMETER,"Invalid OCR Mode");
	}

cleanup:
	pixDestroy(&lum_im);
	pixDestroy(&im);
	pixDestroy(&gray_im);
	pixDestroy(&sobel_im);
	pixDestroy(&dilated_im);
	pixDestroy(&edge_im);
	pixDestroy(&feat_im);

	return subtitle_text;
}
/*
 * Run OCR on a colour-derived feature mask of a video frame.
 *
 * Pixels whose hue is within 20 of ctx->hue are copied into a hue image.
 * A feature mask is then built over the rows from 3*(height/4) downwards:
 * a pixel is set white where the thresholded vertical-edge image is 0,
 * the Sauvola-binarised hue image is 0, and the dilated hue image is
 * non-zero.  The mask is passed to the OCR helper selected by ocr_mode.
 *
 * ctx    - hardsubx context; hue, detect_italics, ocr_mode and conf_thresh
 *          are read.  When detect_italics is set, ocr_mode is overwritten
 *          with HARDSUBX_OCRMODE_WORD.
 * frame  - source frame; data[0] is read as 3 consecutive bytes (R, G, B)
 *          per pixel with a row stride of linesize[0].
 * width  - frame width in pixels.
 * height - frame height in pixels.
 * index  - frame number; unused.
 *
 * Returns the string produced by the selected OCR helper (the *_threshold
 * variant when conf_thresh > 0), or NULL if the working images could not
 * be allocated.  An unknown ocr_mode calls fatal().
 * NOTE(review): ownership of the returned string follows the OCR helpers
 * -- confirm who frees it.
 */
char *_process_frame_color_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
{
	char *subtitle_text = NULL;
	PIX *im = NULL;
	PIX *hue_im = NULL;
	PIX *feat_im = NULL;
	PIX *gray_im = NULL;
	PIX *sobel_im = NULL;
	PIX *dilated_im = NULL;
	PIX *edge_im = NULL;
	PIX *hue_gray_im = NULL;
	PIX *edge_im_2 = NULL;
	PIX *pixd = NULL;
	int i, j;

	(void)index;

	im = pixCreate(width, height, 32);
	hue_im = pixCreate(width, height, 32);
	feat_im = pixCreate(width, height, 32);
	if (im == NULL || hue_im == NULL || feat_im == NULL)
		goto cleanup;

	/* Copy the frame into im; copy only near-target-hue pixels into hue_im. */
	for (i = 0; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			int p = j * 3 + i * frame->linesize[0];
			int r = frame->data[0][p];
			int g = frame->data[0][p + 1];
			int b = frame->data[0][p + 2];
			float H, S, V;
			double hue_delta;

			pixSetRGBPixel(im, j, i, r, g, b);
			rgb_to_hsv((float)r, (float)g, (float)b, &H, &S, &V);

			/*
			 * Compare the difference as floating point instead of
			 * routing it through the integer abs().
			 */
			hue_delta = H - ctx->hue;
			if (hue_delta > -20 && hue_delta < 20)
			{
				pixSetRGBPixel(hue_im, j, i, r, g, b);
			}
		}
	}

	/*
	 * Vertical-edge mask.  Each stage returns a new PIX, so every
	 * intermediate gets its own pointer and is released at cleanup.
	 */
	gray_im = pixConvertRGBToGray(im, 0.0, 0.0, 0.0);
	sobel_im = pixSobelEdgeFilter(gray_im, L_VERTICAL_EDGES);
	dilated_im = pixDilateGray(sobel_im, 21, 1);
	edge_im = pixThresholdToBinary(dilated_im, 50);

	/*
	 * One gray conversion of hue_im feeds both the Sauvola binarisation
	 * (which allocates pixd itself) and the 5x5 gray dilation.
	 */
	hue_gray_im = pixConvertRGBToGray(hue_im, 0.0, 0.0, 0.0);
	pixSauvolaBinarize(hue_gray_im, 15, 0.3, 1, NULL, NULL, NULL, &pixd);
	edge_im_2 = pixDilateGray(hue_gray_im, 5, 5);

	/* Combine the three masks into feat_im, which is what OCR sees. */
	for (i = 3 * (height / 4); i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			unsigned int p1 = 0, p2 = 0, p4 = 0;

			pixGetPixel(edge_im, j, i, &p1);
			pixGetPixel(pixd, j, i, &p2);
			pixGetPixel(edge_im_2, j, i, &p4);
			if (p1 == 0 && p2 == 0 && p4 > 0)
			{
				pixSetRGBPixel(feat_im, j, i, 255, 255, 255);
			}
		}
	}

	if (ctx->detect_italics)
	{
		ctx->ocr_mode = HARDSUBX_OCRMODE_WORD;
	}

	// TESSERACT OCR FOR THE FRAME HERE
	switch (ctx->ocr_mode)
	{
		case HARDSUBX_OCRMODE_WORD:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_wordwise_threshold(ctx, feat_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_wordwise(ctx, feat_im);
			break;
		case HARDSUBX_OCRMODE_LETTER:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_letterwise_threshold(ctx, feat_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_letterwise(ctx, feat_im);
			break;
		case HARDSUBX_OCRMODE_FRAME:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_simple_threshold(ctx, feat_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_simple(ctx, feat_im);
			break;
		default:
			fatal(EXIT_MALFORMED_PARAMETER,"Invalid OCR Mode");
	}

cleanup:
	pixDestroy(&feat_im);
	pixDestroy(&im);
	pixDestroy(&gray_im);
	pixDestroy(&sobel_im);
	pixDestroy(&dilated_im);
	pixDestroy(&edge_im);
	pixDestroy(&hue_im);
	pixDestroy(&hue_gray_im);
	pixDestroy(&edge_im_2);
	pixDestroy(&pixd);

	return subtitle_text;
}