/**
 * Run OCR on the ticker strip at the bottom of a video frame.
 *
 * The frame is read as packed 3-bytes-per-pixel R,G,B data from
 * frame->data[0] (row stride frame->linesize[0]). Only rows from
 * (92*height)/100 downwards are copied; for those rows a luminance mask is
 * built (white where the Lab L component exceeds ctx->lum_thresh, black
 * otherwise) and that mask is handed to get_ocr_text_simple().
 *
 * Side effects: writes "./lum_im<index>.jpg" and "./im<index>.jpg" into the
 * current working directory on every call.
 *
 * @param ctx    hardsubx context (lum_thresh is read; also passed to the OCR helper)
 * @param frame  decoded frame; plane 0 is read as RGB triplets
 * @param width  frame width in pixels
 * @param height frame height in pixels
 * @param index  frame index, used only to name the dumped JPEG files
 * @return whatever get_ocr_text_simple() returns for the luminance mask
 *         (ownership of the string is not visible here -- confirm against the
 *         OCR helper before freeing).
 */
char* _process_frame_tickertext(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
{
	PIX *im = pixCreate(width, height, 32);
	PIX *lum_im = pixCreate(width, height, 32);
	PIX *feat_im = pixCreate(width, height, 32);
	PIX *gray_im = NULL;
	PIX *sobel_im = NULL;
	PIX *dilate_im = NULL;
	PIX *edge_im = NULL;
	char *subtitle_text = NULL;
	char write_path[100];
	int i, j;

	/* NOTE(review): the two loops below start at slightly different rows
	 * ((92*height)/100 vs 92*(height/100)) because of integer division.
	 * Both expressions are kept exactly as they were. */
	const int fill_start_row = (92 * height) / 100;
	const int feat_start_row = 92 * (height / 100);

	for (i = fill_start_row; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			const int p = j * 3 + i * frame->linesize[0];
			const int r = frame->data[0][p];
			const int g = frame->data[0][p + 1];
			const int b = frame->data[0][p + 2];
			float L, A, B;

			pixSetRGBPixel(im, j, i, r, g, b);
			rgb_to_lab((float)r, (float)g, (float)b, &L, &A, &B);
			if (L > ctx->lum_thresh)
				pixSetRGBPixel(lum_im, j, i, 255, 255, 255);
			else
				pixSetRGBPixel(lum_im, j, i, 0, 0, 0);
		}
	}

	/* Edge image: gray -> vertical Sobel -> gray dilation -> binary threshold.
	 * Each Leptonica call returns a freshly allocated PIX, so every stage gets
	 * its own handle and is destroyed below. Reusing one variable for all
	 * stages (as before) leaked every intermediate image on every frame. */
	gray_im = pixConvertRGBToGray(im, 0.0, 0.0, 0.0);
	sobel_im = pixSobelEdgeFilter(gray_im, L_VERTICAL_EDGES);
	dilate_im = pixDilateGray(sobel_im, 21, 11);
	edge_im = pixThresholdToBinary(dilate_im, 50);

	/* Feature image: bright pixels that are not on a dilated edge.
	 * NOTE(review): feat_im is computed but OCR below runs on lum_im. */
	for (i = feat_start_row; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			/* Zero-initialised so a failed pixGetPixel() cannot leave them indeterminate. */
			unsigned int p1 = 0, p3 = 0;

			pixGetPixel(edge_im, j, i, &p1);
			pixGetPixel(lum_im, j, i, &p3);
			if (p1 == 0 && p3 > 0)
				pixSetRGBPixel(feat_im, j, i, 255, 255, 255);
			else
				pixSetRGBPixel(feat_im, j, i, 0, 0, 0);
		}
	}

	/* Tesseract OCR for the ticker text here */
	subtitle_text = get_ocr_text_simple(ctx, lum_im);

	/* Dump the intermediate images for inspection. */
	snprintf(write_path, sizeof(write_path), "./lum_im%04d.jpg", index);
	pixWrite(write_path, lum_im, IFF_JFIF_JPEG);
	snprintf(write_path, sizeof(write_path), "./im%04d.jpg", index);
	pixWrite(write_path, im, IFF_JFIF_JPEG);

	pixDestroy(&lum_im);
	pixDestroy(&im);
	pixDestroy(&gray_im);
	pixDestroy(&sobel_im);
	pixDestroy(&dilate_im);
	pixDestroy(&edge_im);
	pixDestroy(&feat_im);

	return subtitle_text;
}
/**
 * Debug helper: builds the same hue/edge feature image that the colour-based
 * subtitle path uses, then discards it.
 *
 * The frame is read as packed 3-bytes-per-pixel R,G,B data from
 * frame->data[0] (row stride frame->linesize[0]). Nothing is displayed,
 * returned or written by the code below; the OCR calls are disabled.
 *
 * @param ctx       hardsubx context (ctx->hue is read)
 * @param frame     decoded frame; plane 0 is read as RGB triplets
 * @param width     frame width in pixels
 * @param height    frame height in pixels
 * @param timestamp unused
 */
void _display_frame(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int timestamp)
{
	PIX *im = pixCreate(width, height, 32);
	PIX *hue_im = pixCreate(width, height, 32);
	int i, j;

	for (i = 0; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			const int p = j * 3 + i * frame->linesize[0];
			const int r = frame->data[0][p];
			const int g = frame->data[0][p + 1];
			const int b = frame->data[0][p + 2];
			float H, S, V;

			pixSetRGBPixel(im, j, i, r, g, b);
			rgb_to_hsv((float)r, (float)g, (float)b, &H, &S, &V);

			/* Keep pixels whose hue is within 20 of the target hue. Written as
			 * an explicit range test so it does not depend on which abs()
			 * (integer or floating point) the compiler would pick. */
			const double hue_delta = H - ctx->hue;
			if (hue_delta > -20 && hue_delta < 20)
			{
				pixSetRGBPixel(hue_im, j, i, r, g, b);
			}
		}
	}

	/* Each Leptonica call returns a freshly allocated PIX; keep one handle per
	 * stage so that all of them can be released (previously the intermediates
	 * were overwritten and leaked). */
	PIX *gray_im = pixConvertRGBToGray(im, 0.0, 0.0, 0.0);
	PIX *sobel_im = pixSobelEdgeFilter(gray_im, L_VERTICAL_EDGES);
	PIX *dilate_im = pixDilateGray(sobel_im, 21, 1);
	PIX *edge_im = pixThresholdToBinary(dilate_im, 50);

	/* The gray version of the hue mask feeds both the Sauvola binarisation
	 * and the dilation; it is converted once and shared. */
	PIX *hue_gray_im = pixConvertRGBToGray(hue_im, 0.0, 0.0, 0.0);
	PIX *pixd = NULL; /* filled in by pixSauvolaBinarize through its last argument */
	pixSauvolaBinarize(hue_gray_im, 15, 0.3, 1, NULL, NULL, NULL, &pixd);
	PIX *edge_im_2 = pixDilateGray(hue_gray_im, 5, 5);

	PIX *feat_im = pixCreate(width, height, 32);
	for (i = 3 * (height / 4); i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			/* Zero-initialised so a failed pixGetPixel() cannot leave them indeterminate. */
			unsigned int p1 = 0, p2 = 0, p4 = 0;

			pixGetPixel(edge_im, j, i, &p1);
			pixGetPixel(pixd, j, i, &p2);
			pixGetPixel(edge_im_2, j, i, &p4);
			if (p1 == 0 && p2 == 0 && p4 > 0)
			{
				pixSetRGBPixel(feat_im, j, i, 255, 255, 255);
			}
		}
	}

	/* OCR is disabled in this debug helper:
	 *   txt = get_ocr_text_simple(ctx, feat_im);
	 *   txt = get_ocr_text_wordwise_threshold(ctx, feat_im, ctx->conf_thresh);
	 */

	pixDestroy(&im);
	pixDestroy(&hue_im);
	pixDestroy(&gray_im);
	pixDestroy(&sobel_im);
	pixDestroy(&dilate_im);
	pixDestroy(&edge_im);
	pixDestroy(&hue_gray_im);
	pixDestroy(&pixd);
	pixDestroy(&edge_im_2);
	pixDestroy(&feat_im);
}
/**
 * Extract subtitle text from a frame using a hue-based mask.
 *
 * The frame is read as packed 3-bytes-per-pixel R,G,B data from
 * frame->data[0] (row stride frame->linesize[0]). Pixels whose hue is within
 * 20 of ctx->hue are copied into a hue mask. In the rows from 3*(height/4)
 * downwards, a feature image is set to white where the dilated vertical-edge
 * image is 0, the Sauvola-binarised hue mask is 0 and the dilated hue mask is
 * non-zero. The feature image is then passed to the OCR routine selected by
 * ctx->ocr_mode and ctx->conf_thresh.
 *
 * Side effect: when ctx->detect_italics is set, ctx->ocr_mode is overwritten
 * with HARDSUBX_OCRMODE_WORD.
 *
 * @param ctx    hardsubx context (hue, detect_italics, ocr_mode, conf_thresh)
 * @param frame  decoded frame; plane 0 is read as RGB triplets
 * @param width  frame width in pixels
 * @param height frame height in pixels
 * @param index  unused
 * @return the string returned by the selected OCR helper (may be NULL);
 *         ownership is not visible here -- confirm against the OCR helpers.
 */
char *_process_frame_color_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
{
	char *subtitle_text = NULL;
	PIX *im = pixCreate(width, height, 32);
	PIX *hue_im = pixCreate(width, height, 32);
	int i, j;

	for (i = 0; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			const int p = j * 3 + i * frame->linesize[0];
			const int r = frame->data[0][p];
			const int g = frame->data[0][p + 1];
			const int b = frame->data[0][p + 2];
			float H, S, V;

			pixSetRGBPixel(im, j, i, r, g, b);
			rgb_to_hsv((float)r, (float)g, (float)b, &H, &S, &V);

			/* Explicit range test instead of abs(): independent of whether the
			 * integer or the floating-point abs() would be selected. */
			const double hue_delta = H - ctx->hue;
			if (hue_delta > -20 && hue_delta < 20)
			{
				pixSetRGBPixel(hue_im, j, i, r, g, b);
			}
		}
	}

	/* Each Leptonica call returns a freshly allocated PIX; keep one handle per
	 * stage so that all of them can be released (previously the intermediates
	 * were overwritten and leaked on every frame). */
	PIX *gray_im = pixConvertRGBToGray(im, 0.0, 0.0, 0.0);
	PIX *sobel_im = pixSobelEdgeFilter(gray_im, L_VERTICAL_EDGES);
	PIX *dilate_im = pixDilateGray(sobel_im, 21, 1);
	PIX *edge_im = pixThresholdToBinary(dilate_im, 50);

	/* Gray version of the hue mask, shared by binarisation and dilation. */
	PIX *hue_gray_im = pixConvertRGBToGray(hue_im, 0.0, 0.0, 0.0);
	PIX *pixd = NULL; /* filled in by pixSauvolaBinarize through its last argument */
	pixSauvolaBinarize(hue_gray_im, 15, 0.3, 1, NULL, NULL, NULL, &pixd);
	PIX *edge_im_2 = pixDilateGray(hue_gray_im, 5, 5);

	PIX *feat_im = pixCreate(width, height, 32);
	for (i = 3 * (height / 4); i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			/* Zero-initialised so a failed pixGetPixel() cannot leave them indeterminate. */
			unsigned int p1 = 0, p2 = 0, p4 = 0;

			pixGetPixel(edge_im, j, i, &p1);
			pixGetPixel(pixd, j, i, &p2);
			pixGetPixel(edge_im_2, j, i, &p4);
			if (p1 == 0 && p2 == 0 && p4 > 0)
			{
				pixSetRGBPixel(feat_im, j, i, 255, 255, 255);
			}
		}
	}

	if (ctx->detect_italics)
	{
		ctx->ocr_mode = HARDSUBX_OCRMODE_WORD;
	}

	/* TESSERACT OCR FOR THE FRAME HERE */
	switch (ctx->ocr_mode)
	{
		case HARDSUBX_OCRMODE_WORD:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_wordwise_threshold(ctx, feat_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_wordwise(ctx, feat_im);
			break;
		case HARDSUBX_OCRMODE_LETTER:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_letterwise_threshold(ctx, feat_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_letterwise(ctx, feat_im);
			break;
		case HARDSUBX_OCRMODE_FRAME:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_simple_threshold(ctx, feat_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_simple(ctx, feat_im);
			break;
		default:
			fatal(EXIT_MALFORMED_PARAMETER, "Invalid OCR Mode");
	}

	pixDestroy(&feat_im);
	pixDestroy(&im);
	pixDestroy(&hue_im);
	pixDestroy(&gray_im);
	pixDestroy(&sobel_im);
	pixDestroy(&dilate_im);
	pixDestroy(&edge_im);
	pixDestroy(&hue_gray_im);
	pixDestroy(&pixd);
	pixDestroy(&edge_im_2);

	return subtitle_text;
}
/**
 * Extract subtitle text from a frame using a luminance mask.
 *
 * The frame is read as packed 3-bytes-per-pixel R,G,B data from
 * frame->data[0] (row stride frame->linesize[0]). Only rows from
 * (3*height)/4 downwards are copied; for those rows a luminance mask is built
 * (white where the Lab L component exceeds ctx->lum_thresh, black otherwise).
 * The mask is passed to the OCR routine selected by ctx->ocr_mode and
 * ctx->conf_thresh.
 *
 * Side effect: when ctx->detect_italics is set, ctx->ocr_mode is overwritten
 * with HARDSUBX_OCRMODE_WORD.
 *
 * @param ctx    hardsubx context (lum_thresh, detect_italics, ocr_mode, conf_thresh)
 * @param frame  decoded frame; plane 0 is read as RGB triplets
 * @param width  frame width in pixels
 * @param height frame height in pixels
 * @param index  unused
 * @return the string returned by the selected OCR helper (may be NULL);
 *         ownership is not visible here -- confirm against the OCR helpers.
 */
char* _process_frame_white_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
{
	PIX *im = pixCreate(width, height, 32);
	PIX *lum_im = pixCreate(width, height, 32);
	PIX *feat_im = pixCreate(width, height, 32);
	PIX *gray_im = NULL;
	PIX *sobel_im = NULL;
	PIX *dilate_im = NULL;
	PIX *edge_im = NULL;
	char *subtitle_text = NULL;
	int i, j;

	/* NOTE(review): the two loops below start at slightly different rows
	 * ((3*height)/4 vs 3*(height/4)) because of integer division.
	 * Both expressions are kept exactly as they were. */
	const int fill_start_row = (3 * height) / 4;
	const int feat_start_row = 3 * (height / 4);

	for (i = fill_start_row; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			const int p = j * 3 + i * frame->linesize[0];
			const int r = frame->data[0][p];
			const int g = frame->data[0][p + 1];
			const int b = frame->data[0][p + 2];
			float L, A, B;

			pixSetRGBPixel(im, j, i, r, g, b);
			rgb_to_lab((float)r, (float)g, (float)b, &L, &A, &B);
			if (L > ctx->lum_thresh)
				pixSetRGBPixel(lum_im, j, i, 255, 255, 255);
			else
				pixSetRGBPixel(lum_im, j, i, 0, 0, 0);
		}
	}

	/* Edge image: gray -> vertical Sobel -> gray dilation -> binary threshold.
	 * Each Leptonica call returns a freshly allocated PIX, so every stage gets
	 * its own handle and is destroyed below. Reusing one variable for all
	 * stages (as before) leaked every intermediate image on every frame. */
	gray_im = pixConvertRGBToGray(im, 0.0, 0.0, 0.0);
	sobel_im = pixSobelEdgeFilter(gray_im, L_VERTICAL_EDGES);
	dilate_im = pixDilateGray(sobel_im, 21, 11);
	edge_im = pixThresholdToBinary(dilate_im, 50);

	/* Feature image: bright pixels that are not on a dilated edge.
	 * NOTE(review): feat_im is computed but OCR below runs on lum_im. */
	for (i = feat_start_row; i < height; i++)
	{
		for (j = 0; j < width; j++)
		{
			/* Zero-initialised so a failed pixGetPixel() cannot leave them indeterminate. */
			unsigned int p1 = 0, p3 = 0;

			pixGetPixel(edge_im, j, i, &p1);
			pixGetPixel(lum_im, j, i, &p3);
			if (p1 == 0 && p3 > 0)
				pixSetRGBPixel(feat_im, j, i, 255, 255, 255);
			else
				pixSetRGBPixel(feat_im, j, i, 0, 0, 0);
		}
	}

	if (ctx->detect_italics)
	{
		ctx->ocr_mode = HARDSUBX_OCRMODE_WORD;
	}

	/* TESSERACT OCR FOR THE FRAME HERE */
	switch (ctx->ocr_mode)
	{
		case HARDSUBX_OCRMODE_WORD:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_wordwise_threshold(ctx, lum_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_wordwise(ctx, lum_im);
			break;
		case HARDSUBX_OCRMODE_LETTER:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_letterwise_threshold(ctx, lum_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_letterwise(ctx, lum_im);
			break;
		case HARDSUBX_OCRMODE_FRAME:
			if (ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_simple_threshold(ctx, lum_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_simple(ctx, lum_im);
			break;
		default:
			fatal(EXIT_MALFORMED_PARAMETER, "Invalid OCR Mode");
	}

	pixDestroy(&lum_im);
	pixDestroy(&im);
	pixDestroy(&gray_im);
	pixDestroy(&sobel_im);
	pixDestroy(&dilate_im);
	pixDestroy(&edge_im);
	pixDestroy(&feat_im);

	return subtitle_text;
}
bool CJBig2File::MemoryToJBig2(unsigned char* pBufferBGRA ,int BufferSize, int nWidth, int nHeight, std::wstring sDstFileName) { // check for valid input parameters /////////////////////////////////////////////////////////// if ( NULL == pBufferBGRA ) return false; int lBufferSize = BufferSize; unsigned char *pSourceBuffer = pBufferBGRA; PIX *pSource = pixCreate( nWidth, nHeight, 32 ); if ( !pSource ) return false; for ( int nY = 0; nY < nHeight; nY++ ) { for ( int nX = 0; nX < nWidth; nX++, pSourceBuffer += 3 )//todooo сделать 3 ? 4 { pixSetRGBPixel( pSource, nX, nY, pSourceBuffer[ 2 ], pSourceBuffer[ 1 ], pSourceBuffer[ 0 ] ); } } jbig2ctx *pContext = jbig2_init( m_dTreshold, 0.5, 0, 0, ! m_bPDFMode, m_bRefine ? 10 : -1 ); // Пока сделаем запись одной картинки в JBig2 // TO DO: надо будет сделать запись нескольких картинок в 1 JBig2 файл // Убираем ColorMap PIX *pPixL = NULL; if ( NULL == ( pPixL = pixRemoveColormap( pSource, REMOVE_CMAP_BASED_ON_SRC ) ) ) { pixDestroy( &pSource ); jbig2_destroy( pContext ); return false; } pixDestroy( &pSource ); PIX *pPixT = NULL; if ( pPixL->d > 1 ) { PIX *pGray = NULL; if ( pPixL->d > 8 ) { pGray = pixConvertRGBToGrayFast( pPixL ); if ( !pGray ) { pixDestroy( &pSource ); jbig2_destroy( pContext ); return false; } } else { pGray = pixClone( pPixL ); } if ( m_bUpscale2x ) { pPixT = pixScaleGray2xLIThresh( pGray, m_nBwTreshold ); } else if ( m_bUpscale4x ) { pPixT = pixScaleGray4xLIThresh( pGray, m_nBwTreshold ); } else { pPixT = pixThresholdToBinary( pGray, m_nBwTreshold ); } pixDestroy( &pGray ); } else { pPixT = pixClone( pPixL ); } if ( m_sOutputTreshold.length() > 0 ) { pixWrite( m_sOutputTreshold.c_str(), pPixT, IFF_BMP ); } if ( m_bSegment && pPixL->d > 1 ) { PIX *pGraphics = segment_image( pPixT, pPixL ); if ( pGraphics ) { char *sFilename; asprintf( &sFilename, "%s.%04d.%s", m_sBaseName.c_str(), 0, ".bmp" ); pixWrite( sFilename, pGraphics, IFF_BMP ); free( sFilename ); } if ( !pPixT ) { // Ничего не делаем return true; 
} } pixDestroy( &pPixL ); if ( !m_bSymbolMode ) { int nLength = 0; uint8_t *pBuffer = jbig2_encode_generic( pPixT, !m_bPDFMode, 0, 0, m_bDuplicateLineRemoval, &nLength ); bool bRes = true; NSFile::CFileBinary file; if (file.CreateFileW(sDstFileName ) == true ) { file.WriteFile(pBuffer, nLength); file.CloseFile(); bRes = true; } else bRes = false; pixDestroy( &pPixT ); if ( pBuffer ) free( pBuffer ); jbig2_destroy( pContext ); return bRes; } int nNumPages = 1; jbig2_add_page( pContext, pPixT ); pixDestroy( &pPixT ); int nLength = 0; uint8_t *pBuffer = jbig2_pages_complete( pContext, &nLength ); if ( !pBuffer ) { jbig2_destroy( pContext ); return false; } if ( m_bPDFMode ) { std::wstring sFileName = sDstFileName;//m_sBaseName + _T(".sym"); NSFile::CFileBinary file; if ( file.CreateFileW(sFileName) == false) { free( pBuffer ); jbig2_destroy( pContext ); return false; } file.WriteFile( pBuffer, nLength ); file.CloseFile(); } free( pBuffer ); for ( int nIndex = 0; nIndex < nNumPages; ++nIndex ) { pBuffer = jbig2_produce_page( pContext, nIndex, -1, -1, &nLength ); if ( m_bPDFMode ) { std::wstring sFileName = m_sBaseName + L".0000"; NSFile::CFileBinary file; if ( file.CreateFileW(sFileName) ==false) { free( pBuffer ); jbig2_destroy( pContext ); return false; } file.WriteFile( pBuffer, nLength ); file.CloseFile(); } free( pBuffer ); } jbig2_destroy( pContext ); return true; }