Exemplo n.º 1
0
char* _process_frame_tickertext(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
{
	PIX *im;
	PIX *edge_im;
	PIX *lum_im;
	PIX *feat_im;
	char *subtitle_text=NULL;
	im = pixCreate(width,height,32);
	lum_im = pixCreate(width,height,32);
	feat_im = pixCreate(width,height,32);
	int i,j;
	for(i=(92*height)/100;i<height;i++)
	{
		for(j=0;j<width;j++)
		{
			int p=j*3+i*frame->linesize[0];
			int r=frame->data[0][p];
			int g=frame->data[0][p+1];
			int b=frame->data[0][p+2];
			pixSetRGBPixel(im,j,i,r,g,b);
			float L,A,B;
			rgb_to_lab((float)r,(float)g,(float)b,&L,&A,&B);
			if(L > ctx->lum_thresh)
				pixSetRGBPixel(lum_im,j,i,255,255,255);
			else
				pixSetRGBPixel(lum_im,j,i,0,0,0);
		}
	}

	//Handle the edge image
	edge_im = pixCreate(width,height,8);
	edge_im = pixConvertRGBToGray(im,0.0,0.0,0.0);
	edge_im = pixSobelEdgeFilter(edge_im, L_VERTICAL_EDGES);
	edge_im = pixDilateGray(edge_im, 21, 11);
	edge_im = pixThresholdToBinary(edge_im,50);

	for(i=92*(height/100);i<height;i++)
	{
		for(j=0;j<width;j++)
		{
			unsigned int p1,p2,p3;
			pixGetPixel(edge_im,j,i,&p1);
			// pixGetPixel(pixd,j,i,&p2);
			pixGetPixel(lum_im,j,i,&p3);
			if(p1==0&&p3>0)
				pixSetRGBPixel(feat_im,j,i,255,255,255);
			else
				pixSetRGBPixel(feat_im,j,i,0,0,0);
		}
	}

	// Tesseract OCR for the ticker text here
	subtitle_text = get_ocr_text_simple(ctx, lum_im);
	char write_path[100];
	sprintf(write_path,"./lum_im%04d.jpg",index);
	pixWrite(write_path,lum_im,IFF_JFIF_JPEG);
	sprintf(write_path,"./im%04d.jpg",index);
	pixWrite(write_path,im,IFF_JFIF_JPEG);

	pixDestroy(&lum_im);
	pixDestroy(&im);
	pixDestroy(&edge_im);
	pixDestroy(&feat_im);

	return subtitle_text;
}
Exemplo n.º 2
0
void _display_frame(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int timestamp)
{
	// Debug: Display the frame after processing
	PIX *im;
	im = pixCreate(width,height,32);
	PIX *hue_im = pixCreate(width,height,32);

	int i,j;
	for(i=0;i<height;i++)
	{
		for(j=0;j<width;j++)
		{
			int p=j*3+i*frame->linesize[0];
			int r=frame->data[0][p];
			int g=frame->data[0][p+1];
			int b=frame->data[0][p+2];
			pixSetRGBPixel(im,j,i,r,g,b);
			float H,S,V;
			rgb_to_hsv((float)r,(float)g,(float)b,&H,&S,&V);
			if(abs(H-ctx->hue)<20)
			{
				pixSetRGBPixel(hue_im,j,i,r,g,b);
			}
		}
	}

	PIX *edge_im = pixCreate(width,height,8),*edge_im_2 = pixCreate(width,height,8);
	edge_im = pixConvertRGBToGray(im,0.0,0.0,0.0);
	edge_im = pixSobelEdgeFilter(edge_im, L_VERTICAL_EDGES);
	edge_im = pixDilateGray(edge_im, 21, 1);
	edge_im = pixThresholdToBinary(edge_im,50);
	PIX *pixd = pixCreate(width,height,1);
	pixSauvolaBinarize(pixConvertRGBToGray(hue_im,0.0,0.0,0.0), 15, 0.3, 1, NULL, NULL, NULL, &pixd);

	edge_im_2 = pixConvertRGBToGray(hue_im,0.0,0.0,0.0);
	edge_im_2 = pixDilateGray(edge_im_2, 5, 5);

	PIX *feat_im = pixCreate(width,height,32);
	for(i=3*(height/4);i<height;i++)
	{
		for(j=0;j<width;j++)
		{
			unsigned int p1,p2,p3,p4;
			pixGetPixel(edge_im,j,i,&p1);
			pixGetPixel(pixd,j,i,&p2);
			// pixGetPixel(hue_im,j,i,&p3);
			pixGetPixel(edge_im_2,j,i,&p4);
			if(p1==0&&p2==0&&p4>0)//if(p4>0&&p1==0)//if(p2==0&&p1==0&&p3>0)
			{
				pixSetRGBPixel(feat_im,j,i,255,255,255);
			}
		}
	}

	char *txt=NULL;
	// txt = get_ocr_text_simple(ctx, feat_im);
	// txt=get_ocr_text_wordwise_threshold(ctx, feat_im, ctx->conf_thresh);
	// if(txt != NULL)printf("%s\n", txt);

	pixDestroy(&im);
	pixDestroy(&edge_im);
	pixDestroy(&feat_im);
	pixDestroy(&edge_im_2);
	pixDestroy(&pixd);
}
Exemplo n.º 3
0
char *_process_frame_color_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
{
	char *subtitle_text=NULL;
	PIX *im;
	im = pixCreate(width,height,32);
	PIX *hue_im = pixCreate(width,height,32);

	int i,j;
	for(i=0;i<height;i++)
	{
		for(j=0;j<width;j++)
		{
			int p=j*3+i*frame->linesize[0];
			int r=frame->data[0][p];
			int g=frame->data[0][p+1];
			int b=frame->data[0][p+2];
			pixSetRGBPixel(im,j,i,r,g,b);
			float H,S,V;
			rgb_to_hsv((float)r,(float)g,(float)b,&H,&S,&V);
			if(abs(H-ctx->hue)<20)
			{
				pixSetRGBPixel(hue_im,j,i,r,g,b);
			}
		}
	}

	PIX *edge_im = pixCreate(width,height,8),*edge_im_2 = pixCreate(width,height,8);
	edge_im = pixConvertRGBToGray(im,0.0,0.0,0.0);
	edge_im = pixSobelEdgeFilter(edge_im, L_VERTICAL_EDGES);
	edge_im = pixDilateGray(edge_im, 21, 1);
	edge_im = pixThresholdToBinary(edge_im,50);
	PIX *pixd = pixCreate(width,height,1);
	pixSauvolaBinarize(pixConvertRGBToGray(hue_im,0.0,0.0,0.0), 15, 0.3, 1, NULL, NULL, NULL, &pixd);

	edge_im_2 = pixConvertRGBToGray(hue_im,0.0,0.0,0.0);
	edge_im_2 = pixDilateGray(edge_im_2, 5, 5);

	PIX *feat_im = pixCreate(width,height,32);
	for(i=3*(height/4);i<height;i++)
	{
		for(j=0;j<width;j++)
		{
			unsigned int p1,p2,p3,p4;
			pixGetPixel(edge_im,j,i,&p1);
			pixGetPixel(pixd,j,i,&p2);
			// pixGetPixel(hue_im,j,i,&p3);
			pixGetPixel(edge_im_2,j,i,&p4);
			if(p1==0&&p2==0&&p4>0)//if(p4>0&&p1==0)//if(p2==0&&p1==0&&p3>0)
			{
				pixSetRGBPixel(feat_im,j,i,255,255,255);
			}
		}
	}


	if(ctx->detect_italics)
	{
		ctx->ocr_mode = HARDSUBX_OCRMODE_WORD;
	}

	// TESSERACT OCR FOR THE FRAME HERE
	switch(ctx->ocr_mode)
	{
		case HARDSUBX_OCRMODE_WORD:
			if(ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_wordwise_threshold(ctx, feat_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_wordwise(ctx, feat_im);
			break;
		case HARDSUBX_OCRMODE_LETTER:
			if(ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_letterwise_threshold(ctx, feat_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_letterwise(ctx, feat_im);
			break;
		case HARDSUBX_OCRMODE_FRAME:
			if(ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_simple_threshold(ctx, feat_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_simple(ctx, feat_im);
			break;
		default:
			fatal(EXIT_MALFORMED_PARAMETER,"Invalid OCR Mode");
	}

	pixDestroy(&feat_im);
	pixDestroy(&im);
	pixDestroy(&edge_im);
	pixDestroy(&hue_im);

	return subtitle_text;
}
Exemplo n.º 4
0
char* _process_frame_white_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
{
	//printf("frame : %04d\n", index);
	PIX *im;
	PIX *edge_im;
	PIX *lum_im;
	PIX *feat_im;
	char *subtitle_text=NULL;
	im = pixCreate(width,height,32);
	lum_im = pixCreate(width,height,32);
	feat_im = pixCreate(width,height,32);
	int i,j;
	for(i=(3*height)/4;i<height;i++)
	{
		for(j=0;j<width;j++)
		{
			int p=j*3+i*frame->linesize[0];
			int r=frame->data[0][p];
			int g=frame->data[0][p+1];
			int b=frame->data[0][p+2];
			pixSetRGBPixel(im,j,i,r,g,b);
			float L,A,B;
			rgb_to_lab((float)r,(float)g,(float)b,&L,&A,&B);
			if(L > ctx->lum_thresh)
				pixSetRGBPixel(lum_im,j,i,255,255,255);
			else
				pixSetRGBPixel(lum_im,j,i,0,0,0);
		}
	}

	//Handle the edge image
	edge_im = pixCreate(width,height,8);
	edge_im = pixConvertRGBToGray(im,0.0,0.0,0.0);
	edge_im = pixSobelEdgeFilter(edge_im, L_VERTICAL_EDGES);
	edge_im = pixDilateGray(edge_im, 21, 11);
	edge_im = pixThresholdToBinary(edge_im,50);

	for(i=3*(height/4);i<height;i++)
	{
		for(j=0;j<width;j++)
		{
			unsigned int p1,p2,p3;
			pixGetPixel(edge_im,j,i,&p1);
			// pixGetPixel(pixd,j,i,&p2);
			pixGetPixel(lum_im,j,i,&p3);
			if(p1==0&&p3>0)
				pixSetRGBPixel(feat_im,j,i,255,255,255);
			else
				pixSetRGBPixel(feat_im,j,i,0,0,0);
		}
	}

	if(ctx->detect_italics)
	{
		ctx->ocr_mode = HARDSUBX_OCRMODE_WORD;
	}

	// TESSERACT OCR FOR THE FRAME HERE
	switch(ctx->ocr_mode)
	{
		case HARDSUBX_OCRMODE_WORD:
			if(ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_wordwise_threshold(ctx, lum_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_wordwise(ctx, lum_im);
			break;
		case HARDSUBX_OCRMODE_LETTER:
			if(ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_letterwise_threshold(ctx, lum_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_letterwise(ctx, lum_im);
			break;
		case HARDSUBX_OCRMODE_FRAME:
			if(ctx->conf_thresh > 0)
				subtitle_text = get_ocr_text_simple_threshold(ctx, lum_im, ctx->conf_thresh);
			else
				subtitle_text = get_ocr_text_simple(ctx, lum_im);
			break;
		default:
			fatal(EXIT_MALFORMED_PARAMETER,"Invalid OCR Mode");
	}

	pixDestroy(&lum_im);
	pixDestroy(&im);
	pixDestroy(&edge_im);
	pixDestroy(&feat_im);

	return subtitle_text;
}
Exemplo n.º 5
0
bool CJBig2File::MemoryToJBig2(unsigned char* pBufferBGRA ,int BufferSize, int nWidth, int nHeight, std::wstring sDstFileName)
{
	// check for valid input parameters

///////////////////////////////////////////////////////////
	if ( NULL == pBufferBGRA )	return false;

	int lBufferSize   = BufferSize;
	unsigned char *pSourceBuffer = pBufferBGRA;

	PIX  *pSource = pixCreate( nWidth, nHeight, 32 );
	if ( !pSource )	return false;

	for ( int nY = 0; nY < nHeight; nY++ )
	{
		for ( int nX = 0; nX < nWidth; nX++, pSourceBuffer += 3 )//todooo сделать 3 ? 4
		{
			pixSetRGBPixel( pSource, nX, nY, pSourceBuffer[ 2 ], pSourceBuffer[ 1 ], pSourceBuffer[ 0 ] );
		}
	}


	jbig2ctx *pContext = jbig2_init( m_dTreshold, 0.5, 0, 0, ! m_bPDFMode,  m_bRefine ? 10 : -1 );

	// Пока сделаем запись одной картинки в JBig2
	// TO DO: надо будет сделать запись нескольких картинок в 1 JBig2 файл

	// Убираем ColorMap
	PIX *pPixL = NULL;
	if ( NULL == ( pPixL = pixRemoveColormap( pSource, REMOVE_CMAP_BASED_ON_SRC ) ) ) 
	{
		pixDestroy( &pSource );
		jbig2_destroy( pContext );
		return false;
	}
	pixDestroy( &pSource );

	PIX *pPixT = NULL;
	if ( pPixL->d > 1 ) 
	{
		PIX *pGray = NULL;

		if ( pPixL->d > 8 ) 
		{
			pGray = pixConvertRGBToGrayFast( pPixL );
			if ( !pGray )
			{
				pixDestroy( &pSource );
				jbig2_destroy( pContext );
				return false;
			}
		} 
		else 
		{
			pGray = pixClone( pPixL );
		}

		if (  m_bUpscale2x ) 
		{
			pPixT = pixScaleGray2xLIThresh( pGray,  m_nBwTreshold );
		} 
		else if (  m_bUpscale4x ) 
		{
			pPixT = pixScaleGray4xLIThresh( pGray,  m_nBwTreshold );
		} 
		else 
		{
			pPixT = pixThresholdToBinary( pGray,  m_nBwTreshold );
		}

		pixDestroy( &pGray );
	} 
	else 
	{
		pPixT = pixClone( pPixL );
	}

	if ( m_sOutputTreshold.length() > 0 ) 
	{
		pixWrite( m_sOutputTreshold.c_str(), pPixT, IFF_BMP );
	}

	if (  m_bSegment && pPixL->d > 1 ) 
	{
		PIX *pGraphics = segment_image( pPixT, pPixL );
		if ( pGraphics ) 
		{
			char *sFilename;
			asprintf( &sFilename, "%s.%04d.%s", m_sBaseName.c_str(), 0, ".bmp" );
			pixWrite( sFilename, pGraphics, IFF_BMP );
			free( sFilename );
		} 
		if ( !pPixT ) 
		{
			// Ничего не делаем
			return true;
		}
	}

	pixDestroy( &pPixL );

	if ( !m_bSymbolMode ) 
	{
		int nLength = 0;
		uint8_t *pBuffer = jbig2_encode_generic( pPixT, !m_bPDFMode, 0, 0, m_bDuplicateLineRemoval, &nLength );

		bool bRes = true;
        NSFile::CFileBinary file;
        if (file.CreateFileW(sDstFileName ) == true )
        {
            file.WriteFile(pBuffer, nLength);
            file.CloseFile();
			bRes = true;
        }
		else
			bRes = false;

		pixDestroy( &pPixT );
		if ( pBuffer ) free( pBuffer );
		jbig2_destroy( pContext );

		return bRes;
	}

	int nNumPages = 1;
	jbig2_add_page( pContext, pPixT );
	pixDestroy( &pPixT );

	int nLength = 0;
	uint8_t *pBuffer = jbig2_pages_complete( pContext, &nLength );
	if ( !pBuffer )
	{
		jbig2_destroy( pContext );
		return false;
	}

	if ( m_bPDFMode ) 
	{
		std::wstring sFileName = sDstFileName;//m_sBaseName + _T(".sym");

        NSFile::CFileBinary file;
        if ( file.CreateFileW(sFileName) == false)
		{
			free( pBuffer );
			jbig2_destroy( pContext );
			return false;
		}
        file.WriteFile( pBuffer, nLength );
        file.CloseFile();
	}
	free( pBuffer );

	for ( int nIndex = 0; nIndex < nNumPages; ++nIndex ) 
	{
		pBuffer = jbig2_produce_page( pContext, nIndex, -1, -1, &nLength );
		if ( m_bPDFMode ) 
		{
            std::wstring sFileName = m_sBaseName + L".0000";

            NSFile::CFileBinary file;
            if ( file.CreateFileW(sFileName) ==false)
            {
				free( pBuffer );
				jbig2_destroy( pContext );
				return false;
			}
            file.WriteFile( pBuffer, nLength );
            file.CloseFile();
		} 
		free( pBuffer );
	}

	jbig2_destroy( pContext );

	return true;
}