Example #1
0
/**
 * Loads the image named by inputData.data["inputFile"] and forks a child
 * process that runs OCR on it.
 *
 * Child: fills inputData.data["inputFileName"], runs startOCR() on the loaded
 * image, sets pidIDArray[ID].status to 0, removes the file named by
 * inputData.data["log"] and exits with code 0.
 * Parent: destroys its copy of the image and returns.
 *
 * @param pidIDArray  slot table the child writes its completion flag into
 * @param ID          index of the slot in pidIDArray assigned to this child
 * @param maxFork     number of slots; not used by this function
 * @return 1 in the parent after a successful fork, 0 if fork() failed
 */
bool GMainEditor::forkProccesOCR_(pidID *pidIDArray,int ID, int maxFork){
    pechaImg=LoadImageData(inputData.data["inputFile"],0);
    // Named "pid" so the local no longer hides the pidID type used in the signature.
    int pid=fork();

    if (pid < 0){
        error((char*)"ERROR on fork");
        // Release the image before reporting failure: the next call loads it
        // again, so keeping it here would leak one image per failed fork.
        if(pechaImg)pechaImg->destroy();
        return 0;
    }
    if (pid == 0)  {
        // Derive the ".html" name from the input file (previously computed from
        // an empty string, so the printed name was meaningless).
        string str=inputData.data["inputFile"];
        str=substr(0,str.rfind("."),str);
        str+=".html";   cout_<<str<<endl;
        inputData.data["inputFileName"]=inputData.data["siteName"];
        inputData.data["inputFileName"]+=substr(inputData.data["siteRoot"].size(),inputData.data["inputFile"]);
        //readPageHTML();
        cout<<"NEW START "<<inputData.data["inputFile"]<<endl;
        pechaDataLoaded=0;
        startOCR(pechaImg);
        // Tell the parent that this slot is free again.
        pidIDArray[ID].status=0;
        cout<<"done "<<ID<<endl;
        remove(inputData.data["log"].c_str());
        exit(0);
    }
    // Parent: the child has its own copy of the image after fork().
    pechaImg->destroy();
    return 1;
}
Example #2
0
void GMainEditor::forkProccesOCR(int pidID,GBitmap *pechaImgID_){

	if (pidID < 0)
		error((char*)"ERROR on fork");
	if (pidID == 0)  {
		string str=inputData.data["inputFile"];
		str=substr(0,(int)str.rfind("."),str);
		str+=".html";   cout_<<str<<endl;
		inputData.data["inputFileName"]=inputData.data["siteName"];
		inputData.data["inputFileName"]+=substr((int)inputData.data["siteRoot"].size(),inputData.data["inputFile"]);
		//readPageHTML();
        cout_<<"NEW START "<<inputData.data["inputFile"]<<endl;
		pechaDataLoaded=0;
		startOCR(pechaImgID_);
		//if(stat)writeLetterStat();
		exit(0);
	}
}
Example #3
0
/**
 * Constructs the base OCR dialog.
 *
 * The dialog is titled "Optical Character Recognition" and carries the
 * buttons User1 ("Start OCR"), User2 ("Cancel") and Close, with User1 as
 * the default button.
 *
 * @param parent       parent widget handed to KDialogBase
 * @param spellConfig  spell-check configuration stored in m_spellConfig
 * @param face         dialog face type handed to KDialogBase
 */
KOCRBase::KOCRBase( QWidget *parent, KSpellConfig *spellConfig,
                    KDialogBase::DialogType face )
    : KDialogBase( face,
                   i18n("Optical Character Recognition"),
                   User2|Close|User1, User1,
                   parent, 0, false, true,
                   KGuiItem( i18n("Start OCR" ), "launch",
                             i18n("Start the Optical Character Recognition process" )),
                   KGuiItem( i18n("Cancel" ), "stopocr",
                             i18n("Stop the OCR Process" )) ),
      m_animation(0L),
      m_metaBox(0L),
      m_imgHBox(0L),
      m_previewPix(0L),
      m_currImg(0L),
      m_spellConfig(spellConfig),
      m_wantSpellCfg(true),
      m_userWantsSpellCheck(true),
      m_cbWantCheck(0L),
      m_gbSpellOpts(0L)
{
    kdDebug(28000) << "OCR Base Dialog!" << endl;

    // Fixed size used for the preview.
    m_previewSize.setHeight(300);
    m_previewSize.setWidth(200);

    // Restore the user's spell-check preference; it defaults to enabled.
    KConfig *config = KGlobal::config ();
    KConfigGroupSaver groupGuard( config, CFG_OCR_KSPELL );
    const bool wantSpellCheck = config->readBoolEntry(CFG_WANT_KSPELL, true);
    m_userWantsSpellCheck = wantSpellCheck;

    // Initial button state: starting and closing are allowed, cancelling is not.
    enableButton( Close, true );
    enableButton( User2, false );  /* Cancel    */
    enableButton( User1, true );   /* start ocr */

    // User1 first stores the configuration and then starts OCR; User2 stops it.
    connect( this, SIGNAL( user1Clicked()), this, SLOT( writeConfig()));
    connect( this, SIGNAL( user1Clicked()), this, SLOT( startOCR() ));
    connect( this, SIGNAL( user2Clicked()), this, SLOT( stopOCR() ));
}
Example #4
0
void GMainEditor::startOCRBatch(){
    
    imageEditor=(GImageEditor*)inputData.imageEditor;
    fontEditor=(GFontEditor*)inputData.fontEditor;
    logicProcessor=(GLogicProcessor*)inputData.logicProcessor;
    if(!logicProcessor->dictionaryReady)logicProcessor->readDictionary();
    aliKali=fontEditor->aliKali;

    
	string strHeaderHTML,srcLine,str;
	string  path=inputData.data["tablePath"]+"/header.xml";
	DIR *dir;
	//int mode;	
	int i=0;
    //читаем статистику использования букв книги
    //readLetterStat();
    //загружаем базу данных букв
    
#ifdef FORK
     int maxFork=inputData.num_cores*0.75;
     int pid;
     pidID *pidIDArray;
     
    
    if(inputData.fileList.size()>1){
        int countFork=0; 
        MemoryFile *pidData_mf;  //main file for conection with child process
        //inputData.data["statPath"].c_str()
        time_t seconds;    seconds = time (NULL);
        ostringstream out;  out<<"/tmp/"<<seconds;
        string path=out.str();
        pidData_mf=MemoryFile::create(path.c_str(), MemoryFile::if_exists_keep_if_dont_exists_create);
        pidData_mf->resize(sizeof(pidID)*maxFork);
        pidIDArray=(pidID*)pidData_mf->data(); //array which can be share between processes.
        
        
        for(int index=0;index<maxFork;index++){
            //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl;
            pidIDArray[index].status=0;
        }

        int ID=0;
        
        while(i<inputData.fileList.size()){
            cout<<"NEW file#1 "<<inputData.fileList[i]<<endl;
            if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){
                i++; continue;
            }	
            
            inputData.data["inputFile"]=inputData.fileList[i];
            
            string  path=inputData.data["inputFile"];    //проверяем есть ли такой распознаный файл
            path=substr(0,(int)path.rfind("."),path);
            string volume=path;
            string fileIndex=fileName(path);
            path+=".xml";
            if(is_file(path)){i++; continue;}
            
            
            if(!forkProccesOCR_(pidIDArray,ID,maxFork)){
                cout<<"ERROR on fork return";
                sleep(1); continue; 
            };
            i++;if(i==inputData.fileList.size())break;
            countFork++;

            for(int index=0;index<maxFork;index++)cout<<pidIDArray[index].status<<" ";
            cout<<endl;
            
            ID=100; int status;
            while(ID==100){
                if(countFork<=maxFork){     //есть свободные слоты для новых процессов
                    for(int index=0;index<maxFork;index++){  //маркируем слот как занятый
                        //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl;
                        if(pidIDArray[index].status==0){
                          ID=index;
                          pidIDArray[index].status=1;
                          break;
                        }    
                    }
                }else{
                    int forkStatusCount=0;
                    for(int index=0;index<maxFork;index++){   //подсчитываем количество активных процессов
                        if(pidIDArray[index].status==1)forkStatusCount++;
                    }
                    if(forkStatusCount==countFork){sleep(1); continue;};     //ждем завершения процесса
                    wait(&status);                           //регистрируем с системе завершенный процесс
                    countFork--;
                }   
            }	
        }
            
    }else{
        inputData.data["inputFile"]=inputData.fileList[0];
        pechaImg=LoadImageData(inputData.data["inputFile"],0); 
        startOCR(pechaImg);
    }
    
    
#else
    cout<<"NO FORK";
	while(i<inputData.fileList.size()){
		if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){
			readDirectoryToArray(inputData.fileList, inputData.fileList[i],"img");
			i++; continue;
		}	
		GBitmap* pechaImg;
		inputData.data["inputFile"]=inputData.fileList[i];

        if(!is_file(inputData.data["inputFile"]))continue;
        pechaImg=LoadImageData(inputData.data["inputFile"],0); 
        if(!pechaImg){cout_<<"no open file"<<inputData.data["inputFile"]<<endl; return;}
        
        str=inputData.data["inputFile"];
		str=substr(0,str.rfind("."),str);
		str+=".html";   //cout_<<str<<endl;
		inputData.data["inputFileName"]=inputData.data["siteName"];
		inputData.data["inputFileName"]+=substr(inputData.data["siteRoot"].size(),inputData.data["inputFile"]);

		//readPageHTML();
		inputData.c_out.open(str.c_str());
		pechaDataLoaded=0;
		startOCR(pechaImg);
        pechaImg->destroy();
		inputData.c_out.close();
		i++;
	}
	
#endif		
	//drawLettersInCorrectionTable(DRAW_BASE);
	
	cout_<<"COMPLETE"<<endl;
	
}//____________________________________________________________________________
Example #5
0
void GMainEditor::startOCRSystem(){
    
	string strHeaderHTML,srcLine,str;
	string  path=inputData.data["tablePath"]+"/header.xml";
	DIR *dir;
	//int mode;
	int i=0;
    uint freeMemory;
    //читаем статистику использования букв книги
    //readLetterStat();

    int maxProcess=inputData.num_cores*0.85;
    pidID *pidIDArray;
    
    if(inputData.data["pathDB"]!=""){
        inputData.data["inputFile"]=inputData.fileList[0];
        pechaImg=LoadImageData(inputData.data["inputFile"],0);
        startOCR(pechaImg);

        MemoryFile *pidData_mf=MemoryFile::create(inputData.data["pathDB"].c_str(),
                                                  MemoryFile::if_exists_keep_if_dont_exists_create);
        int ID=atoi(inputData.data["ID"].c_str());
        pidIDArray=(pidID*)pidData_mf->data(); //array which can be share between processes.
        pidIDArray[ID].status=0;
        cout<<"done OCR";
        inputData.log<<" done OCR"<<endl;
        inputData.log.close();
        remove(inputData.data["log"].c_str());
        exit(0);
    }
    
    if(inputData.fileList.size()>1){
        int countProcess=0;
        MemoryFile *pidData_mf;  //main file for inter proccess communications
        //inputData.data["statPath"].c_str()
        time_t seconds;    seconds = time (NULL);
        ostringstream out;  out<<"/tmp/"<<seconds;
        string pathDB=out.str();
        pidData_mf=MemoryFile::create(pathDB.c_str(), MemoryFile::if_exists_keep_if_dont_exists_create);
        pidData_mf->resize(sizeof(pidID)*maxProcess);
        pidIDArray=(pidID*)pidData_mf->data(); //array which can be share between processes.
        
        
        for(int index=0;index<maxProcess;index++){
            //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl;
            pidIDArray[index].status=0;
        }
        
        int ID=0;
        
        while(i<inputData.fileList.size()){

            if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){
                i++; continue;
            }
            
            inputData.data["inputFile"]=inputData.fileList[i];
            string next="";
            if(i<inputData.fileList.size()-2){
                next=inputData.fileList[i+1];
                next=str_replace(".tif",".html",next);
                next=str_replace(".jpg",".html",next);
            }
            
            string  path=inputData.data["inputFile"];    //проверяем есть ли такой распознаный файл
            path=substr(0,(int)path.rfind("."),path);
            string volume=path;
            string fileIndex=fileName(path);
            path+=".html";
            if(is_file(path)){i++; continue;}
            
            
            while(1){
                //проверяем загрузку системы
                str=run("vm_stat");
                vector<string>vm=explode("\n", str);
                str=str_replace("Pages free:", "", vm[1]);
                str=str_replace(" ", "", str);
                str=str_replace(".", "", str);
                freeMemory=atoi(str.c_str());
                freeMemory=(freeMemory*4.096)/1000;
                //cout<<"@freeMemory:"<<freeMemory<<endl;
                
                if(freeMemory<10){
                    cout<<"critical memory loading error";
                    run("killall OCRLib");
                    exit(0);
                }
                if(freeMemory>500)break;
                cout<<"@ no free memory for process. freeMemory:"<<freeMemory<<endl;
                sleep(10);
            }

            cout<<"NEW file#1 "<<inputData.fileList[i]<<endl;
            ostringstream out;
            out<<inputData.data["rootApp"]<<" \"xml=<fileList>"<<inputData.data["inputFile"]<<
            "</fileList><ocrData>"<<inputData.data["ocrData"]<<"</ocrData>"<<
            "<ocrLn>"<<inputData.data["ocrLn"]<<"</ocrLn>"<<"<pathDB>"<<pathDB<<"</pathDB>"<<
            "<ID>"<<ID<<"</ID><nextPage>"<<next<<"</nextPage><scale>"<<inputData.data["scale"]<<"</scale>\" &";
            string cmd=out.str();
            //cout<<cmd; exit(0);
            system(cmd.c_str());
            //sleep(1); //continue;
            
            
            i++;if(i==inputData.fileList.size())break;
            countProcess++;
            
            for(int index=0;index<maxProcess;index++)cout<<pidIDArray[index].status<<" ";
            cout<<endl;
            
            ID=100; int status;
            while(ID==100){
                if(countProcess<=maxProcess){     //есть свободные слоты для новых процессов
                    for(int index=0;index<maxProcess;index++){  //маркируем слот как занятый
                        //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl;
                        if(pidIDArray[index].status==0){
                            ID=index;
                            pidIDArray[index].status=1;
                            break;
                        }
                    }
                }else{
                    int processStatusCount=0;
                    for(int index=0;index<maxProcess;index++){   //подсчитываем количество активных процессов
                        if(pidIDArray[index].status==1)processStatusCount++;
                    }
                    if(processStatusCount==countProcess){sleep(1); continue;};     //ждем завершения процесса
                    countProcess--;
                }
            }
        }
        
    }else{
        i=0;
        //while(i<inputData.fileList.size()){
        //        if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){
        //            i++; continue;
        //        }
            inputData.data["inputFile"]=inputData.fileList[0];
            pechaImg=LoadImageData(inputData.data["inputFile"],0);
            startOCR(pechaImg);
        //    i++;
        //}
    }
}