예제 #1
0
/**
 * Runs OCR over every entry of inputData.fileList.
 *
 * Caches the editor/processor pointers published through inputData, loads the
 * dictionary if it is not ready yet, and then processes the list:
 *  - FORK build, more than one file: every image that has no "<name>.xml"
 *    next to it is handed to forkProccesOCR_() together with a pidID slot
 *    table backed by a MemoryFile created under /tmp;
 *  - FORK build, single file: the image is loaded and recognised in-process;
 *  - non-FORK build: images are recognised one by one and inputData.c_out is
 *    opened on "<image path without extension>.html" for each of them.
 *
 * Changes relative to the previous revision:
 *  - every successful opendir() is now paired with closedir();
 *  - the non-FORK loop advances past entries that are not regular files
 *    instead of spinning on them forever;
 *  - maxFork is clamped to at least 1 (num_cores*0.75 truncates to 0 on a
 *    single-core setting, which left the slot search without any slot);
 *  - the FORK single-file branch no longer reads fileList[0] of an empty list.
 */
void GMainEditor::startOCRBatch(){

    imageEditor=(GImageEditor*)inputData.imageEditor;
    fontEditor=(GFontEditor*)inputData.fontEditor;
    logicProcessor=(GLogicProcessor*)inputData.logicProcessor;
    if(!logicProcessor->dictionaryReady)logicProcessor->readDictionary();
    aliKali=fontEditor->aliKali;

    string str;
    // NOTE(review): this value is never read below; it is kept because
    // data["tablePath"] may create the key as a side effect - confirm before removing.
    string  path=inputData.data["tablePath"]+"/header.xml";
    DIR *dir;
    int i=0;
    // read the letter usage statistics of the book
    //readLetterStat();
    // load the letter database

#ifdef FORK
    int maxFork=inputData.num_cores*0.75;
    if(maxFork<1)maxFork=1;   // at least one slot, otherwise the slot search below never succeeds
    pidID *pidIDArray;

    if(inputData.fileList.size()>1){
        int countFork=0;
        MemoryFile *pidData_mf;  //main file for connection with child process
        //inputData.data["statPath"].c_str()
        // NOTE(review): the file name is just the current time in seconds under /tmp;
        // two batches started within the same second would share it.
        time_t seconds;    seconds = time (NULL);
        ostringstream out;  out<<"/tmp/"<<seconds;
        string path=out.str();
        pidData_mf=MemoryFile::create(path.c_str(), MemoryFile::if_exists_keep_if_dont_exists_create);
        pidData_mf->resize(sizeof(pidID)*maxFork);
        pidIDArray=(pidID*)pidData_mf->data(); //array which can be shared between processes.

        // start with every slot marked free
        for(int index=0;index<maxFork;index++){
            //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl;
            pidIDArray[index].status=0;
        }

        int ID=0;

        while(i<inputData.fileList.size()){
            cout<<"NEW file#1 "<<inputData.fileList[i]<<endl;
            // directories are skipped in this mode
            if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){
                closedir(dir);
                i++; continue;
            }

            inputData.data["inputFile"]=inputData.fileList[i];

            // check whether a recognised output file already exists for this image
            string  path=inputData.data["inputFile"];
            path=substr(0,(int)path.rfind("."),path);
            string volume=path;
            string fileIndex=fileName(path);
            path+=".xml";
            if(is_file(path)){i++; continue;}

            // on failure the same file is retried after a short pause
            if(!forkProccesOCR_(pidIDArray,ID,maxFork)){
                cout<<"ERROR on fork return";
                sleep(1); continue;
            };
            i++;if(i==inputData.fileList.size())break;
            countFork++;

            for(int index=0;index<maxFork;index++)cout<<pidIDArray[index].status<<" ";
            cout<<endl;

            // 100 is the "no slot chosen yet" marker
            ID=100; int status;
            while(ID==100){
                if(countFork<=maxFork){     // there are free slots for new processes
                    for(int index=0;index<maxFork;index++){  // mark the slot as occupied
                        //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl;
                        if(pidIDArray[index].status==0){
                          ID=index;
                          pidIDArray[index].status=1;
                          break;
                        }
                    }
                }else{
                    int forkStatusCount=0;
                    for(int index=0;index<maxFork;index++){   // count the active processes
                        if(pidIDArray[index].status==1)forkStatusCount++;
                    }
                    if(forkStatusCount==countFork){sleep(1); continue;};     // wait for a process to finish
                    wait(&status);                           // reap the finished process
                    countFork--;
                }
            }
        }

    }else if(!inputData.fileList.empty()){
        inputData.data["inputFile"]=inputData.fileList[0];
        pechaImg=LoadImageData(inputData.data["inputFile"],0);
        startOCR(pechaImg);
    }


#else
    cout<<"NO FORK";
    while(i<inputData.fileList.size()){
        // a directory entry is expanded through readDirectoryToArray() and then skipped
        if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){
            closedir(dir);
            readDirectoryToArray(inputData.fileList, inputData.fileList[i],"img");
            i++; continue;
        }
        GBitmap* pechaImg;
        inputData.data["inputFile"]=inputData.fileList[i];

        // skip entries that are not regular files (the index must advance here)
        if(!is_file(inputData.data["inputFile"])){i++; continue;}
        pechaImg=LoadImageData(inputData.data["inputFile"],0);
        if(!pechaImg){cout_<<"no open file"<<inputData.data["inputFile"]<<endl; return;}

        // output goes to "<image path without extension>.html"
        str=inputData.data["inputFile"];
        str=substr(0,str.rfind("."),str);
        str+=".html";   //cout_<<str<<endl;
        inputData.data["inputFileName"]=inputData.data["siteName"];
        inputData.data["inputFileName"]+=substr(inputData.data["siteRoot"].size(),inputData.data["inputFile"]);

        //readPageHTML();
        inputData.c_out.open(str.c_str());
        pechaDataLoaded=0;
        startOCR(pechaImg);
        pechaImg->destroy();
        inputData.c_out.close();
        i++;
    }

#endif
    //drawLettersInCorrectionTable(DRAW_BASE);

    cout_<<"COMPLETE"<<endl;

}//____________________________________________________________________________
예제 #2
0
/**
 * Runs the text conversion selected by inputData.data["ocrData"].
 *
 * Directory entries of inputData.fileList are first expanded through
 * readDirectoryToArray() ("rtf" for RTFToYagpo, "txt" otherwise) and removed
 * from the list. Then the branch matching ocrData runs. Most branches have
 * two modes, chosen by inputData.data["InputMethod"] (tibTextCorrector uses
 * inputData.data["mode"] instead):
 *  - "fileList": every entry is a path; it is read, converted line by line
 *    and written out (usually to "<path>_out.txt");
 *  - otherwise: every entry is itself the text to convert and the
 *    concatenated result is returned (two branches print it instead).
 *
 * @return the converted text, a status string ("done" / "done convert"),
 *         or "" when the selected branch produces no return value.
 *
 * Changes relative to the previous revision:
 *  - opendir() is now paired with closedir();
 *  - text-mode loops of dWylieToYagpo, dSinhalaASCIToYagpo, SinhalaUniToYagpo
 *    and SinhalaMettaToYagpo no longer clear the accumulator on every
 *    iteration (only the last item used to survive);
 *  - text-mode UnicodeToYagpo / BonPDFToUni put the newline between items
 *    (items 0 and 1 used to be glued together);
 *  - a second, identical "YagpoToWylie" branch and an empty
 *    "SinhalaUniToYagpo" branch were removed; the unused fileFlag is gone.
 */
string GLogicProcessor::startConvert(){
	//cout<<"Start inputData.data[\"inputFolder\"]"<<inputData.data["inputFolder"]<<END;
	string str,path;
	vector<string> strVector;

	int index=0;
	DIR *dir;
	string ocrData=inputData.data["ocrData"];

	// expand directory entries; the index is not advanced after an erase
	// because the next entry has moved into the same position
	while(index<inputData.fileList.size()){
		if( ( dir=opendir(inputData.fileList[index].c_str()))!=NULL){
			closedir(dir);
			if(ocrData=="RTFToYagpo"){
				readDirectoryToArray(inputData.fileList, inputData.fileList[index],"rtf");
			}else{
				readDirectoryToArray(inputData.fileList, inputData.fileList[index],"txt");
			}
			inputData.fileList.erase(inputData.fileList.begin()+index);
			continue;
		}
		index++;
	}

	if(ocrData=="YagpoToWylie"){
		string mainString;
		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"YagpoToWylieConverter inputData.fileList.size()="<<inputData.fileList.size()<<END;
			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				path=inputData.fileList[i]+"_out.txt";
				readText(strVector, inputData.fileList[i]);
				mainString="";
				for(int n=0;n<strVector.size();n++){
					mainString+=TibUniToWylie(strVector[n],2);
					mainString+="\n";
				}
				writeText(mainString, path);
				cout<<"done convert";
			}
		}else{
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=TibUniToWylie(inputData.fileList[i],2);
				mainString+="\n";
			}
			return mainString;
		}
	}

	if(ocrData=="CXS_to_UTF"){
		loadMapFilePali("CXS_UTF_HTML.xml");
		cout<<"fileList.size()="<<inputData.fileList.size();
		for(int i=0;i<inputData.fileList.size();i++){
			inputData.data["inputFile"]=inputData.fileList[i];
			cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<END;
			strVector.resize(0);
			readText(strVector,inputData.data["inputFile"]);
			for(int m=0;m<strVector.size();m++){
				//strVector[m]= regex_replace(  strVector[m], date, format );
				strVector[m]=Unicode_to_UTF(strVector[m]);
				convertCXS_to_UTF_nocopy(strVector[m]);
				//cout<<strVector[m];
			}
			// the input file itself is overwritten with the converted lines
			writeText(strVector,inputData.data["inputFile"]);

		}

	}

	if(ocrData=="ConcatenateFolder"){
		cout<<"fileList.size()="<<inputData.fileList.size();
		ofstream srcOutput;
		str=inputData.data["inputFolder"];
		str+="/allText.txt";
		srcOutput.open(str.c_str());
		// str is reused from here on as the per-file read buffer
		for(int i=0;i<inputData.fileList.size();i++){
			inputData.data["inputFile"]=inputData.fileList[i];
			readText(str,inputData.data["inputFile"].c_str());
			cout<<"cat "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<END;
			srcOutput<<str<<endl;
		}
		srcOutput.close();
	}

	if(ocrData=="LowerCase"){
		loadMapFilePali("CXS_UTF_HTML.xml");
		cout<<"fileList.size()="<<inputData.fileList.size();
		for(int i=0;i<inputData.fileList.size();i++){
			inputData.data["inputFile"]=inputData.fileList[i];
			cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<END;
			strVector.resize(0);
			readText(strVector,inputData.data["inputFile"]);
			int step=0;
			for(int m=0;m<strVector.size();m++){
				//strVector[m]= regex_replace(  strVector[m], date, format );
				//strVector[m]=Unicode_to_UTF(strVector[m]);
				lowerCase_nocopy(strVector[m]);
				// progress output once every 1000 lines
				if(step==1000){
					cout<<m<<"."<<strVector[m]<<END;
				step=0;}step++;
			}
			// the input file itself is overwritten with the converted lines
			writeText(strVector,inputData.data["inputFile"]);

		}

	}

	//cout<<"inputData.data[\"ocrData\"]="<<ocrData<<END;

	if(ocrData=="PaliUTFToEng"){

		cout<<"PaliUTFToEng"<<END;

	}
	if(ocrData=="RTFToYagpo"){
		// NOTE(review): the report stream is opened at the filesystem root - confirm this path is intended.
		ofstream c_out; c_out.open("/_out.txt");
		cout<<"RTFToYagpo inputData.fileList.size()="<<inputData.fileList.size()<<END;
		string mainString;
		for(int i=0;i<inputData.fileList.size();i++){
			mainString="";
			// without COCOA no conversion happens and an empty file is written
#ifdef COCOA
			RTFtoYagpoConverter(mainString,inputData.fileList[i]);
#endif
			string path=inputData.fileList[i]+"_out.txt";
			cout<<"path="<<path<<END;
			writeText(mainString, path);
			c_out<<report;
		}
		return "done convert";
	}

	if(ocrData=="dWylieToYagpo"){
		string mainString;
		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"dWylieToYagpoConverter inputData.fileList.size()="<<inputData.fileList.size()<<END;

			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				path=inputData.fileList[i]+"_out.txt";
				readText(strVector, inputData.fileList[i]);
				mainString="";
				int step=0;
				for(int n=0;n<strVector.size();n++){
					if(step==1000){cout<<n<<" ";step=0;}step++;
					mainString+=dWylieToYagpoConverter(strVector[n]);
					mainString+="\n";
				}
				writeText(mainString, path);
			}
		}else{
			// accumulate across all items (the reset used to sit inside the loop)
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=dWylieToYagpoConverter(inputData.fileList[i])+"\n";
			}
			return mainString;
		}
	}
	if(ocrData=="dSinhalaASCIToYagpo"){
		string mainString;
		string path=inputData.data["tablePath"]+"codePages/SinhalaASCI.xml";
		readMapXML(SinhalaASCI,path);
		cout<<"SinhalaASCI.size()="<<SinhalaASCI.size()<<END;

		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"dSinhalaASCIToYagpo inputData.fileList.size()="<<inputData.fileList.size()<<END;
			int step=0;
			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				path=inputData.fileList[i]+"_out.txt";
				readText(strVector, inputData.fileList[i]);
				cout<<"strVector.size()="<<strVector.size()<<END;
				mainString="";
				for(int n=0;n<strVector.size();n++){
					mainString+=dSinhalaASCIToYagpo(strVector[n])+"\n";
					if(step==1000){cout<<n<<" ";step=0;}step++;
				}
				writeText(mainString, path);
			}
		}else{
			int step=0;
			// accumulate across all items (the reset used to sit inside the loop)
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=dSinhalaASCIToYagpo(inputData.fileList[i]);
				if(step==1000){cout<<".";step=0;}step++;
			}
			// this branch prints the result instead of returning it
			cout<<mainString<<END;
		}
	}

	if(ocrData=="SinhalaUniToYagpo"){
		string mainString;
		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"SinhalaUniToYagpo inputData.fileList.size()="<<inputData.fileList.size()<<END;
			int step=0;
			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				path=inputData.fileList[i]+"_out.txt";
				readText(strVector, inputData.fileList[i]);
				cout<<"strVector.size()="<<strVector.size()<<END;
				mainString="";
				for(int n=0;n<strVector.size();n++){
					mainString+=SinhalaUniToYagpo(strVector[n],2)+"\n";
					if(step==1000){cout<<n<<" ";step=0;}step++;
				}
				writeText(mainString, path);
			}
		}else{
			int step=0;
			// accumulate across all items (the reset used to sit inside the loop)
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=SinhalaUniToYagpo(inputData.fileList[i],2);
				if(step==1000){cout<<".";step=0;}step++;
			}
			return mainString;
		}

	}

	if(ocrData=="SinhalaMettaToYagpo"){
		string mainString;
		string path=inputData.data["tablePath"]+"codePages/SinhalaMetta.xml";
		readMapXML(SinhalaASCI,path);
		cout<<"SinhalaASCI.size()="<<SinhalaASCI.size()<<END;

		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"dSinhalaASCIToYagpo inputData.fileList.size()="<<inputData.fileList.size()<<END;

			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				path=inputData.fileList[i]+"_out.txt";
				readText(strVector, inputData.fileList[i]);
				cout<<"strVector.size()="<<strVector.size()<<END;
				mainString=""; int step=0;
				for(int n=0;n<strVector.size();n++){  //cout <<"next string "<<n<<" ="<<strVector[n]<<endl;
					// empty lines are kept as empty lines without calling the converter
					if(strVector[n].size()){
					   mainString+=SinghalaASCIToYagpo(strVector[n])+"\n";
					}else{mainString+="\n";}
					if(step==1000){cout<<n<<" ";step=0;}step++;
				}
				writeText(mainString, path);
			}
		}else{
			int step=0;
			// accumulate across all items (the reset used to sit inside the loop)
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=dSinhalaASCIToYagpo(inputData.fileList[i]);
				if(step==1000){cout<<".";step=0;}step++;
			}
			return mainString;
		}
		cout<<"DONE CONVERT";
	}


	if(ocrData=="WylieToYagpo"){
		string mainString; cout<<" @inputData.data[InputMethod]="<<inputData.data["InputMethod"]<<endl;
		loadTransliterationFile("TranslitTableUni_Wylie.xml");

		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"WylieToYagpoConverter inputData.fileList.size()="<<inputData.fileList.size()<<END;
			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				path=inputData.fileList[i]+"_out.txt"; cout<<" path="<<path;
				readText(strVector, inputData.fileList[i]);
				cout<<" strVector="<<strVector.size()<<endl;
				mainString="";
				int step=0;
				string str;
				for(int n=0;n<strVector.size();n++){
					if(step==100){cout<<n<<" ";step=0;}step++;
					// file mode additionally passes the result through YagpoToUni()
					str=WylieToYagpoConverter(strVector[n]);
					mainString+=YagpoToUni(str);
					mainString+="\n";
				}
				writeText(mainString, path);
				cout<<"done convert";
			}
		}else{
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=WylieToYagpoConverter(inputData.fileList[i])+"\n";
			}
			return mainString;
		}
	}

	if(ocrData=="TibUniToWylie"){
		string mainString;
		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"YagpoToWylieConverter inputData.fileList.size()="<<inputData.fileList.size()<<END;
			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				path=inputData.fileList[i]+"_out.txt";
				readText(strVector, inputData.fileList[i]);
				mainString="";
				for(int n=0;n<strVector.size();n++){
					mainString+=TibUniToWylie(strVector[n],1);
					mainString+="\n";
				}
				writeText(mainString, path);
				cout<<"done convert";
			}
		}else{
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=TibUniToWylie(inputData.fileList[i],1);
				mainString+="\n";
			}
			return mainString;
		}
	}

	// "YagpoToWylie" is handled once, near the top of this function.

	if(ocrData=="YagpoToUnicode"){
		string mainString;
		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"YagpoToUnicode inputData.fileList.size()="<<inputData.fileList.size()<<END;
			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				path=inputData.fileList[i]+"_out.txt";
				readText(strVector, inputData.fileList[i]);
				mainString="";
				cout<<"strVector.size()="<<strVector.size()<<endl;
				int step=0;
				for(int n=0;n<strVector.size();n++){
					// prints a dot roughly every 1% of the lines
					if(step==strVector.size()/100){ step=0;cout<<".";}step++;
					mainString+=YagpoToUni(strVector[n]);
					mainString+="\n";
				}
				writeText(mainString, path);
				//return path;
			}
			return "done";
		}else{
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=YagpoToUni(inputData.fileList[i]);
				//cout<<"mainString="<<mainString<<endl;
				mainString+="\n";
			}
			return mainString;
		}
	}

	if(ocrData=="UnicodeToYagpo"){
		string mainString;
		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"YagpoToUnicode inputData.fileList.size()="<<inputData.fileList.size()<<END;
			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				cout<<"convert "<<inputData.fileList[i]<<endl;
				path=inputData.fileList[i]+"_out.txt";
				readText(strVector, inputData.fileList[i]);
				mainString="";
				int step=0;
				for(int n=0;n<strVector.size();n++){
					//mainString+=UnicodeToYagpo(strVector[n]);
					if(step==100000){cout<<n<<" "; step=0;} step++;
					mainString+=tibetanUTFToYagpo(strVector[n],1);
					mainString+="\n";
				}
				writeText(mainString, path);
			}
		}else{
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				// newline goes between items, not after every item but the first
				if(i)mainString+="\n";
				mainString+=UnicodeToYagpo(inputData.fileList[i]);
			}
			return mainString;
		}
	}
	if(ocrData=="BonPDFToUni"){
		string mainString;
		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"BonPDFToUni inputData.fileList.size()="<<inputData.fileList.size()<<END;
			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				cout<<"convert "<<inputData.fileList[i]<<endl;
				path=inputData.fileList[i]+"_out.txt";
				readText(strVector, inputData.fileList[i]);
				mainString="";
				for(int n=0;n<strVector.size();n++){
					// BonPDFToUni() is called for its effect on the argument; the line is appended afterwards
					BonPDFToUni(strVector[n]);
					mainString+=strVector[n];
					mainString+="\n";
				}
				writeText(mainString, path);
			}
			return "done";
		}else{
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				BonPDFToUni(inputData.fileList[i]);
				// newline goes between items, not after every item but the first
				if(i)mainString+="\n";
				mainString+=inputData.fileList[i];
			}
			return mainString;
		}

	}

	if(ocrData=="tibTextCorrector"){
		string mainString,path;
		readGrammarDataXML(inputData.data["wordsDataPath"]);
		if(inputData.data["mode"]!="text"){
			cout<<"TibetanCorrector inputData.fileList.size()="<<inputData.fileList.size()<<END;
			cout<<" mode="<<inputData.data["mode"]<<endl;

			// NOTE(review): both commands below splice file names into a shell
			// command line; a name containing a double quote or other shell
			// metacharacters will break the command or be interpreted by the shell.
			if(inputData.data["system"]=="process"){
				for(int i=0;i<inputData.fileList.size();i++){
					cout<<"convert "<<inputData.fileList[i]<<endl;
					inputData.data["fileName"]=inputData.fileList[i];
					path=str_replace(".txt",".html",inputData.fileList[i]);
					inputData.data["outFile"]=path;
					TibetanCorrector();
					string cmd="textutil -convert rtf \""+path+"\"";
					cout<<cmd<<endl;
					system(cmd.c_str());
				}
			}else{

				// launch inputData.data["rootApp"] in the background once per file,
				// passing <system>process</system> in its xml argument
				for(int i=0;i<inputData.fileList.size();i++){
					//strVector.resize(0);
					cout<<"convert "<<inputData.fileList[i]<<endl;
					ostringstream out;
					out<<inputData.data["rootApp"]<<" \"xml=<fileList>"<<inputData.fileList[i]<<
					"</fileList><ocrData>"<<inputData.data["ocrData"]<<"</ocrData>"<<
					"<ocrLn>"<<inputData.data["ocrLn"]<<"</ocrLn><system>process</system>\" &";
					string cmd=out.str();
					//cout<<cmd; exit(0);
					system(cmd.c_str());
				}
			}
			return "done";
		}else{
			mainString=implode("\n",inputData.fileList);
			mainString=lineTibetanCorrector(mainString);
			return mainString;

		}
	}


	if(ocrData=="transcription"){
		string mainString,path;
		loadTransliterationFile("TranslitTableUni_Wylie.xml");
		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"TranslitYagpoRus inputData.fileList.size()="<<inputData.fileList.size()<<END;

			for(int i=0;i<inputData.fileList.size();i++){
				//strVector.resize(0);
				cout<<"convert "<<inputData.fileList[i]<<endl;
				// input and output paths are passed to Transcription() through inputData.data
				inputData.data["fileName"]=inputData.fileList[i];
				path=inputData.fileList[i]+"_out.txt";
				inputData.data["outFile"]=path;
				Transcription();
			}
			return "done";
		}else{
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=lineTranscription(inputData.fileList[i]);
				mainString+="\n";
			}
			return mainString;
		}
	}

	if(ocrData=="textNormalisation"){
		string mainString,path;

		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"textNormalisation inputData.fileList.size()="<<inputData.fileList.size()<<END;
			for(int i=0;i<inputData.fileList.size();i++){
				//strVector.resize(0);
				cout<<"convert "<<inputData.fileList[i]<<endl;
				inputData.data["fileName"]=inputData.fileList[i];
				path=inputData.fileList[i]+"_out.txt";
				inputData.data["outFile"]=path;
				string srcStr;
				readText(srcStr,inputData.data["fileName"].c_str());
				//readText(strVector, inputData.fileList[i]);
				//mainString="";
				//for(int n=0;n<strVector.size();n++){
				//	mainString+=TranslitYagpo(strVector[n]);
				//		mainString+="\n";
				//}
				//writeText(mainString, path);
				textNormalisation(srcStr);
			}
		}else{
			// NOTE(review): the conversion call is commented out, so text mode
			// currently returns only newline characters.
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				//mainString+=TranslitYagpo();
				if(i)mainString+="\n";
			}
			return mainString;
		}
	}

	if(ocrData=="TXTtoXML"){
		string mainString;
		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"TXTtoXML inputData.fileList.size()="<<inputData.fileList.size()<<END;
			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				path=inputData.fileList[i]+".xml";
				path=str_replace(".doc.txt", "" ,path);
				path=str_replace(".DOC.txt", "" ,path);
				readText(strVector, inputData.fileList[i]);
				cout<<"strVector.size()="<<strVector.size()<<END;
				mainString="<text:text xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:text=\"http://www.tbrc.org/models/text#\" RID=\"\" lang=\"bo_ZH\" volume=\"\" num=\"\" fromWork=\"lam.dre\" fromVolume=\"\" start=\"0\" last=\"\">";
				for(int n=0;n<strVector.size();n++){
					// lines containing "FILE" or "PAGE" are dropped
					if(strVector[n].find("FILE",0)==string::npos&&strVector[n].find("PAGE",0)==string::npos){
						//if(strVector[n].find("Corel",0)!=string::npos)cout<<inputData.fileList[i]<<END;
						mainString+=strVector[n];
						mainString+="\n";
					}
				}
				mainString+="</text:text>";
				cout<<path<<END;
				writeText(mainString, path);
			}
		}else{
			mainString="start\n";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=YagpoToUni(inputData.fileList[i]);
			}
			cout<<mainString<<END;
		}
	}

	if(ocrData=="TXTtoHTML"){
		string mainString;
		if(inputData.data["InputMethod"]=="fileList"){
			cout<<"TXTtoHTML inputData.fileList.size()="<<inputData.fileList.size()<<END;
			// file mode currently only reads each file and reports its line count;
			// the code that built and wrote the output is commented out
			for(int i=0;i<inputData.fileList.size();i++){

				strVector.resize(0);
				path=inputData.fileList[i]+".xml";
				path=str_replace(".doc.txt", "" ,path);
				path=str_replace(".DOC.txt", "" ,path);
				readText(strVector, inputData.fileList[i]);
				cout<<"inputData.fileList[i]="<<inputData.fileList[i]<<" strVector.size()="<<strVector.size()<<END;
				//continue;
				/*
				mainString="<text:text xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:text=\"http://www.tbrc.org/models/text#\" RID=\"\" lang=\"bo_ZH\" volume=\"\" num=\"\" fromWork=\"lam.dre\" fromVolume=\"\" start=\"0\" last=\"\">";
				for(int n=0;n<strVector.size();n++){
					if(strVector[n].find("FILE",0)==string::npos&&strVector[n].find("PAGE",0)==string::npos){
						//if(strVector[n].find("Corel",0)!=string::npos)cout<<inputData.fileList[i]<<END;
						mainString+=strVector[n];
						mainString+="\n";
					}
				}
				mainString+="</text:text>";
				cout<<path<<END;
				writeText(mainString, path);
				*/
			}
		}else{
			mainString="start\n";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=YagpoToUni(inputData.fileList[i]);
			}
			cout<<mainString<<END;
		}
	}

	if(ocrData=="UTF8"){
		string mainString;
		if(inputData.data["InputMethod"]=="fileList"){
			DT("YagpoToUnicode inputData.fileList.size()="<<inputData.fileList.size()<<endl);
			for(int i=0;i<inputData.fileList.size();i++){
				strVector.resize(0);
				// the input file is overwritten in place
				path=inputData.fileList[i];
				DT("convert path "<<path<<endl);
				readText(strVector, inputData.fileList[i]);
				mainString="";
				for(int n=0;n<strVector.size();n++){
					mainString+=Unicode_to_UTF(strVector[n]);
					mainString+="\n";
				}
				writeText(mainString, path);
			}
		}else{
			// NOTE(review): text mode calls YagpoToUni() while file mode calls
			// Unicode_to_UTF() - confirm this difference is intended.
			mainString="";
			for(int i=0;i<inputData.fileList.size();i++){
				mainString+=YagpoToUni(inputData.fileList[i]);
				mainString+="\n";
			}
			return mainString;
		}
	}


return "";

}//________________________________________________________________________________________________________________
예제 #3
0
/**
 * Processes every image listed in inputData.fileList.
 *
 * If the list is empty it is first filled from inputData.data["inputFolder"].
 * Directory entries met while iterating are expanded through
 * readDirectoryToArray() and skipped.
 *
 *  - FORK11 build: each image is handed to forkImageProcess() together with a
 *    pidID slot table backed by a MemoryFile at inputData.data["statPath"].
 *  - otherwise: each image is loaded, passed to proccessImage() and, when
 *    inputData.data["fileSave"] is "print3_Pages" or "print3Pages", drawn
 *    onto a pair of print sheets that are written as "<name>_print0.tif" and
 *    "<name>_print1.tif" (six images per pair in the first mode, four in the
 *    second).
 *
 * Changes relative to the previous revision:
 *  - every successful opendir() is now paired with closedir();
 *  - an image that fails to load is reported and skipped instead of being
 *    dereferenced;
 *  - the print sheet pointers are initialised and cleared after destroy(),
 *    and sheets still pending after the loop are written out;
 *  - string::npos replaces the comparison of find() with -1.
 */
void GImageEditor::imageProcess(){

    cout<<"START image";

    string strHeaderHTML,srcLine,str;
    string  path=inputData.data["tablePath"]+"/header.xml";
    replace(path.begin(),path.end(),'\\','/');

    if(!inputData.fileList.size()){
        readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"img");
    }

    //cout<<"inputData.fileList.size()="<<inputData.fileList.size();

#ifdef FORK11
    int maxFork=inputData.num_cores*2;  // image processing is lighter than OCR
    pidID *pidIDArray;


    MemoryFile *pidData_mf;  //main file for connection with child process
    pidData_mf=MemoryFile::create(inputData.data["statPath"].c_str(), MemoryFile::if_exists_keep_if_dont_exists_create);
    pidData_mf->resize(sizeof(pidID)*maxFork);
    pidIDArray=(pidID*)pidData_mf->data(); //array which can be shared between processes.

    for(int index=0;index<maxFork;index++)pidIDArray[index].status=0;
    //for(int index=0;index<maxFork;index++)cout_<<pidIDArray[index].status;


    int ID=0;
    unsigned int i=0;
    DIR *dir;
    int countFork=0;
    int status;
    while(i<inputData.fileList.size()){

        if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){
            closedir(dir);
            cout_<<"NEW DIR#1 "<<inputData.fileList[i]<<endl;
            readDirectoryToArray(inputData.fileList, inputData.fileList[i],"img");
            i++; continue;
        }
        //cout_<<"NEW START"<<endl;

        inputData.data["inputFile"]=inputData.fileList[i];
        int statusFork=forkImageProcess(pidIDArray,maxFork,ID);
        if(statusFork==0){
            wait(&status);sleep(1);  // try waiting, then retry the same file
            cout<<"fork error. Try to continue;"<<endl;
            continue;
        }
        // skip the broken picture
        // NOTE(review): together with the unconditional i++ below this advances
        // the index by two - confirm that skipping the following file is intended.
        if(statusFork==3)i++;
        countFork++;
        i++;
        if(i==inputData.fileList.size())break;

        cout_<<" pidIDArray[0].status= "<<pidIDArray[0].status<<" >> ";
        for(int index=0;index<maxFork;index++)cout_<<pidIDArray[index].status<<" ";
        cout_<<endl;

        // 100 is the "no slot chosen yet" marker
        ID=100;
        while(ID==100){
            if(countFork<maxFork-1)break;             // there are free slots for new processes
            int forkStatusCount=0;
            for(int index=0;index<maxFork;index++){   // count the active processes
                if(pidIDArray[index].status==1)forkStatusCount++;
            }
            if(forkStatusCount==countFork){sleep(1);continue;}     // no finished processes yet
            wait(&status);                           // reap the finished process
            countFork--;
            for(int index=0;index<maxFork;index++){  // take the first slot with status 0 and mark it with 1
                //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl;
                if(pidIDArray[index].status==0){

                    ID=index;
                    pidIDArray[index].status=1;
                    break;
                }
            }

        }
    }


#else
    DIR *dir;
    int printIndex=0;       // position of the current image within the pair of print sheets
    string path0="";
    string path1="";

    string mode="tif";
    GBitmap *printPage0=NULL;
    GBitmap *printPage1=NULL;

    for(int i=0;i<inputData.fileList.size();i++){
        cout<<"inputData.fileList["<<i<<"]="<<inputData.fileList[i]<<END;
        if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){
            closedir(dir);
            cout_<<"NEW DIR#1 "<<inputData.fileList[i]<<endl;
            readDirectoryToArray(inputData.fileList, inputData.fileList[i],"img");
            continue;
        }
        cout<<"NEW START"<<endl;
        inputData.data["inputFile"]=inputData.fileList[i];
        pechaImg=LoadImageData(inputData.data["inputFile"],0);
        // an image that cannot be loaded is skipped; sheets that are still
        // pending are written after the loop
        if(!pechaImg){
            cout_<<"no open file"<<inputData.data["inputFile"]<<endl;
            continue;
        }
        proccessImage(pechaImg);
        if(inputData.data["fileSave"]=="print3_Pages"){
            //if(pechaImg->rows()<1200||pechaImg->rows()>1300)pechaImg->scaleFast((float)1216/pechaImg->rows());
            cout<<"printIndex="<<printIndex<<" pechaImg->rows()="<<pechaImg->rows()<<endl;

            // the first image of a group names the output files and sizes both
            // sheets at three image heights
            if(printIndex==0){
                path=str_replace(".jpg", ".tif", inputData.data["inputFile"]);
                path=str_replace(".JPG", ".tif", path);
                path=str_replace(".jpg", ".tif", path);
                if(path.find(".tif")==string::npos)path=path+".tif";
                path0=str_replace(".tif", "_print0.tif", path);
                path1=str_replace(".tif", "_print1.tif", path);
                printPage0=GBitmap::create(pechaImg->columns(),pechaImg->rows()*3);
                printPage1=GBitmap::create(pechaImg->columns(),pechaImg->rows()*3);

                printPage0->drawImg(pechaImg,0, 0);
            }
            if(printIndex==1){
                cout<<" pechaImg->rows()*2="<<pechaImg->rows()*2<<endl;
                printPage1->drawImg(pechaImg,0, pechaImg->rows()*2);
            }

            if(printIndex==2){
                printPage0->drawImg(pechaImg,0, pechaImg->rows());
            }
            if(printIndex==3){
                printPage1->drawImg(pechaImg,0, pechaImg->rows());
            }
            if(printIndex==4){
                printPage0->drawImg(pechaImg,0, pechaImg->rows()*2);
            }
            if(printIndex==5){
                printPage1->drawImg(pechaImg,0, 0);
            }
            // write both sheets when the group is full or the list ends
            if(printIndex==5||i==inputData.fileList.size()-1){            //cout_<<" path="<<path<<endl;
                WriteImageData(printPage0,path0,0,mode);
                WriteImageData(printPage1,path1,0,mode);
                printPage0->destroy();
                printPage1->destroy();
                printPage0=NULL;
                printPage1=NULL;
                printIndex=-1;   // becomes 0 after the increment below
            }
            printIndex++;
        }
        if(inputData.data["fileSave"]=="print3Pages"){
            //if(pechaImg->rows()<1200||pechaImg->rows()>1300)pechaImg->scaleFast((float)1216/pechaImg->rows());
            cout<<"printIndex="<<printIndex<<" pechaImg->rows()="<<pechaImg->rows()<<endl;
            int dY=21;
            int dX=21;

            // the first image of a group names the output files and sizes both
            // sheets at two image heights
            if(printIndex==0){
                path=str_replace(".jpg", ".tif", inputData.data["inputFile"]);
                path=str_replace(".JPG", ".tif", path);
                path=str_replace(".jpg", ".tif", path);
                if(path.find(".tif")==string::npos)path=path+".tif";
                path0=str_replace(".tif", "_print0.tif", path);
                path1=str_replace(".tif", "_print1.tif", path);
                printPage0=GBitmap::create(pechaImg->columns(),pechaImg->rows()*2);
                printPage1=GBitmap::create(pechaImg->columns(),pechaImg->rows()*2);

                printPage0->drawImg(pechaImg,-dX, -dY);
            }
            if(printIndex==1){
                cout<<" pechaImg->rows()="<<pechaImg->rows()<<endl;
                printPage1->drawImg(pechaImg,-dX, pechaImg->rows()+dY);
            }

            if(printIndex==2){
                printPage0->drawImg(pechaImg,-dX, pechaImg->rows()+dY);
            }
            if(printIndex==3){
                printPage1->drawImg(pechaImg,-dX, -dY);
            }

            // write both sheets when the group is full or the list ends
            if(printIndex==3||i==inputData.fileList.size()-1){            //cout_<<" path="<<path<<endl;
                WriteImageData(printPage0,path0,0,mode);
                WriteImageData(printPage1,path1,0,mode);
                printPage0->destroy();
                printPage1->destroy();
                printPage0=NULL;
                printPage1=NULL;
                printIndex=-1;   // becomes 0 after the increment below
            }
            printIndex++;
        }
        pechaImg->destroy();
    }

    // If the final entry was skipped, the in-loop write never ran for the
    // last group; write the sheets that are still pending.
    if(printIndex>0&&printPage0!=NULL&&printPage1!=NULL){
        WriteImageData(printPage0,path0,0,mode);
        WriteImageData(printPage1,path1,0,mode);
        printPage0->destroy();
        printPage1->destroy();
    }

#endif
    //inputData.c_out.close();



}//______________________________________________
예제 #4
0
// Runs the dictionary batch job selected by inputData.data["ocrData"].
//
// Step 1: every entry of inputData.fileList that opendir() can open (i.e. a
//         directory) is replaced by the files found inside it: "rtf" files
//         when the mode is "RTFToYagpo", "txt" files otherwise.
// Step 2: the branch matching the mode is executed:
//   "joinDict"            - loadDictLevelFile() for every file, then
//                           writeDictionaryTXT(mainDict)
//   "buildDict"           - readText() + buildDictionary() per file
//   "TibetanToEngTagger"  - rewritten to "TibetanToRusTagger" with ln="eng"
//   "TibetanToRusTagger"  - mainTextTranslation() per file, result written to
//                           "<inputFile>_out.html"; returns immediately
//   "buildDictFromText"   - readText() + buildTranslationDictionary() per file
//   "reloadDict"          - currently a no-op
//   "buildHashDict"       - readText() + buildDictionaryHashIndex() per file
// Branches are tested one after another (not else-if) and some of them change
// inputData.data["ocrData"], so the order of the checks below matters.
//
// Returns "done translation" from the tagger branch and "" in every other case.
string GLogicProcessor::startDictionary(){

	// Expand directories listed in fileList into the files they contain.
	int index=0;
	while(index<inputData.fileList.size()){
		DIR *dir=opendir(inputData.fileList[index].c_str());
		if(dir!=NULL){
			// The handle is only needed to detect a directory; release it right
			// away so that one descriptor is not leaked per directory.
			closedir(dir);
			// Work on a copy of the path: readDirectoryToArray() receives the very
			// vector this string lives in and presumably appends to it, which
			// could invalidate a reference to the element.
			string dirPath=inputData.fileList[index];
			if(inputData.data["ocrData"]=="RTFToYagpo"){
			    readDirectoryToArray(inputData.fileList, dirPath,"rtf");
			}else{
			    readDirectoryToArray(inputData.fileList, dirPath,"txt");
			}
			// Drop the directory entry itself and re-examine whatever is now at
			// this index.
			inputData.fileList.erase(inputData.fileList.begin()+index);
			continue;
		}
		index++;
	}

	// Merge all input files into mainDict and save it as one dictionary.
	if(inputData.data["ocrData"]=="joinDict"){
		if(!inputData.fileList.size()){
			readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"txt");
		}
		cout<<"fileList.size()="<<inputData.fileList.size();
		strVector.resize(0); 
		mainDict.clear();
		
		for(int i=0;i<inputData.fileList.size();i++){
			// loadDictLevelFile() takes no arguments; it presumably picks the file
			// up from inputData.data["inputFile"] - TODO confirm.
			inputData.data["inputFile"]=inputData.fileList[i];
			cout<<"join "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<endl;
			loadDictLevelFile();
		}
		inputData.data["inputFile"]=inputData.data["inputFolder"];
		cout<<"start save dictionary"<<endl;
		writeDictionaryTXT( mainDict);
		
	}

	// Build a dictionary from each input file independently.
	if(inputData.data["ocrData"]=="buildDict"){
		if(!inputData.fileList.size()){
			readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"txt");
		}
		cout<<"fileList.size()="<<inputData.fileList.size();
		
		for(int i=0;i<inputData.fileList.size();i++){
			inputData.data["inputFile"]=inputData.fileList[i];
			cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<endl;
			// Per-file state is reset before every file.
			strVector.resize(0); 
			mainDict.clear();
			readText(strVector,inputData.data["inputFile"]);
			cout<<"strVector.size()="<<strVector.size()<<endl;
			buildDictionary(strVector);
		}
		
	}
    
    // The English tagger reuses the Russian tagger branch with ln set to "eng".
    if(inputData.data["ocrData"]=="TibetanToEngTagger"){
       inputData.data["ocrData"]="TibetanToRusTagger";
       inputData.data["ln"]="eng";
    }
	
	// Translate every input file and write the result next to it as HTML.
	if(inputData.data["ocrData"]=="TibetanToRusTagger"){
        if(!inputData.fileList.size()){
            readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"txt");
        }
        for(int i=0;i<inputData.fileList.size();i++){
            inputData.data["inputFile"]=inputData.fileList[i];
            cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<endl;
            string textData;
            readText(textData,inputData.data["inputFile"]);
            mainTextTranslation(textData);
            // Insert the processed text into the HTML template at the
            // @@@TEXT@@@ placeholder.
            string maket;
            readText(maket, "/_Image2OCR/edit/OSBL_Dictionary.html");
            maket=str_replace("@@@TEXT@@@", textData, maket);
            string path=inputData.data["inputFile"]+"_out.html";
            writeText(maket,path);
        }        
        return "done translation";
	}

	// Build a translation dictionary from plain text files.
	if(inputData.data["ocrData"]=="buildDictFromText"){
		if(!inputData.fileList.size()){
			readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"txt");
		}
		cout<<"buildDictFromText  fileList.size()="<<inputData.fileList.size()<<endl;
		
		for(int i=0;i<inputData.fileList.size();i++){
			inputData.data["inputFile"]=inputData.fileList[i];
			cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<endl;
			strVector.resize(0); 
			readText(strVector,inputData.data["inputFile"]);
			cout<<"strVector.size()="<<strVector.size()<<endl;
			// The mode is switched before the call; as a side effect the checks
			// further down no longer see "buildDictFromText".
            inputData.data["ocrData"]="TibetanUTFToEng";
            buildTranslationDictionary(strVector);
		}
		
	}

    // Placeholder: no action is implemented for this mode.
    if(inputData.data["ocrData"]=="reloadDict"){
  
    }
    
	// Build the dictionary hash index for every input file.
	if(inputData.data["ocrData"]=="buildHashDict"){
		if(!inputData.fileList.size()){
			readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"txt");
		}
		cout<<"buildDictionaryHashIndex  fileList.size()="<<inputData.fileList.size()<<endl;
		
		for(int i=0;i<inputData.fileList.size();i++){
			inputData.data["inputFile"]=inputData.fileList[i];
			cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<endl;
			strVector.resize(0); 
			readText(strVector,inputData.data["inputFile"]);
			cout<<"strVector.size()="<<strVector.size()<<endl;
			buildDictionaryHashIndex();
			cout<<"mainDict.size()="<<mainDict.size()<<endl;
		}
		cout<<"start save dictionary"<<endl;
	}

	
	return "";
	
}//////////////////////////////////////////////////////////////////////////
예제 #5
0
// Recognizes the single page named by inputData.data["inputFile"], running
// one forked child process per detected text line.
//
// Flow:
//   1. Load the image into pechaImg; return early if it cannot be opened.
//   2. Detect the text lines (strArray) and the border size with GBitset.
//   3. Fork up to inputData.num_cores children at a time. Each child runs
//      GBitsetOCR::setData() with only its own line flagged via selectFlag and
//      writes the results to <root>edit/OCR/_DATA/NNNN.html and NNNN.xml,
//      where NNNN = strArray.size()-i-1 zero-padded to 4 digits.
//   4. Wait for the children, then concatenate all .html files into
//      mainString and all .xml files into the string given to writePageXML().
//   5. Empty the _DATA folder.
void GMainEditor::startOneFileOCR(){

    pechaImg=LoadImageData(inputData.data["inputFile"],0); 
    cout<<"Start#1 "<<inputData.data["inputFile"]<<END; 
    if(!pechaImg){cout<<"no open file"<<inputData.data["inputFile"]<<endl; return;}
	
    inputData.data["ocrData"]="oneStringOCR";
    mainString="";
    
    vector<stringOCR>strArray;  
    int border;
    string xmlString;
    // NOTE(review): not referenced directly in this function; kept because the
    // DT() tracing macro presumably reads a local named `print` - confirm.
    int print=1;
    
	DT("@4_1");
	border=0; 
    setBit=GBitset::createResize(pechaImg,1,1,1);
    DT("@4_2");
    setBit->pageStringDetector(strArray,1); // routine that detects the text lines and letters of the pecha ("rails")
    DT("@4_3");
    border=setBit->border_size();
    DT("@4_4");
    setBit->destroy();
    
    // Line coordinates are known now. Spawn a new process for every line.
    // One slot per allowed concurrent child; a slot holding 0 is free. At least
    // one slot is required, otherwise the scheduling loop below could never
    // fork and would never terminate.
    int slotCount=inputData.num_cores;
    if(slotCount<1)slotCount=1;
    vector<int> childPid(slotCount);
    int status,pid;
    
    for(int index=0;index<childPid.size();index++){
        childPid[index]=0;
    }
    for(int a=0;a<strArray.size();a++)strArray[a].selectFlag=0;
    
    // Lines are handed out from the last one to the first.
    int i=strArray.size()-1;
    int processCount=0;
    while(i>=0){
        cout<<"NEW string# "<<i<<endl;
        // Fill every free slot with a child working on the next line.
        for(int index=0;index<childPid.size();index++){
            if(childPid[index]==0){
                // Flag the line before fork() so the child's copy of strArray
                // has exactly this one line selected.
                strArray[i].selectFlag=1;
                processCount++;
                childPid[index] = fork();
                if (childPid[index] < 0)
                    error((char*)"ERROR on fork");
                if (childPid[index] == 0)  {
                    // ---- child process ----
                    GBitsetOCR *setOCR=GBitsetOCR::createResize(pechaImg,1,1);
                    
                    setOCR->setData(
                                    aliKali,
                                    strArray,
                                    correctionTable,
                                    logicProcessor,
                                    iLeft,
                                    iTop,
                                    border,
                                    ALL_LETTER);
                    mainString=setOCR->mainString;
                    xmlString=setOCR->xmlString;
                    // Output file name is the zero-padded 4-digit value of
                    // strArray.size()-i-1.
                    ostringstream out;
                    out<<inputData.data["root"]<<"edit/OCR/_DATA/";
                    out.width(4);
                    out.fill('0');
                    out<<strArray.size()-i-1<<".html";
                    string path=out.str();
                    writeText(mainString,path);
                    out.str("");
                    out<<inputData.data["root"]<<"edit/OCR/_DATA/";
                    out.width(4);
                    out.fill('0');
                    out<<strArray.size()-i-1<<".xml";
                    path=out.str();
                    writeText(xmlString,path);
                    setOCR->destroy();
                    exit(0);
                }
                // ---- parent process ----
                strArray[i].selectFlag=0;
                i--;if(i==-1)break;
            } 	
        }	
        
        // Block until some child exits and release its slot.
        pid=wait(&status); cout<<"new pecha";
        for(int index=0;index<childPid.size();index++){
            if(pid==childPid[index]){childPid[index]=0;processCount--;}
        }	
    }
    cout<<"start processCount="<<processCount<<endl;
    // Reap the children that are still running.
    while(processCount){
        pid=wait(&status);
        if(pid>0){
            processCount--;
            cout<<"pid="<<pid<<" processCount="<<processCount<<endl;
        }else{
            // wait() failed (typically: no child processes are left). Stop
            // instead of spinning forever on a counter that cannot reach zero;
            // the polling loop below still waits for the output files.
            break;
        }
    }
    
    cout<<"collect all fork result in one file";
    
    // Poll the output folder until there is one .html file per line,
    // giving up after 11 attempts with a 2 second pause between them.
    vector<string>fileList;
    string path=inputData.data["root"]+"edit/OCR/_DATA/";
    int count=0;
    while(1){
       readDirectoryToArray(fileList, path,"html");
        if(fileList.size()!=strArray.size()){         
            cout<<"data not ready. has"<<fileList.size()<<" files. must be "<<strArray.size()<<" wait 2 sec."<<endl;
            count++; if(count==11)break;
             fileList.resize(0);
            sleep(2); 
        }else break;
    }
    
    // Concatenate the per-line HTML results.
    for(int a=0;a<fileList.size();a++){
        string str; readText(str,fileList[a]);
        mainString+=str;
    }
    drawStrArray(strArray,border);
    // Concatenate the per-line XML results and write the page XML.
    xmlString="";  fileList.resize(0);
    readDirectoryToArray(fileList, path,"xml");
    for(int a=0;a<fileList.size();a++){
        string str; readText(str,fileList[a]);
        xmlString+=str;
    }
    writePageXML(xmlString);
    
    // Remove the temporary per-line files.
    emptyFolder(path);

}//_________________________________