void GMainEditor::startOCRBatch(){ imageEditor=(GImageEditor*)inputData.imageEditor; fontEditor=(GFontEditor*)inputData.fontEditor; logicProcessor=(GLogicProcessor*)inputData.logicProcessor; if(!logicProcessor->dictionaryReady)logicProcessor->readDictionary(); aliKali=fontEditor->aliKali; string strHeaderHTML,srcLine,str; string path=inputData.data["tablePath"]+"/header.xml"; DIR *dir; //int mode; int i=0; //читаем статистику использования букв книги //readLetterStat(); //загружаем базу данных букв #ifdef FORK int maxFork=inputData.num_cores*0.75; int pid; pidID *pidIDArray; if(inputData.fileList.size()>1){ int countFork=0; MemoryFile *pidData_mf; //main file for conection with child process //inputData.data["statPath"].c_str() time_t seconds; seconds = time (NULL); ostringstream out; out<<"/tmp/"<<seconds; string path=out.str(); pidData_mf=MemoryFile::create(path.c_str(), MemoryFile::if_exists_keep_if_dont_exists_create); pidData_mf->resize(sizeof(pidID)*maxFork); pidIDArray=(pidID*)pidData_mf->data(); //array which can be share between processes. 
for(int index=0;index<maxFork;index++){ //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl; pidIDArray[index].status=0; } int ID=0; while(i<inputData.fileList.size()){ cout<<"NEW file#1 "<<inputData.fileList[i]<<endl; if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){ i++; continue; } inputData.data["inputFile"]=inputData.fileList[i]; string path=inputData.data["inputFile"]; //проверяем есть ли такой распознаный файл path=substr(0,(int)path.rfind("."),path); string volume=path; string fileIndex=fileName(path); path+=".xml"; if(is_file(path)){i++; continue;} if(!forkProccesOCR_(pidIDArray,ID,maxFork)){ cout<<"ERROR on fork return"; sleep(1); continue; }; i++;if(i==inputData.fileList.size())break; countFork++; for(int index=0;index<maxFork;index++)cout<<pidIDArray[index].status<<" "; cout<<endl; ID=100; int status; while(ID==100){ if(countFork<=maxFork){ //есть свободные слоты для новых процессов for(int index=0;index<maxFork;index++){ //маркируем слот как занятый //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl; if(pidIDArray[index].status==0){ ID=index; pidIDArray[index].status=1; break; } } }else{ int forkStatusCount=0; for(int index=0;index<maxFork;index++){ //подсчитываем количество активных процессов if(pidIDArray[index].status==1)forkStatusCount++; } if(forkStatusCount==countFork){sleep(1); continue;}; //ждем завершения процесса wait(&status); //регистрируем с системе завершенный процесс countFork--; } } } }else{ inputData.data["inputFile"]=inputData.fileList[0]; pechaImg=LoadImageData(inputData.data["inputFile"],0); startOCR(pechaImg); } #else cout<<"NO FORK"; while(i<inputData.fileList.size()){ if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){ readDirectoryToArray(inputData.fileList, inputData.fileList[i],"img"); i++; continue; } GBitmap* pechaImg; inputData.data["inputFile"]=inputData.fileList[i]; if(!is_file(inputData.data["inputFile"]))continue; 
pechaImg=LoadImageData(inputData.data["inputFile"],0); if(!pechaImg){cout_<<"no open file"<<inputData.data["inputFile"]<<endl; return;} str=inputData.data["inputFile"]; str=substr(0,str.rfind("."),str); str+=".html"; //cout_<<str<<endl; inputData.data["inputFileName"]=inputData.data["siteName"]; inputData.data["inputFileName"]+=substr(inputData.data["siteRoot"].size(),inputData.data["inputFile"]); //readPageHTML(); inputData.c_out.open(str.c_str()); pechaDataLoaded=0; startOCR(pechaImg); pechaImg->destroy(); inputData.c_out.close(); i++; } #endif //drawLettersInCorrectionTable(DRAW_BASE); cout_<<"COMPLETE"<<endl; }//____________________________________________________________________________
string GLogicProcessor::startConvert(){ //cout<<"Start inputData.data[\"inputFolder\"]"<<inputData.data["inputFolder"]<<END; string str,path; vector<string> strVector; int index=0; DIR *dir; int fileFlag; string ocrData=inputData.data["ocrData"]; while(index<inputData.fileList.size()){ if( ( dir=opendir(inputData.fileList[index].c_str()))!=NULL){ if(ocrData=="RTFToYagpo"){ fileFlag=readDirectoryToArray(inputData.fileList, inputData.fileList[index],"rtf"); }else{ fileFlag=readDirectoryToArray(inputData.fileList, inputData.fileList[index],"txt"); } inputData.fileList.erase(inputData.fileList.begin()+index); continue; } index++; } if(ocrData=="YagpoToWylie"){ string mainString; if(inputData.data["InputMethod"]=="fileList"){ cout<<"YagpoToWylieConverter inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); path=inputData.fileList[i]+"_out.txt"; readText(strVector, inputData.fileList[i]); mainString=""; for(int i=0;i<strVector.size();i++){ mainString+=TibUniToWylie(strVector[i],2); mainString+="\n"; } writeText(mainString, path); cout<<"done convert"; } }else{ mainString=""; for(int i=0;i<inputData.fileList.size();i++){ mainString+=TibUniToWylie(inputData.fileList[i],2); mainString+="\n"; } return mainString; } } if(ocrData=="CXS_to_UTF"){ loadMapFilePali("CXS_UTF_HTML.xml"); cout<<"fileList.size()="<<inputData.fileList.size(); for(int i=0;i<inputData.fileList.size();i++){ inputData.data["inputFile"]=inputData.fileList[i]; cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<END; strVector.resize(0); readText(strVector,inputData.data["inputFile"]); for(int m=0;m<strVector.size();m++){ //strVector[m]= regex_replace( strVector[m], date, format ); strVector[m]=Unicode_to_UTF(strVector[m]); convertCXS_to_UTF_nocopy(strVector[m]); //cout<<strVector[m]; } writeText(strVector,inputData.data["inputFile"]); } } if(ocrData=="SinhalaUniToYagpo"){ } 
if(ocrData=="ConcatenateFolder"){ cout<<"fileList.size()="<<inputData.fileList.size(); ofstream srcOutput; str=inputData.data["inputFolder"]; str+="/allText.txt"; srcOutput.open(str.c_str()); for(int i=0;i<inputData.fileList.size();i++){ inputData.data["inputFile"]=inputData.fileList[i]; readText(str,inputData.data["inputFile"].c_str()); cout<<"cat "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<END; srcOutput<<str<<endl; } srcOutput.close(); } if(ocrData=="LowerCase"){ loadMapFilePali("CXS_UTF_HTML.xml"); cout<<"fileList.size()="<<inputData.fileList.size(); for(int i=0;i<inputData.fileList.size();i++){ inputData.data["inputFile"]=inputData.fileList[i]; cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<END; strVector.resize(0); readText(strVector,inputData.data["inputFile"]); int step=0; for(int m=0;m<strVector.size();m++){ //strVector[m]= regex_replace( strVector[m], date, format ); //strVector[m]=Unicode_to_UTF(strVector[m]); lowerCase_nocopy(strVector[m]); if(step==1000){ cout<<m<<"."<<strVector[m]<<END; step=0;}step++; } writeText(strVector,inputData.data["inputFile"]); } } //cout<<"inputData.data[\"ocrData\"]="<<ocrData<<END; if(ocrData=="PaliUTFToEng"){ cout<<"PaliUTFToEng"<<END; } if(ocrData=="RTFToYagpo"){ ofstream c_out; c_out.open("/_out.txt"); cout<<"RTFToYagpo inputData.fileList.size()="<<inputData.fileList.size()<<END; string mainString; for(int i=0;i<inputData.fileList.size();i++){ mainString=""; #ifdef COCOA RTFtoYagpoConverter(mainString,inputData.fileList[i]); #endif string path=inputData.fileList[i]+"_out.txt"; cout<<"path="<<path<<END; writeText(mainString, path); c_out<<report; } return "done convert"; } if(ocrData=="dWylieToYagpo"){ string mainString; if(inputData.data["InputMethod"]=="fileList"){ cout<<"dWylieToYagpoConverter inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); 
path=inputData.fileList[i]+"_out.txt"; readText(strVector, inputData.fileList[i]); mainString=""; int step=0; for(int i=0;i<strVector.size();i++){ if(step==1000){cout<<i<<" ";step=0;}step++; mainString+=dWylieToYagpoConverter(strVector[i]); mainString+="\n"; } writeText(mainString, path); } }else{ for(int i=0;i<inputData.fileList.size();i++){ mainString=""; mainString+=dWylieToYagpoConverter(inputData.fileList[i])+"\n"; } return mainString; } } if(ocrData=="dSinhalaASCIToYagpo"){ string mainString; string path=inputData.data["tablePath"]+"codePages/SinhalaASCI.xml"; readMapXML(SinhalaASCI,path); cout<<"SinhalaASCI.size()="<<SinhalaASCI.size()<<END; if(inputData.data["InputMethod"]=="fileList"){ cout<<"dSinhalaASCIToYagpo inputData.fileList.size()="<<inputData.fileList.size()<<END; int step=0; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); path=inputData.fileList[i]+"_out.txt"; readText(strVector, inputData.fileList[i]); cout<<"strVector.size()="<<strVector.size()<<END; mainString=""; for(int i=0;i<strVector.size();i++){ mainString+=dSinhalaASCIToYagpo(strVector[i])+"\n"; if(step==1000){cout<<i<<" ";step=0;}step++; } writeText(mainString, path); } }else{ int step=0; for(int i=0;i<inputData.fileList.size();i++){ mainString=""; mainString+=dSinhalaASCIToYagpo(inputData.fileList[i]); if(step==1000){cout<<".";step=0;}step++; } cout<<mainString<<END; } } if(ocrData=="SinhalaUniToYagpo"){ string mainString; if(inputData.data["InputMethod"]=="fileList"){ cout<<"SinhalaUniToYagpo inputData.fileList.size()="<<inputData.fileList.size()<<END; int step=0; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); path=inputData.fileList[i]+"_out.txt"; readText(strVector, inputData.fileList[i]); cout<<"strVector.size()="<<strVector.size()<<END; mainString=""; for(int i=0;i<strVector.size();i++){ mainString+=SinhalaUniToYagpo(strVector[i],2)+"\n"; if(step==1000){cout<<i<<" ";step=0;}step++; } writeText(mainString, path); } }else{ int step=0; for(int 
i=0;i<inputData.fileList.size();i++){ mainString=""; mainString+=SinhalaUniToYagpo(inputData.fileList[i],2); if(step==1000){cout<<".";step=0;}step++; } return mainString; } } if(ocrData=="SinhalaMettaToYagpo"){ string mainString; string path=inputData.data["tablePath"]+"codePages/SinhalaMetta.xml"; readMapXML(SinhalaASCI,path); cout<<"SinhalaASCI.size()="<<SinhalaASCI.size()<<END; if(inputData.data["InputMethod"]=="fileList"){ cout<<"dSinhalaASCIToYagpo inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); path=inputData.fileList[i]+"_out.txt"; readText(strVector, inputData.fileList[i]); cout<<"strVector.size()="<<strVector.size()<<END; mainString=""; int step=0; for(int i=0;i<strVector.size();i++){ //cout <<"next string "<<i<<" ="<<strVector[i]<<endl; if(strVector[i].size()){ mainString+=SinghalaASCIToYagpo(strVector[i])+"\n"; }else{mainString+="\n";} if(step==1000){cout<<i<<" ";step=0;}step++; } writeText(mainString, path); } }else{ int step=0; for(int i=0;i<inputData.fileList.size();i++){ mainString=""; mainString+=dSinhalaASCIToYagpo(inputData.fileList[i]); if(step==1000){cout<<".";step=0;}step++; } return mainString; } cout<<"DONE CONVERT"; } if(ocrData=="WylieToYagpo"){ string mainString; cout<<" @inputData.data[InputMethod]="<<inputData.data["InputMethod"]<<endl; loadTransliterationFile("TranslitTableUni_Wylie.xml"); if(inputData.data["InputMethod"]=="fileList"){ cout<<"WylieToYagpoConverter inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); path=inputData.fileList[i]+"_out.txt"; cout<<" path="<<path; readText(strVector, inputData.fileList[i]); cout<<" strVector="<<strVector.size()<<endl; mainString=""; int step=0; string str; for(int i=0;i<strVector.size();i++){ if(step==100){cout<<i<<" ";step=0;}step++; str=WylieToYagpoConverter(strVector[i]); mainString+=YagpoToUni(str); mainString+="\n"; } 
writeText(mainString, path); cout<<"done convert"; } }else{ mainString=""; for(int i=0;i<inputData.fileList.size();i++){ mainString+=WylieToYagpoConverter(inputData.fileList[i])+"\n"; } return mainString; } } if(ocrData=="TibUniToWylie"){ string mainString; if(inputData.data["InputMethod"]=="fileList"){ cout<<"YagpoToWylieConverter inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); path=inputData.fileList[i]+"_out.txt"; readText(strVector, inputData.fileList[i]); mainString=""; for(int i=0;i<strVector.size();i++){ mainString+=TibUniToWylie(strVector[i],1); mainString+="\n"; } writeText(mainString, path); cout<<"done convert"; } }else{ mainString=""; for(int i=0;i<inputData.fileList.size();i++){ mainString+=TibUniToWylie(inputData.fileList[i],1); mainString+="\n"; } return mainString; } } if(ocrData=="YagpoToWylie"){ string mainString; if(inputData.data["InputMethod"]=="fileList"){ cout<<"YagpoToWylieConverter inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); path=inputData.fileList[i]+"_out.txt"; readText(strVector, inputData.fileList[i]); mainString=""; for(int i=0;i<strVector.size();i++){ mainString+=TibUniToWylie(strVector[i],2); mainString+="\n"; } writeText(mainString, path); cout<<"done convert"; } }else{ mainString=""; for(int i=0;i<inputData.fileList.size();i++){ mainString+=TibUniToWylie(inputData.fileList[i],2); mainString+="\n"; } return mainString; } } if(ocrData=="YagpoToUnicode"){ string mainString; if(inputData.data["InputMethod"]=="fileList"){ cout<<"YagpoToUnicode inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); path=inputData.fileList[i]+"_out.txt"; readText(strVector, inputData.fileList[i]); mainString=""; cout<<"strVector.size()="<<strVector.size()<<endl; int step=0; for(int n=0;n<strVector.size();n++){ 
if(step==strVector.size()/100){ step=0;cout<<".";}step++; mainString+=YagpoToUni(strVector[n]); mainString+="\n"; } writeText(mainString, path); //return path; } return "done"; }else{ mainString=""; for(int i=0;i<inputData.fileList.size();i++){ mainString+=YagpoToUni(inputData.fileList[i]); //cout<<"mainString="<<mainString<<endl; mainString+="\n"; } return mainString; } } if(ocrData=="UnicodeToYagpo"){ string mainString; if(inputData.data["InputMethod"]=="fileList"){ cout<<"YagpoToUnicode inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); cout<<"convert "<<inputData.fileList[i]<<endl; path=inputData.fileList[i]+"_out.txt"; readText(strVector, inputData.fileList[i]); mainString=""; int step=0; for(int n=0;n<strVector.size();n++){ //mainString+=UnicodeToYagpo(strVector[n]); if(step==100000){cout<<n<<" "; step=0;} step++; mainString+=tibetanUTFToYagpo(strVector[n],1); mainString+="\n"; } writeText(mainString, path); } }else{ mainString=""; for(int i=0;i<inputData.fileList.size();i++){ mainString+=UnicodeToYagpo(inputData.fileList[i]); if(i)mainString+="\n"; } return mainString; } } if(ocrData=="BonPDFToUni"){ string mainString; if(inputData.data["InputMethod"]=="fileList"){ cout<<"BonPDFToUni inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); cout<<"convert "<<inputData.fileList[i]<<endl; path=inputData.fileList[i]+"_out.txt"; readText(strVector, inputData.fileList[i]); mainString=""; for(int n=0;n<strVector.size();n++){ BonPDFToUni(strVector[n]); mainString+=strVector[n]; mainString+="\n"; } writeText(mainString, path); } return "done"; }else{ mainString=""; for(int i=0;i<inputData.fileList.size();i++){ BonPDFToUni(inputData.fileList[i]); mainString+=inputData.fileList[i]; if(i)mainString+="\n"; } return mainString; } } if(ocrData=="tibTextCorrector"){ string mainString,path; 
readGrammarDataXML(inputData.data["wordsDataPath"]); if(inputData.data["mode"]!="text"){ cout<<"TibetanCorrector inputData.fileList.size()="<<inputData.fileList.size()<<END; cout<<" mode="<<inputData.data["mode"]<<endl; if(inputData.data["system"]=="process"){ for(int i=0;i<inputData.fileList.size();i++){ cout<<"convert "<<inputData.fileList[i]<<endl; inputData.data["fileName"]=inputData.fileList[i]; path=str_replace(".txt",".html",inputData.fileList[i]); inputData.data["outFile"]=path; TibetanCorrector(); string cmd="textutil -convert rtf \""+path+"\""; cout<<cmd<<endl; system(cmd.c_str()); } }else{ for(int i=0;i<inputData.fileList.size();i++){ //strVector.resize(0); cout<<"convert "<<inputData.fileList[i]<<endl; ostringstream out; out<<inputData.data["rootApp"]<<" \"xml=<fileList>"<<inputData.fileList[i]<< "</fileList><ocrData>"<<inputData.data["ocrData"]<<"</ocrData>"<< "<ocrLn>"<<inputData.data["ocrLn"]<<"</ocrLn><system>process</system>\" &"; string cmd=out.str(); //cout<<cmd; exit(0); system(cmd.c_str()); } } return "done"; }else{ mainString=implode("\n",inputData.fileList); mainString=lineTibetanCorrector(mainString); return mainString; } } if(ocrData=="transcription"){ string mainString,path; loadTransliterationFile("TranslitTableUni_Wylie.xml"); if(inputData.data["InputMethod"]=="fileList"){ cout<<"TranslitYagpoRus inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ //strVector.resize(0); cout<<"convert "<<inputData.fileList[i]<<endl; inputData.data["fileName"]=inputData.fileList[i]; path=inputData.fileList[i]+"_out.txt"; inputData.data["outFile"]=path; Transcription(); } return "done"; }else{ mainString=""; for(int i=0;i<inputData.fileList.size();i++){ mainString+=lineTranscription(inputData.fileList[i]); mainString+="\n"; } return mainString; } } if(ocrData=="textNormalisation"){ string mainString,path; if(inputData.data["InputMethod"]=="fileList"){ cout<<"textNormalisation 
inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ //strVector.resize(0); cout<<"convert "<<inputData.fileList[i]<<endl; inputData.data["fileName"]=inputData.fileList[i]; path=inputData.fileList[i]+"_out.txt"; inputData.data["outFile"]=path; string srcStr; readText(srcStr,inputData.data["fileName"].c_str()); //readText(strVector, inputData.fileList[i]); //mainString=""; //for(int n=0;n<strVector.size();n++){ // mainString+=TranslitYagpo(strVector[n]); // mainString+="\n"; //} //writeText(mainString, path); textNormalisation(srcStr); } }else{ mainString=""; for(int i=0;i<inputData.fileList.size();i++){ //mainString+=TranslitYagpo(); if(i)mainString+="\n"; } return mainString; } } if(ocrData=="TXTtoXML"){ string mainString; if(inputData.data["InputMethod"]=="fileList"){ cout<<"TXTtoXML inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); path=inputData.fileList[i]+".xml"; path=str_replace(".doc.txt", "" ,path); path=str_replace(".DOC.txt", "" ,path); readText(strVector, inputData.fileList[i]); cout<<"strVector.size()="<<strVector.size()<<END; mainString="<text:text xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:text=\"http://www.tbrc.org/models/text#\" RID=\"\" lang=\"bo_ZH\" volume=\"\" num=\"\" fromWork=\"lam.dre\" fromVolume=\"\" start=\"0\" last=\"\">"; for(int n=0;n<strVector.size();n++){ if(strVector[n].find("FILE",0)==string::npos&&strVector[n].find("PAGE",0)==string::npos){ //if(strVector[n].find("Corel",0)!=string::npos)cout<<inputData.fileList[i]<<END; mainString+=strVector[n]; mainString+="\n"; } } mainString+="</text:text>"; cout<<path<<END; writeText(mainString, path); } }else{ mainString="start\n"; for(int i=0;i<inputData.fileList.size();i++){ mainString+=YagpoToUni(inputData.fileList[i]); } cout<<mainString<<END; } } if(ocrData=="TXTtoHTML"){ string mainString; if(inputData.data["InputMethod"]=="fileList"){ 
cout<<"TXTtoHTML inputData.fileList.size()="<<inputData.fileList.size()<<END; for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); path=inputData.fileList[i]+".xml"; path=str_replace(".doc.txt", "" ,path); path=str_replace(".DOC.txt", "" ,path); readText(strVector, inputData.fileList[i]); cout<<"inputData.fileList[i]="<<inputData.fileList[i]<<" strVector.size()="<<strVector.size()<<END; //continue; /* mainString="<text:text xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:text=\"http://www.tbrc.org/models/text#\" RID=\"\" lang=\"bo_ZH\" volume=\"\" num=\"\" fromWork=\"lam.dre\" fromVolume=\"\" start=\"0\" last=\"\">"; for(int n=0;n<strVector.size();n++){ if(strVector[n].find("FILE",0)==string::npos&&strVector[n].find("PAGE",0)==string::npos){ //if(strVector[n].find("Corel",0)!=string::npos)cout<<inputData.fileList[i]<<END; mainString+=strVector[n]; mainString+="\n"; } } mainString+="</text:text>"; cout<<path<<END; writeText(mainString, path); */ } }else{ mainString="start\n"; for(int i=0;i<inputData.fileList.size();i++){ mainString+=YagpoToUni(inputData.fileList[i]); } cout<<mainString<<END; } } if(ocrData=="UTF8"){ string mainString; if(inputData.data["InputMethod"]=="fileList"){ DT("YagpoToUnicode inputData.fileList.size()="<<inputData.fileList.size()<<endl); for(int i=0;i<inputData.fileList.size();i++){ strVector.resize(0); path=inputData.fileList[i]; DT("convert path "<<path<<endl); readText(strVector, inputData.fileList[i]); mainString=""; for(int n=0;n<strVector.size();n++){ mainString+=Unicode_to_UTF(strVector[n]); mainString+="\n"; } writeText(mainString, path); } }else{ mainString=""; for(int i=0;i<inputData.fileList.size();i++){ mainString+=YagpoToUni(inputData.fileList[i]); mainString+="\n"; } return mainString; } } return ""; }//________________________________________________________________________________________________________________
void GImageEditor::imageProcess(){ cout<<"START image"; string strHeaderHTML,srcLine,str; string path=inputData.data["tablePath"]+"/header.xml"; replace(path.begin(),path.end(),'\\','/'); if(!inputData.fileList.size()){ readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"img"); } //cout<<"inputData.fileList.size()="<<inputData.fileList.size(); #ifdef FORK11 int maxFork=inputData.num_cores*2; //обработка изображений легче чем OCR pidID *pidIDArray; MemoryFile *pidData_mf; //main file for conection with child process pidData_mf=MemoryFile::create(inputData.data["statPath"].c_str(), MemoryFile::if_exists_keep_if_dont_exists_create); pidData_mf->resize(sizeof(pidID)*maxFork); pidIDArray=(pidID*)pidData_mf->data(); //array which can be share between processes. for(int index=0;index<maxFork;index++)pidIDArray[index].status=0; //for(int index=0;index<maxFork;index++)cout_<<pidIDArray[index].status; int ID=0; unsigned int i=0; DIR *dir; int countFork=0; int status; while(i<inputData.fileList.size()){ if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){ cout_<<"NEW DIR#1 "<<inputData.fileList[i]<<endl; readDirectoryToArray(inputData.fileList, inputData.fileList[i],"img"); i++; continue; } //cout_<<"NEW START"<<endl; inputData.data["inputFile"]=inputData.fileList[i]; int statusFork=forkImageProcess(pidIDArray,maxFork,ID); if(statusFork==0){ wait(&status);sleep(1); //пробуем подождать cout<<"fork error. 
Try to continue;"<<endl; continue; } if(statusFork==3)i++; //пропускаем битую картинку countFork++; i++; if(i==inputData.fileList.size())break; cout_<<" pidIDArray[0].status= "<<pidIDArray[0].status<<" >> "; for(int index=0;index<maxFork;index++)cout_<<pidIDArray[index].status<<" "; cout_<<endl; ID=100; while(ID==100){ if(countFork<maxFork-1)break; //есть свободные слоты для новых процессов int forkStatusCount=0; for(int index=0;index<maxFork;index++){ //подсчитываем количество активных процессов if(pidIDArray[index].status==1)forkStatusCount++; } if(forkStatusCount==countFork){sleep(1);continue;} //нет завершенных процессов wait(&status); //регистрируем с системе завершенный процесс countFork--; for(int index=0;index<maxFork;index++){ //маркируем слот как свободный //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl; if(pidIDArray[index].status==0){ ID=index; pidIDArray[index].status=1; break; } } } } #else DIR *dir; int printIndex=0; string path0=""; string path1=""; string mode="tif"; GBitmap *printPage0; GBitmap *printPage1; for(int i=0;i<inputData.fileList.size();i++){ cout<<"inputData.fileList["<<i<<"]="<<inputData.fileList[i]<<END; if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){ cout_<<"NEW DIR#1 "<<inputData.fileList[i]<<endl; readDirectoryToArray(inputData.fileList, inputData.fileList[i],"img"); continue; } cout<<"NEW START"<<endl; inputData.data["inputFile"]=inputData.fileList[i]; pechaImg=LoadImageData(inputData.data["inputFile"],0); proccessImage(pechaImg); if(inputData.data["fileSave"]=="print3_Pages"){ //if(pechaImg->rows()<1200||pechaImg->rows()>1300)pechaImg->scaleFast((float)1216/pechaImg->rows()); cout<<"printIndex="<<printIndex<<" pechaImg->rows()="<<pechaImg->rows()<<endl; if(printIndex==0){ path=str_replace(".jpg", ".tif", inputData.data["inputFile"]); path=str_replace(".JPG", ".tif", path); path=str_replace(".jpg", ".tif", path); if(path.find(".tif")==-1)path=path+".tif"; path0=str_replace(".tif", 
"_print0.tif", path); path1=str_replace(".tif", "_print1.tif", path); printPage0=GBitmap::create(pechaImg->columns(),pechaImg->rows()*3); printPage1=GBitmap::create(pechaImg->columns(),pechaImg->rows()*3); printPage0->drawImg(pechaImg,0, 0); } if(printIndex==1){ cout<<" pechaImg->rows()*2="<<pechaImg->rows()*2<<endl; printPage1->drawImg(pechaImg,0, pechaImg->rows()*2); } if(printIndex==2){ printPage0->drawImg(pechaImg,0, pechaImg->rows()); } if(printIndex==3){ printPage1->drawImg(pechaImg,0, pechaImg->rows()); } if(printIndex==4){ printPage0->drawImg(pechaImg,0, pechaImg->rows()*2); } if(printIndex==5){ printPage1->drawImg(pechaImg,0, 0); } if(printIndex==5||i==inputData.fileList.size()-1){ //cout_<<" path="<<path<<endl; WriteImageData(printPage0,path0,0,mode); WriteImageData(printPage1,path1,0,mode); printPage0->destroy(); printPage1->destroy(); printIndex=-1; } printIndex++; } if(inputData.data["fileSave"]=="print3Pages"){ //if(pechaImg->rows()<1200||pechaImg->rows()>1300)pechaImg->scaleFast((float)1216/pechaImg->rows()); cout<<"printIndex="<<printIndex<<" pechaImg->rows()="<<pechaImg->rows()<<endl; int dY=21; int dX=21; if(printIndex==0){ path=str_replace(".jpg", ".tif", inputData.data["inputFile"]); path=str_replace(".JPG", ".tif", path); path=str_replace(".jpg", ".tif", path); if(path.find(".tif")==-1)path=path+".tif"; path0=str_replace(".tif", "_print0.tif", path); path1=str_replace(".tif", "_print1.tif", path); printPage0=GBitmap::create(pechaImg->columns(),pechaImg->rows()*2); printPage1=GBitmap::create(pechaImg->columns(),pechaImg->rows()*2); printPage0->drawImg(pechaImg,-dX, -dY); } if(printIndex==1){ cout<<" pechaImg->rows()="<<pechaImg->rows()<<endl; printPage1->drawImg(pechaImg,-dX, pechaImg->rows()+dY); } if(printIndex==2){ printPage0->drawImg(pechaImg,-dX, pechaImg->rows()+dY); } if(printIndex==3){ printPage1->drawImg(pechaImg,-dX, -dY); } if(printIndex==3||i==inputData.fileList.size()-1){ //cout_<<" path="<<path<<endl; 
WriteImageData(printPage0,path0,0,mode); WriteImageData(printPage1,path1,0,mode); printPage0->destroy(); printPage1->destroy(); printIndex=-1; } printIndex++; } pechaImg->destroy(); } #endif //inputData.c_out.close(); }//______________________________________________
/**
 * Dispatches a dictionary task selected by inputData.data["ocrData"].
 *
 * First every directory found in inputData.fileList is replaced by the files
 * it contains ("rtf" files for RTFToYagpo, "txt" files otherwise). Each task
 * additionally falls back to the "txt" files of inputData.data["inputFolder"]
 * when the list is empty.
 *
 * Tasks:
 *  - joinDict:            loadDictLevelFile() for every file, then
 *                         writeDictionaryTXT(mainDict);
 *  - buildDict:           buildDictionary() on the lines of every file;
 *  - TibetanToEngTagger:  alias of TibetanToRusTagger with data["ln"]="eng";
 *  - TibetanToRusTagger:  mainTextTranslation() on every file, result embedded
 *                         into an HTML template and saved as "<file>_out.html";
 *  - buildDictFromText:   buildTranslationDictionary() on the lines of every file;
 *  - buildHashDict:       buildDictionaryHashIndex() for every file;
 *  - reloadDict:          no action.
 *
 * @return "done translation" for the tagger tasks, otherwise "".
 *
 * Fixes relative to the previous version: DIR handles returned by opendir()
 * are closed (they used to leak), the directory path is copied before the list
 * that holds it is extended, and unused locals are gone.
 */
string GLogicProcessor::startDictionary(){
    DIR *dir;

    // Replace every directory in the list by the files it contains.
    size_t index=0;
    while(index<inputData.fileList.size()){
        dir=opendir(inputData.fileList[index].c_str());
        if(dir!=NULL){
            closedir(dir);
            // Copy first: readDirectoryToArray appends to the list this path lives in.
            string dirPath=inputData.fileList[index];
            if(inputData.data["ocrData"]=="RTFToYagpo"){
                readDirectoryToArray(inputData.fileList, dirPath,"rtf");
            }else{
                readDirectoryToArray(inputData.fileList, dirPath,"txt");
            }
            inputData.fileList.erase(inputData.fileList.begin()+index);
            continue;   // re-examine whatever now sits at this index
        }
        index++;
    }

    if(inputData.data["ocrData"]=="joinDict"){
        if(!inputData.fileList.size()){
            readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"txt");
        }
        cout<<"fileList.size()="<<inputData.fileList.size();
        strVector.resize(0);
        mainDict.clear();
        for(size_t i=0;i<inputData.fileList.size();i++){
            // loadDictLevelFile() takes no arguments; the current file is
            // published through inputData.data["inputFile"].
            inputData.data["inputFile"]=inputData.fileList[i];
            cout<<"join "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<endl;
            loadDictLevelFile();
        }
        inputData.data["inputFile"]=inputData.data["inputFolder"];
        cout<<"start save dictionary"<<endl;
        writeDictionaryTXT( mainDict);
    }

    if(inputData.data["ocrData"]=="buildDict"){
        if(!inputData.fileList.size()){
            readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"txt");
        }
        cout<<"fileList.size()="<<inputData.fileList.size();
        for(size_t i=0;i<inputData.fileList.size();i++){
            inputData.data["inputFile"]=inputData.fileList[i];
            cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<endl;
            strVector.resize(0);
            mainDict.clear();   // the dictionary is rebuilt from scratch for every file
            readText(strVector,inputData.data["inputFile"]);
            cout<<"strVector.size()="<<strVector.size()<<endl;
            buildDictionary(strVector);
        }
    }

    if(inputData.data["ocrData"]=="TibetanToEngTagger"){
        // Handled by the TibetanToRusTagger branch below with the language switched.
        inputData.data["ocrData"]="TibetanToRusTagger";
        inputData.data["ln"]="eng";
    }

    if(inputData.data["ocrData"]=="TibetanToRusTagger"){
        if(!inputData.fileList.size()){
            readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"txt");
        }
        for(size_t i=0;i<inputData.fileList.size();i++){
            inputData.data["inputFile"]=inputData.fileList[i];
            cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<endl;
            string textData;
            readText(textData,inputData.data["inputFile"]);
            mainTextTranslation(textData);   // the result is read back from textData

            // Embed the result into the HTML page template.
            string maket;
            readText(maket, "/_Image2OCR/edit/OSBL_Dictionary.html");
            maket=str_replace("@@@TEXT@@@", textData, maket);
            string path=inputData.data["inputFile"]+"_out.html";
            writeText(maket,path);
        }
        return "done translation";
    }

    if(inputData.data["ocrData"]=="buildDictFromText"){
        if(!inputData.fileList.size()){
            readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"txt");
        }
        cout<<"buildDictFromText fileList.size()="<<inputData.fileList.size()<<endl;
        for(size_t i=0;i<inputData.fileList.size();i++){
            inputData.data["inputFile"]=inputData.fileList[i];
            cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<endl;
            strVector.resize(0);
            readText(strVector,inputData.data["inputFile"]);
            cout<<"strVector.size()="<<strVector.size()<<endl;
            // The task id is overwritten before the call, so none of the
            // branches below match afterwards.
            inputData.data["ocrData"]="TibetanUTFToEng";
            buildTranslationDictionary(strVector);
        }
    }

    if(inputData.data["ocrData"]=="reloadDict"){
        // intentionally empty
    }

    if(inputData.data["ocrData"]=="buildHashDict"){
        if(!inputData.fileList.size()){
            readDirectoryToArray(inputData.fileList,inputData.data["inputFolder"],"txt");
        }
        cout<<"buildDictionaryHashIndex fileList.size()="<<inputData.fileList.size()<<endl;
        for(size_t i=0;i<inputData.fileList.size();i++){
            inputData.data["inputFile"]=inputData.fileList[i];
            cout<<"convert "<<i<<" from "<<inputData.fileList.size()<<" "<<inputData.fileList[i]<<endl;
            strVector.resize(0);
            readText(strVector,inputData.data["inputFile"]);
            cout<<"strVector.size()="<<strVector.size()<<endl;
            buildDictionaryHashIndex();
            cout<<"mainDict.size()="<<mainDict.size()<<endl;
        }
        cout<<"start save dictionary"<<endl;
    }

    return "";
}//////////////////////////////////////////////////////////////////////////
void GMainEditor::startOneFileOCR(){ pechaImg=LoadImageData(inputData.data["inputFile"],0); cout<<"Start#1 "<<inputData.data["inputFile"]<<END; if(!pechaImg){cout<<"no open file"<<inputData.data["inputFile"]<<endl; return;} inputData.data["ocrData"]="oneStringOCR"; mainString=""; vector<stringOCR>strArray; int border; string str; string xmlString; int print=1; DT("@4_1"); //vectorBase[0].allVectorCount=0; border=0; setBit=GBitset::createResize(pechaImg,1,1,1); DT("@4_2"); setBit->pageStringDetector(strArray,1); // Подпрограмма выделения строк и букв ПЕЧА ( РЕЛЬСЫ ) DT("@4_3"); border=setBit->border_size(); DT("@4_4"); setBit->destroy(); //получили координаты строк. Создаем новый процесс для каждой строки vector<int> pidID(inputData.num_cores); int status,pid; for(int index=0;index<pidID.size();index++){ pidID[index]=0; } for(int a=0;a<strArray.size();a++)strArray[a].selectFlag=0; int i=strArray.size()-1; int processCount=0; while(i>=0){ cout<<"NEW string# "<<i<<endl; for(int index=0;index<pidID.size();index++){ if(pidID[index]==0){ strArray[i].selectFlag=1; processCount++; pidID[index] = fork(); if (pidID[index] < 0) error((char*)"ERROR on fork"); if (pidID[index] == 0) { GBitsetOCR *setOCR=GBitsetOCR::createResize(pechaImg,1,1); //if(NewLetterOnlyBtn->Checked==true) {mode=NEWLETTERINBOOK;}else{mode=ALL_LETTER;} setOCR->setData( aliKali, strArray, correctionTable, logicProcessor, iLeft, iTop, border, ALL_LETTER); mainString=setOCR->mainString; xmlString=setOCR->xmlString; //cout<<"mainString="<<mainString<<endl; ostringstream out; out<<inputData.data["root"]<<"edit/OCR/_DATA/"; out.width(4); out.fill('0'); out<<strArray.size()-i-1<<".html"; string path=out.str(); writeText(mainString,path); out.str(""); out<<inputData.data["root"]<<"edit/OCR/_DATA/"; out.width(4); out.fill('0'); out<<strArray.size()-i-1<<".xml"; path=out.str(); writeText(xmlString,path); setOCR->destroy(); exit(0); } strArray[i].selectFlag=0; i--;if(i==-1)break; } } pid=wait(&status); cout<<"new 
pecha"; for(int index=0;index<pidID.size();index++){ if(pid==pidID[index]){pidID[index]=0;processCount--;} } } cout<<"start processCount="<<processCount<<endl; while(processCount){pid=wait(&status); if(pid>0){ processCount--; cout<<"pid="<<pid<<" processCount="<<processCount<<endl; } } cout<<"collect all fork result in one file"; vector<string>fileList; string path=inputData.data["root"]+"edit/OCR/_DATA/"; int count=0; while(1){ readDirectoryToArray(fileList, path,"html"); if(fileList.size()!=strArray.size()){ cout<<"data not ready. has"<<fileList.size()<<" files. must be "<<strArray.size()<<" wait 2 sec."<<endl; count++; if(count==11)break; fileList.resize(0); sleep(2); }else break; } for(int a=0;a<fileList.size();a++){ string str; readText(str,fileList[a]); mainString+=str; } //cout<<"mainString="<<mainString<<endl; drawStrArray(strArray,border); xmlString=""; fileList.resize(0); readDirectoryToArray(fileList, path,"xml"); for(int a=0;a<fileList.size();a++){ string str; readText(str,fileList[a]); xmlString+=str; } //cout<<"xmlString="<<xmlString<<endl; writePageXML(xmlString); emptyFolder(path); }//_________________________________