/**
 * Loads the image named by inputData.data["inputFile"] and forks a child
 * process that runs OCR on it.
 *
 * The child runs startOCR(), clears pidIDArray[ID].status, removes the file
 * named by inputData.data["log"] and terminates with exit(0); it never
 * returns to the caller. The parent releases its copy of the image and
 * returns immediately.
 *
 * @param pidIDArray slot table whose entry ID is cleared by the child when done
 * @param ID         index of the slot assigned to this child
 * @param maxFork    not used by this function (kept for interface compatibility)
 * @return 1 when a child was forked, 0 when fork() failed
 */
bool GMainEditor::forkProccesOCR_(pidID *pidIDArray,int ID, int maxFork){
    pechaImg=LoadImageData(inputData.data["inputFile"],0);

    int childPid=fork();
    if (childPid < 0){
        error((char*)"ERROR on fork");
        // The caller retries after a failed fork and the image is reloaded
        // on every attempt, so release this copy instead of leaking it.
        if(pechaImg)pechaImg->destroy();
        return 0;
    }

    if (childPid == 0) {
        // Child process.
        if(pechaImg){
            // Path of the page with its extension replaced by ".html" (logged only).
            string str=inputData.data["inputFile"];
            str=substr(0,(int)str.rfind("."),str);
            str+=".html";
            cout_<<str<<endl;

            inputData.data["inputFileName"]=inputData.data["siteName"];
            inputData.data["inputFileName"]+=substr((int)inputData.data["siteRoot"].size(),inputData.data["inputFile"]);
            //readPageHTML();
            cout<<"NEW START "<<inputData.data["inputFile"]<<endl;
            pechaDataLoaded=0;
            startOCR(pechaImg);
        }else{
            // The image could not be loaded: skip OCR but still release the
            // slot below, so the parent's bookkeeping stays consistent.
            cout<<"no open file"<<inputData.data["inputFile"]<<endl;
        }
        pidIDArray[ID].status=0;   // tell the parent this slot is free again
        cout<<"done "<<ID<<endl;
        remove(inputData.data["log"].c_str());
        exit(0);
    }

    // Parent process: the child works on its own copy of the image.
    if(pechaImg)pechaImg->destroy();
    return 1;
}
void GMainEditor::forkProccesOCR(int pidID,GBitmap *pechaImgID_){ if (pidID < 0) error((char*)"ERROR on fork"); if (pidID == 0) { string str=inputData.data["inputFile"]; str=substr(0,(int)str.rfind("."),str); str+=".html"; cout_<<str<<endl; inputData.data["inputFileName"]=inputData.data["siteName"]; inputData.data["inputFileName"]+=substr((int)inputData.data["siteRoot"].size(),inputData.data["inputFile"]); //readPageHTML(); cout_<<"NEW START "<<inputData.data["inputFile"]<<endl; pechaDataLoaded=0; startOCR(pechaImgID_); //if(stat)writeLetterStat(); exit(0); } }
/**
 * Constructs the base OCR dialog.
 *
 * The dialog carries three buttons: User1 ("Start OCR"), User2 ("Cancel")
 * and Close, with User1 as the default. The spell-check preference is read
 * from the CFG_OCR_KSPELL configuration group.
 *
 * @param parent      parent widget, forwarded to KDialogBase
 * @param spellConfig spell-check configuration, stored in m_spellConfig
 * @param face        dialog type, forwarded to KDialogBase
 */
KOCRBase::KOCRBase( QWidget *parent, KSpellConfig *spellConfig,
                    KDialogBase::DialogType face )
    : KDialogBase( face,
                   i18n("Optical Character Recognition"),
                   User2|Close|User1,
                   User1,
                   parent,
                   0,
                   false,
                   true,
                   KGuiItem( i18n("Start OCR" ), "launch",
                             i18n("Start the Optical Character Recognition process" )),
                   KGuiItem( i18n("Cancel" ), "stopocr",
                             i18n("Stop the OCR Process" ))),
      m_animation(0L),
      m_metaBox(0L),
      m_imgHBox(0L),
      m_previewPix(0L),
      m_currImg(0L),
      m_spellConfig(spellConfig),
      m_wantSpellCfg(true),
      m_userWantsSpellCheck(true),
      m_cbWantCheck(0L),
      m_gbSpellOpts(0L)
{
    kdDebug(28000) << "OCR Base Dialog!" << endl;

    // Read the stored spell-check preference (defaults to true).
    KConfig *config = KGlobal::config ();
    KConfigGroupSaver groupSaver( config, CFG_OCR_KSPELL );
    m_userWantsSpellCheck = config->readBoolEntry(CFG_WANT_KSPELL, true);

    // "Start OCR" first stores the configuration and then starts the run;
    // "Cancel" stops it.
    connect( this, SIGNAL( user1Clicked()), this, SLOT( writeConfig()));
    connect( this, SIGNAL( user1Clicked()), this, SLOT( startOCR() ));
    connect( this, SIGNAL( user2Clicked()), this, SLOT( stopOCR() ));

    m_previewSize.setWidth(200);
    m_previewSize.setHeight(300);

    // Initial button state: starting and closing allowed, cancelling disabled.
    enableButton( User1, true );    // Start OCR
    enableButton( User2, false );   // Cancel
    enableButton( Close, true );
}
void GMainEditor::startOCRBatch(){ imageEditor=(GImageEditor*)inputData.imageEditor; fontEditor=(GFontEditor*)inputData.fontEditor; logicProcessor=(GLogicProcessor*)inputData.logicProcessor; if(!logicProcessor->dictionaryReady)logicProcessor->readDictionary(); aliKali=fontEditor->aliKali; string strHeaderHTML,srcLine,str; string path=inputData.data["tablePath"]+"/header.xml"; DIR *dir; //int mode; int i=0; //читаем статистику использования букв книги //readLetterStat(); //загружаем базу данных букв #ifdef FORK int maxFork=inputData.num_cores*0.75; int pid; pidID *pidIDArray; if(inputData.fileList.size()>1){ int countFork=0; MemoryFile *pidData_mf; //main file for conection with child process //inputData.data["statPath"].c_str() time_t seconds; seconds = time (NULL); ostringstream out; out<<"/tmp/"<<seconds; string path=out.str(); pidData_mf=MemoryFile::create(path.c_str(), MemoryFile::if_exists_keep_if_dont_exists_create); pidData_mf->resize(sizeof(pidID)*maxFork); pidIDArray=(pidID*)pidData_mf->data(); //array which can be share between processes. 
for(int index=0;index<maxFork;index++){ //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl; pidIDArray[index].status=0; } int ID=0; while(i<inputData.fileList.size()){ cout<<"NEW file#1 "<<inputData.fileList[i]<<endl; if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){ i++; continue; } inputData.data["inputFile"]=inputData.fileList[i]; string path=inputData.data["inputFile"]; //проверяем есть ли такой распознаный файл path=substr(0,(int)path.rfind("."),path); string volume=path; string fileIndex=fileName(path); path+=".xml"; if(is_file(path)){i++; continue;} if(!forkProccesOCR_(pidIDArray,ID,maxFork)){ cout<<"ERROR on fork return"; sleep(1); continue; }; i++;if(i==inputData.fileList.size())break; countFork++; for(int index=0;index<maxFork;index++)cout<<pidIDArray[index].status<<" "; cout<<endl; ID=100; int status; while(ID==100){ if(countFork<=maxFork){ //есть свободные слоты для новых процессов for(int index=0;index<maxFork;index++){ //маркируем слот как занятый //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl; if(pidIDArray[index].status==0){ ID=index; pidIDArray[index].status=1; break; } } }else{ int forkStatusCount=0; for(int index=0;index<maxFork;index++){ //подсчитываем количество активных процессов if(pidIDArray[index].status==1)forkStatusCount++; } if(forkStatusCount==countFork){sleep(1); continue;}; //ждем завершения процесса wait(&status); //регистрируем с системе завершенный процесс countFork--; } } } }else{ inputData.data["inputFile"]=inputData.fileList[0]; pechaImg=LoadImageData(inputData.data["inputFile"],0); startOCR(pechaImg); } #else cout<<"NO FORK"; while(i<inputData.fileList.size()){ if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){ readDirectoryToArray(inputData.fileList, inputData.fileList[i],"img"); i++; continue; } GBitmap* pechaImg; inputData.data["inputFile"]=inputData.fileList[i]; if(!is_file(inputData.data["inputFile"]))continue; 
pechaImg=LoadImageData(inputData.data["inputFile"],0); if(!pechaImg){cout_<<"no open file"<<inputData.data["inputFile"]<<endl; return;} str=inputData.data["inputFile"]; str=substr(0,str.rfind("."),str); str+=".html"; //cout_<<str<<endl; inputData.data["inputFileName"]=inputData.data["siteName"]; inputData.data["inputFileName"]+=substr(inputData.data["siteRoot"].size(),inputData.data["inputFile"]); //readPageHTML(); inputData.c_out.open(str.c_str()); pechaDataLoaded=0; startOCR(pechaImg); pechaImg->destroy(); inputData.c_out.close(); i++; } #endif //drawLettersInCorrectionTable(DRAW_BASE); cout_<<"COMPLETE"<<endl; }//____________________________________________________________________________
void GMainEditor::startOCRSystem(){ string strHeaderHTML,srcLine,str; string path=inputData.data["tablePath"]+"/header.xml"; DIR *dir; //int mode; int i=0; uint freeMemory; //читаем статистику использования букв книги //readLetterStat(); int maxProcess=inputData.num_cores*0.85; pidID *pidIDArray; if(inputData.data["pathDB"]!=""){ inputData.data["inputFile"]=inputData.fileList[0]; pechaImg=LoadImageData(inputData.data["inputFile"],0); startOCR(pechaImg); MemoryFile *pidData_mf=MemoryFile::create(inputData.data["pathDB"].c_str(), MemoryFile::if_exists_keep_if_dont_exists_create); int ID=atoi(inputData.data["ID"].c_str()); pidIDArray=(pidID*)pidData_mf->data(); //array which can be share between processes. pidIDArray[ID].status=0; cout<<"done OCR"; inputData.log<<" done OCR"<<endl; inputData.log.close(); remove(inputData.data["log"].c_str()); exit(0); } if(inputData.fileList.size()>1){ int countProcess=0; MemoryFile *pidData_mf; //main file for inter proccess communications //inputData.data["statPath"].c_str() time_t seconds; seconds = time (NULL); ostringstream out; out<<"/tmp/"<<seconds; string pathDB=out.str(); pidData_mf=MemoryFile::create(pathDB.c_str(), MemoryFile::if_exists_keep_if_dont_exists_create); pidData_mf->resize(sizeof(pidID)*maxProcess); pidIDArray=(pidID*)pidData_mf->data(); //array which can be share between processes. 
for(int index=0;index<maxProcess;index++){ //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl; pidIDArray[index].status=0; } int ID=0; while(i<inputData.fileList.size()){ if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){ i++; continue; } inputData.data["inputFile"]=inputData.fileList[i]; string next=""; if(i<inputData.fileList.size()-2){ next=inputData.fileList[i+1]; next=str_replace(".tif",".html",next); next=str_replace(".jpg",".html",next); } string path=inputData.data["inputFile"]; //проверяем есть ли такой распознаный файл path=substr(0,(int)path.rfind("."),path); string volume=path; string fileIndex=fileName(path); path+=".html"; if(is_file(path)){i++; continue;} while(1){ //проверяем загрузку системы str=run("vm_stat"); vector<string>vm=explode("\n", str); str=str_replace("Pages free:", "", vm[1]); str=str_replace(" ", "", str); str=str_replace(".", "", str); freeMemory=atoi(str.c_str()); freeMemory=(freeMemory*4.096)/1000; //cout<<"@freeMemory:"<<freeMemory<<endl; if(freeMemory<10){ cout<<"critical memory loading error"; run("killall OCRLib"); exit(0); } if(freeMemory>500)break; cout<<"@ no free memory for process. 
freeMemory:"<<freeMemory<<endl; sleep(10); } cout<<"NEW file#1 "<<inputData.fileList[i]<<endl; ostringstream out; out<<inputData.data["rootApp"]<<" \"xml=<fileList>"<<inputData.data["inputFile"]<< "</fileList><ocrData>"<<inputData.data["ocrData"]<<"</ocrData>"<< "<ocrLn>"<<inputData.data["ocrLn"]<<"</ocrLn>"<<"<pathDB>"<<pathDB<<"</pathDB>"<< "<ID>"<<ID<<"</ID><nextPage>"<<next<<"</nextPage><scale>"<<inputData.data["scale"]<<"</scale>\" &"; string cmd=out.str(); //cout<<cmd; exit(0); system(cmd.c_str()); //sleep(1); //continue; i++;if(i==inputData.fileList.size())break; countProcess++; for(int index=0;index<maxProcess;index++)cout<<pidIDArray[index].status<<" "; cout<<endl; ID=100; int status; while(ID==100){ if(countProcess<=maxProcess){ //есть свободные слоты для новых процессов for(int index=0;index<maxProcess;index++){ //маркируем слот как занятый //cout_<<"pidIDArray["<<index<<"].status="<<pidIDArray[index].status<<endl; if(pidIDArray[index].status==0){ ID=index; pidIDArray[index].status=1; break; } } }else{ int processStatusCount=0; for(int index=0;index<maxProcess;index++){ //подсчитываем количество активных процессов if(pidIDArray[index].status==1)processStatusCount++; } if(processStatusCount==countProcess){sleep(1); continue;}; //ждем завершения процесса countProcess--; } } } }else{ i=0; //while(i<inputData.fileList.size()){ // if( ( dir=opendir(inputData.fileList[i].c_str()))!=NULL){ // i++; continue; // } inputData.data["inputFile"]=inputData.fileList[0]; pechaImg=LoadImageData(inputData.data["inputFile"],0); startOCR(pechaImg); // i++; //} } }