SEXP R_ocr_alternatives(SEXP filename, SEXP r_vars, SEXP r_level) { SEXP ans = R_NilValue; Pix *image = pixRead(CHAR(STRING_ELT(filename, 0))); int i; tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); api->Init(NULL, "eng"); api->SetImage(image); SEXP r_optNames = GET_NAMES(r_vars); for(i = 0; i < Rf_length(r_vars); i++) api->SetVariable(CHAR(STRING_ELT(r_optNames, i)), CHAR(STRING_ELT(r_vars, i))); api->Recognize(0); tesseract::ResultIterator* ri = api->GetIterator(); tesseract::PageIteratorLevel level = (tesseract::PageIteratorLevel) INTEGER(r_level)[0]; int n = 1; while(ri->Next(level)) n++; ri = api->GetIterator(); SEXP names; PROTECT(names = NEW_CHARACTER(n)); PROTECT(ans = NEW_LIST(n)); i = 0; do { const char* word = ri->GetUTF8Text(level); float conf = ri->Confidence(level); SET_STRING_ELT(names, i, Rf_mkChar(word)); SET_VECTOR_ELT(ans, i, getAlternatives(ri, word, conf)); delete[] word; i++; } while (ri->Next(level)); SET_NAMES(ans, names); UNPROTECT(2); return(ans); }
/**
 * Tries to repair a group of "bad" atoms by brute-forcing the alternative
 * characters of every label in the group.
 *
 * Each label of each atom in @p data becomes one slot holding the list
 * returned by getAlternatives(). The slots are stepped through like the
 * digits of a mixed-radix counter, slot 0 being the least significant.
 * After every step the labels are rewritten, the printable forms are
 * concatenated, and the resulting string is accepted as soon as
 * calcSplitProbability(optimalSplit(...)) exceeds EPS.
 *
 * The combination with every counter at zero is never tested: the search
 * starts by incrementing and ends when the counter wraps back to all zeros.
 * On failure the labels keep the last combination that was tried.
 *
 * @param data references to the atoms to fix; their labels'
 *             selected_character fields are modified in place.
 * @return true if a passing combination was found (or the group is the
 *         single atom "R"), false otherwise.
 */
bool ChemicalValidity::optimizeAtomGroup(AtomRefs& data) const
{
   logEnterFunction();

   // At this point data holds the references to the bad atoms,
   // and we are free to modify them.

   // HACK for "R", probably outdated
   if (data.size() == 1 && data[0]->getPrintableForm() == "R")
      return true;

   // One slot per (atom, label) pair, in atom order then label order.
   std::vector<IterationRecord> slots;
   std::string badPart;

   for (size_t atomIdx = 0; atomIdx < data.size(); atomIdx++)
   {
      badPart += data[atomIdx]->getPrintableForm(false);

      for (size_t labelIdx = 0; labelIdx < data[atomIdx]->labels.size(); labelIdx++)
      {
         IterationRecord slot;
         slot.atom = atomIdx;
         slot.pos = labelIdx;
         slot.counter = 0;
         slot.alts = getAlternatives(data[atomIdx]->labels[labelIdx].selected_character,
                                     data[atomIdx]->labels[labelIdx].alternatives);
         getLogExt().append("alternatives", slot.alts);
         slots.push_back(slot);
      }
   }

   getLogExt().append("bad part", badPart);

   if (slots.empty())
      return false;

   for (;;)
   {
      // Advance the mixed-radix counter by one, carrying into higher slots.
      size_t digit = 0;
      slots[0].counter++;
      while (slots[digit].counter >= static_cast<int>(slots[digit].alts.size()))
      {
         slots[digit].counter = 0;
         ++digit;
         if (digit >= slots.size())
            return false; // carried out of the last slot: every combination was tried
         slots[digit].counter++;
      }

      // Write the current combination into the atoms' labels.
      for (size_t k = 0; k < slots.size(); k++)
      {
         if (slots[k].alts.empty())
            continue; // nothing to choose from for this label
         data[slots[k].atom]->labels[slots[k].pos].selected_character =
            slots[k].alts[slots[k].counter];
      }

      // Rebuild the candidate string from the updated atoms.
      std::string candidate;
      for (size_t atomIdx = 0; atomIdx < data.size(); atomIdx++)
         candidate += data[atomIdx]->getPrintableForm(false);

      getLogExt().append("check string", candidate);

      if (calcSplitProbability(optimalSplit(candidate, elements.names)) > EPS)
      {
         getLogExt().append("passed!", candidate);
         return true;
      }
   }
}