Example #1
0
/*
 * Run Tesseract (English model) on an image file and, for every element
 * found at the requested iterator level, store the value returned by
 * getAlternatives() in a list named by the recognized text.
 *
 * filename  character vector; element 0 is the path handed to pixRead().
 * r_vars    named character vector; every name/value pair is forwarded to
 *           TessBaseAPI::SetVariable() before recognition. May be empty.
 * r_level   integer vector; element 0 is converted to a
 *           tesseract::PageIteratorLevel.
 *
 * Returns a named list with one element per recognized item, or R_NilValue
 * when Tesseract yields no result iterator. Raises an R error when the
 * image cannot be read, the engine cannot be initialized, or r_vars is
 * non-empty but has no names.
 *
 * All Tesseract/Leptonica resources created here are released on the normal
 * return path. NOTE(review): an R error raised from inside getAlternatives()
 * or an R allocation would still bypass that cleanup.
 */
SEXP
R_ocr_alternatives(SEXP filename, SEXP r_vars, SEXP r_level)
{
  SEXP ans = R_NilValue;

  /* Validate the arguments before any native resource is acquired, so that
     raising an R error here cannot leak anything. */
  int nvars = Rf_length(r_vars);
  SEXP r_optNames = GET_NAMES(r_vars);
  if(nvars > 0 && Rf_length(r_optNames) != nvars)
      Rf_error("the tesseract variables must be given as a named character vector");

  Pix *image = pixRead(CHAR(STRING_ELT(filename, 0)));
  if(!image)
      Rf_error("cannot read image file '%s'", CHAR(STRING_ELT(filename, 0)));

  tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
  if(api->Init(NULL, "eng") != 0) {
      /* Release what we own before Rf_error() jumps out of this function. */
      delete api;
      pixDestroy(&image);
      Rf_error("could not initialize the tesseract engine for language 'eng'");
  }
  api->SetImage(image);

  for(int i = 0; i < nvars; i++)
      api->SetVariable(CHAR(STRING_ELT(r_optNames, i)), CHAR(STRING_ELT(r_vars, i)));

  api->Recognize(0);

  tesseract::PageIteratorLevel level =
      static_cast<tesseract::PageIteratorLevel>(INTEGER(r_level)[0]);

  /* First pass: count the elements so the result vectors can be sized. */
  tesseract::ResultIterator *ri = api->GetIterator();
  int n = 0;
  if(ri) {
      n = 1;
      while(ri->Next(level))
          n++;

      /* The counting iterator is exhausted; free it and start a fresh one. */
      delete ri;
      ri = api->GetIterator();
  }

  /* Second pass: fill in the names and the per-element alternatives. */
  if(ri) {
      SEXP names;
      PROTECT(names = NEW_CHARACTER(n));
      PROTECT(ans = NEW_LIST(n));

      int i = 0;
      do {
          const char *word = ri->GetUTF8Text(level);
          /* GetUTF8Text() can return NULL; in that case the slot is left at
             its freshly allocated value instead of dereferencing NULL. */
          if(word) {
              float conf = ri->Confidence(level);
              SET_STRING_ELT(names, i, Rf_mkChar(word));
              SET_VECTOR_ELT(ans, i, getAlternatives(ri, word, conf));
              delete[] word;
          }
          i++;
      } while(i < n && ri->Next(level)); /* never write past the n slots */

      SET_NAMES(ans, names);
      UNPROTECT(2);
      delete ri;
  }

  /* Tear down the engine before destroying the image it was given. */
  api->End();
  delete api;
  pixDestroy(&image);

  return(ans);
}
Example #2
0
	/// Tries to repair a group of atoms whose labels do not form a valid
	/// string by substituting alternative characters for the label positions.
	///
	/// Every label position of every atom in @p data gets a list of
	/// alternatives (from getAlternatives). The positions are then stepped
	/// through like the digits of an odometer; after each step the selected
	/// characters are written back into the atoms, the printable forms are
	/// concatenated, and the result is accepted as soon as
	/// calcSplitProbability(optimalSplit(...)) exceeds EPS.
	///
	/// @param data references to the atoms to fix; their labels'
	///             selected_character fields are modified in place.
	/// @return true if an accepted combination was found (or the group is the
	///         single "R" special case), false if there is nothing to vary or
	///         all combinations were exhausted.
	bool ChemicalValidity::optimizeAtomGroup(AtomRefs& data) const
	{
		logEnterFunction();

		// Legacy shortcut: a lone atom printed as "R" is accepted unchanged.
		if (data.size() == 1 && data[0]->getPrintableForm() == "R")
			return true;

		// One "digit" per label position across all atoms of the group.
		std::vector<IterationRecord> digits;
		std::string badPart;

		for (size_t atomIdx = 0; atomIdx < data.size(); atomIdx++)
		{
			badPart += data[atomIdx]->getPrintableForm(false);

			const size_t labelCount = data[atomIdx]->labels.size();
			for (size_t labelIdx = 0; labelIdx < labelCount; labelIdx++)
			{
				CharacterRecognitionEntry& entry = data[atomIdx]->labels[labelIdx];

				IterationRecord rec;
				rec.atom = atomIdx;
				rec.pos = labelIdx;
				rec.counter = 0;
				rec.alts = getAlternatives(entry.selected_character, entry.alternatives);
				getLogExt().append("alternatives", rec.alts);
				digits.push_back(rec);
			}
		}

		getLogExt().append("bad part", badPart);

		// Nothing to vary, so nothing can be fixed.
		if (digits.empty())
			return false;

		for (;;)
		{
			// Advance the odometer by one, carrying into the next digit
			// whenever the current one runs past its alternatives.
			bool exhausted = false;
			size_t digit = 0;
			digits[0].counter++;
			while (digits[digit].counter >= static_cast<int>(digits[digit].alts.size()))
			{
				digits[digit].counter = 0;
				digit++;
				if (digit >= digits.size())
				{
					// Carried out of the last digit: every combination was tried.
					exhausted = true;
					break;
				}
				digits[digit].counter++;
			}

			if (exhausted)
				break;

			// Write the currently selected alternatives back into the atoms.
			for (size_t k = 0; k < digits.size(); k++)
			{
				const IterationRecord& rec = digits[k];
				if (rec.alts.empty())
					continue;

				CharacterRecognitionEntry& entry = data[rec.atom]->labels[rec.pos];
				entry.selected_character = rec.alts[rec.counter];
			}

			// Rebuild the candidate string from the updated atoms.
			std::string candidate;
			for (size_t atomIdx = 0; atomIdx < data.size(); atomIdx++)
				candidate += data[atomIdx]->getPrintableForm(false);

			getLogExt().append("check string", candidate);

			if (calcSplitProbability(optimalSplit(candidate, elements.names)) > EPS)
			{
				getLogExt().append("passed!", candidate);
				return true;
			}
		}

		return false;
	}