/// Writes a character mapping table to @p fileName.
///
/// Each of the first @p numRows entries of @p map becomes one line of the
/// form `<from>|<to>`, where both characters are passed through toUtf8().
/// The accumulated text is handed to writeFileAsBinary() in a single call
/// after all rows were processed.
///
/// @param map       array of at least @p numRows mapping entries
/// @param numRows   number of entries to write; values <= 0 give an empty file
/// @param fileName  destination passed to writeFileAsBinary()
/// @throws std::logic_error if the same `from` character occurs in more than
///         one row; nothing is written to the file in that case.
void fileWriter(MappingRaw* map, const int numRows, const string_t& fileName)
{
    std::map<wchar_t, wchar_t> uniqnessCheck;
    std::stringstream ss;

    for (int i = 0; i < numRows; ++i) {
        const wchar_t from = map[i].from;
        const wchar_t to   = map[i].to;

        // '\n' instead of std::endl: same byte, no needless flush of an
        // in-memory stream.
        ss << toUtf8(wstring_t(1, from)) << "|" << toUtf8(wstring_t(1, to)) << '\n';

        const auto inserted = uniqnessCheck.insert(std::make_pair(from, to));
        if ( !inserted.second ) {
            // `from` was already registered by an earlier row; report the
            // target it was mapped to back then.
            const wchar_t previousTo = inserted.first->second;

            // Separate stream with its own name so it does not shadow the
            // output buffer `ss`.
            std::stringstream msg;
            msg << "Character INTCODE(" << static_cast<int>(from) << "), HEX (0x"
                << Poco::NumberFormatter::formatHex(static_cast<int>(from), 4) << "), CHAR ("
                << toUtf8(wstring_t(1, from)) << ")"
                << " already mapped to INTCODE(" << static_cast<int>(previousTo) << "), HEX (0x"
                << Poco::NumberFormatter::formatHex(static_cast<int>(previousTo), 4) << "), CHAR ("
                << toUtf8(wstring_t(1, previousTo)) << ")";

            throw std::logic_error(msg.str().c_str());
        }
    }

    writeFileAsBinary(fileName, ss.str());
}
Exemple #2
0
void Converter::convertSingleDocQuick( const string_t& fileName )
{
	tDocumentsSp docs = word()->getDocuments();
    tDocumentSp  doc  = docs->open(toUtf16(getInputAbsPath(fileName)));
    if (!doc) {
        logError(logger(), "Error while opening document: " + fileName);
        return;
    }

    tCharMappingSp cm;
    string_t       fontName, newFontName;
    wstring_t      text, textUnicode, docAsText;
    int            c = 0;

	tSelectionSp s = word()->getSelection();
    int pos = 0;
    int totalCharsQty = s->getStoryLength();

    do {
        s->setStart(pos);
        s->setEnd(pos + 1);
        s->selectCurrentFont();
        fontName = s->getFont()->getName();

        if ( canSkipFont(fontName) ) {
            //s->getFont()->haveCommonAttributes();
            pos = s->getEnd();
            docAsText += s->getText();
            std::cout << "\r" << percentageStr(pos, totalCharsQty - 1);
            continue;
        }

        text = s->getText();
        if ( fontName.empty() ) {
            saveSelection(s);
            fontName = makeGuess(s);            
            restoreSelection(s);

            /// if after all we have empty font name, log about that event
            /// and go forward
            if (fontName.empty()) {
                logError(logger(), "EMPTY FONT NAME: Investigate");
                pos = s->getEnd();
                docAsText += text;
                std::cout << "\r" << percentageStr(pos, totalCharsQty - 1);
                continue;
            }
        }

        /// use mapping
        textUnicode.clear();
        cm = getCM(fontName);
        if (cm) {
            bool spacingOnly = cm->doConversion(text, textUnicode, fontName);
            newFontName = getFontSubstitution(cm, fontName);
            //tFontSp fontDup = s->getFont()->duplicate();
            s->setText(textUnicode);
            //s->getFont()->haveCommonAttributes();
            s->getFont()->setName(newFontName);
            //s->setFont(fontDup);
        }

        /// extract text from the document as well
        docAsText += textUnicode;
        pos = s->getEnd();

        std::cout << "\r" << percentageStr(pos, totalCharsQty - 1);
    } while ( pos < totalCharsQty - 1 );


    /// -------------------------------------------///
    /// now save result in the appropriate folder  ///
    string_t outputDir = getOutputAbsPath(fileName);
    Poco::File(outputDir).createDirectories();
    Poco::Path p(fileName);
    doc->saveAs( outputDir + p.getBaseName() + " QUICK." + p.getExtension() );
    doc->close();

    if ( config_->getBool("app.saveAlsoAsUTF8", false) )
        writeFileAsBinary( outputDir + p.getBaseName() + " UTF8 QUICK.txt", toUtf8(docAsText));
}
Exemple #3
0
void Converter::convertSingleDocPrecise( const string_t& fileName )
{
#ifdef SECURITY_ENABLED
    if ( !security_.getKey().updateCounters(0) ) {
        logContent(security_.getKey());
        return;
    }
#endif

	tDocumentsSp docs = word()->getDocuments();
    tDocumentSp  doc  = docs->open(toUtf16(getInputAbsPath(fileName)));
    if (!doc) {
        logError(logger(), "Error while opening document: " + fileName);
        return;
    }

    /// -------------------------------------------///
    usedFonts_.clear();
    wstring_t docAsText;
//     tParagraphsSp paragraphs = doc->getParagraphs();
//     int count = paragraphs->getCount();

//     tSentencesSp sentences = doc->getSentences();
//     int sentCount = sentences->getCount();

//     for (int i = 1; i <= count; ++i) {
//         tParagraphSp p = paragraphs->getItem(i);
//         docAsText += processRangePrecise(p->getRange(), false);
//         std::cout << "\r" << percentageStr(i, count);
//     }

    tRangeSp r = doc->getContent();
    int64 totalBytes = r->getStoryLength();

    docAsText += processRangePreciseVer2(r, true);

    std::cout << std::endl;


    /// footnotes
    logInfo(logger(), "Processing [footnotes]: ");

    tFootnotesSp footnots = doc->getFootnotes();
    int notesCount = footnots->getCount();
    for (int i = 1; i <= notesCount; ++i) {
        tNoteSp note = footnots->getItem(i);
        tRangeSp r = note->getRange();
        processRangePreciseVer2(r, false);
        std::cout << "\r" << percentageStr(i, notesCount);
    }
    if (notesCount > 0)
        std::cout << std::endl;

    tSectionsSp sections = doc->getSections();
    int sectionsCount = sections->getCount();
    for (int i = 1; i <= sectionsCount; ++i) {
        tSectionSp section(new Section(sections->getItem(i)));

        tHeadersFootersSp hfs = section->getHeaders();
        if (hfs) {
            logInfo(logger(), "Processing [headers]: ");
            tHeaderFooterSp hf( new HeaderFooter(hfs->getItem(1)) );
            tRangeSp r = hf->getRange();
            processRangePreciseVer2(r, false);
        }

        hfs = section->getFooters();
        if (hfs) {
            logInfo(logger(), "Processing [footers]: ");
            tHeaderFooterSp hf( new HeaderFooter(hfs->getItem(1)) );
            tRangeSp r = hf->getRange();
            processRangePreciseVer2(r, false);
        }
    }

#ifdef SECURITY_ENABLED
    security_.getKey().updateCounters(totalBytes);
#endif

    logUsedFonts(fileName, usedFonts_);
    usedFonts_.clear();

    /// now save result in the appropriate folder
    string_t outputDir = getOutputAbsPath(fileName);
    Poco::File(outputDir).createDirectories();
    Poco::Path p(fileName);
    logInfo(logger(), "Saving document...");
    doc->saveAs( outputDir + p.getBaseName() + " UNICODE." + p.getExtension() );
    doc->close();
    logInfo(logger(), "Save was successful.");
    
    if ( wantUtf8Text_ )
        writeFileAsBinary( outputDir + p.getBaseName() + " UTF8.txt", toUtf8(docAsText));

#ifdef SECURITY_ENABLED
    logContent(security_.getKey());
#endif
}