Exemple #1
0
/// Writes one bookmark to @a stream as a "bookmark" element (opened at
/// nesting level 3) followed by its child values (level 4) and the matching
/// closing tag.
///
/// The opening tag carries the bookmark type name, the percent value printed
/// as "<getPercent()/100>.<getPercent()%100>%", the timestamp, the shortcut
/// and the page as attributes.
///
/// @param stream destination stream, forwarded to putTag()/putTagValue()
/// @param bmk    bookmark to serialize; must not be NULL
static void putBookmark( LVStream * stream, CRBookmark * bmk )
{
    static const char * const tnames[] = {"lastpos", "position", "comment", "correction"};

    // Read the type once and index the table relative to the first enumerator,
    // so the lookup does not silently rely on bmkt_lastpos being 0.
    const int type = bmk->getType();
    const char * tname = "unknown";
    if ( type >= bmkt_lastpos && type <= bmkt_correction )
        tname = tnames[type - bmkt_lastpos];

    // Bounded formatting: the tag can never run past the end of bmktag.
    char bmktag[256];
    snprintf(bmktag, sizeof(bmktag),
            "bookmark type=\"%s\" percent=\"%d.%02d%%\" timestamp=\"%d\" shortcut=\"%d\" page=\"%d\"", tname,
            bmk->getPercent()/100, bmk->getPercent()%100,
            (int)bmk->getTimestamp(), (int)bmk->getShortcut(), (int)bmk->getBookmarkPage());

    putTag(stream, 3, bmktag);
    putTagValue( stream, 4, "start-point", bmk->getStartPos() );
    putTagValue( stream, 4, "end-point", bmk->getEndPos() );
    putTagValue( stream, 4, "header-text", bmk->getTitleText() );
    putTagValue( stream, 4, "selection-text", bmk->getPosText() );
    putTagValue( stream, 4, "comment-text", bmk->getCommentText() );
    putTag(stream, 3, "/bookmark");
}
Exemple #2
0
/// Serializes the whole file history as a UTF-8 XML document (with BOM) and
/// copies it into @a targetStream.
///
/// The document is first built in a memory stream and then pumped into the
/// target in one go. Each record produces a "file" element holding a
/// "file-info" section and a "bookmark-list" whose first entry is the last
/// reading position, followed by the record's bookmarks.
///
/// @param targetStream stream receiving the generated XML
/// @return false when @a targetStream is NULL or the temporary memory stream
///         could not be created, true otherwise
bool CRFileHist::saveToStream( LVStream * targetStream )
{
    // Nothing sensible can be done without a destination.
    if ( targetStream == NULL )
        return false;

    LVStreamRef streamref = LVCreateMemoryStream(NULL, 0, false, LVOM_WRITE);
    LVStream * stream = streamref.get();
    // Guard against a failed allocation instead of dereferencing NULL below.
    if ( stream == NULL )
        return false;

    const char * xml_hdr = "\xef\xbb\xbf<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<FictionBookMarks>\r\n";
    const char * xml_ftr = "</FictionBookMarks>\r\n";

    *stream << xml_hdr;
    for ( int i=0; i<_records.length(); i++ ) {
        CRFileHistRecord * rec = _records[i];
        putTag( stream, 1, "file" );

        // Document description.
        putTag( stream, 2, "file-info" );
        putTagValue( stream, 3, "doc-title", rec->getTitle() );
        putTagValue( stream, 3, "doc-author", rec->getAuthor() );
        putTagValue( stream, 3, "doc-series", rec->getSeries() );
        putTagValue( stream, 3, "doc-filename", rec->getFileName() );
        putTagValue( stream, 3, "doc-filepath", rec->getFilePath() );
        // NOTE(review): the size is narrowed to unsigned int before formatting;
        // confirm that larger file sizes never need to be stored here.
        putTagValue( stream, 3, "doc-filesize", lString16::itoa( (unsigned int)rec->getFileSize() ) );
        putTag( stream, 2, "/file-info" );

        // Last reading position first, then the record's bookmarks.
        putTag( stream, 2, "bookmark-list" );
        putBookmark( stream, rec->getLastPos() );
        for ( int j=0; j<rec->getBookmarks().length(); j++) {
            CRBookmark * bmk = rec->getBookmarks()[j];
            putBookmark( stream, bmk );
        }
        putTag( stream, 2, "/bookmark-list" );

        putTag( stream, 1, "/file" );
    }
    *stream << xml_ftr;

    LVPumpStream( targetStream, stream );
    return true;
}
Exemple #3
0
/** Determines the length of the multibyte character that starts with
 * @a firstByte, looking ahead in @a input without consuming anything.
 *
 * Up to 9 bytes are read directly from the stream buffer and immediately put
 * back, so the caller decides how many bytes to actually consume. Only the
 * bytes really obtained are handed to mbtowc(), which decodes according to
 * the current LC_CTYPE locale.
 *
 * @param input     stream positioned just after @a firstByte
 * @param firstByte byte already extracted from @a input
 * @param decoded   receives the decoded wide character when decoding succeeds
 * @return the value returned by mbtowc(): the number of bytes forming the
 *         character, 0 for a null character, -1 if the bytes are not valid
 */
static int untaggingPeekMultibyte(std::istream& input, char firstByte, wchar_t& decoded)
{
    enum { LOOK_AHEAD = 9 };
    // One slot for the byte already read plus the look-ahead bytes; zeroed so
    // that no uninitialised byte is ever inspected.
    char buf[LOOK_AHEAD + 1] = {0};
    buf[0] = firstByte;
    const std::streamsize got = input.rdbuf()->sgetn(buf + 1, LOOK_AHEAD);
    // Restore the stream: the look-ahead must not swallow input.
    for (std::streamsize i = 0; i < got; i++)
        input.rdbuf()->sungetc();
    return mbtowc(&decoded, buf, static_cast<size_t>(got) + 1);
}

/** Copies the UTF-8 encoded @a input to @a output while blanking out SGML
 * tags and character entities, so that the remaining text keeps its layout.
 *
 * The stream is processed by a small state machine:
 *  - TEXT      inside a word;
 *  - BLANC     after white space (initial state);
 *  - DEBCOL    inside a tag that directly follows text;
 *  - FINCOL    just after the '>' closing such a tag;
 *  - DEBBLANC  inside a tag that follows white space;
 *  - FINBLANC  just after the '>' closing such a tag;
 *  - BEGENTITY just after '&';
 *  - ENTITY    inside the letters of an entity name.
 *
 * Replacement characters are spaces in nearly every branch (the underscore
 * variants are commented out); two branches still emit an underscore followed
 * by a space, see the review notes in the body. Tags that follow text are
 * flushed through putTag(), tags that follow white space through putWhites().
 *
 * Anomalies are reported on @a status and reflected in the return value, but
 * processing continues:
 *  - a '<' found inside a tag;
 *  - a '>' found outside a tag;
 *  - a tag still open when the input ends.
 *
 * The method sets the process-wide LC_CTYPE locale to "fr_FR.UTF-8".
 * It does not record the removed tags.
 *
 * @param status stream where errors and warnings are written
 * @param input the input stream; should contain a valid SGML file
 * @param output the stream receiving the un-tagged result
 * @param wide forwarded unchanged to putTag() and putWhites()
 * @param endSentenceTag forwarded unchanged to putTag() and putWhites()
 * @return 0 if no anomaly was found; otherwise the non-zero code of the last
 *         anomaly reported
 * @throws std::runtime_error if the state machine reaches an unknown state
 */
int TextFormater::untaggingWithSpaces(std::ostream& status, std::istream& input,
        std::ostream& output, bool wide, 
        const std::string& endSentenceTag) const
{
  LIMA_UNUSED(wide);
    LILOGINIT;
    setlocale(LC_CTYPE,"fr_FR.UTF-8");

    // Size of the tag being read after white space, kept until we know
    // whether the tag is bound to the text that follows it.
    size_t nb = 0;          // number of characters in that tag
    size_t nbNewLines = 0;  // number of newlines in that tag
    size_t position = 0;    // position in the input stream

    enum RetVal {SUCCESS, INVALID_OPENING_TAG_CHAR, INVALID_CLOSING_TAG_CHAR,
                    DUPLICATED_OPENING_TAG_CHAR, DUPLICATED_CLOSING_TAG_CHAR,
                    UNCLOSED_OPENING_TAG_CHAR};
    RetVal retVal = SUCCESS;

    enum Etat {TEXT, DEBCOL, FINCOL, BLANC, DEBBLANC, FINBLANC, BEGENTITY, ENTITY};
    Etat etat = BLANC;

    char carLu;

    std::ostringstream txt;       // pending text not yet written to output
    std::ostringstream tag;       // replacement characters of the current tag
    std::ostringstream tagValue;  // raw characters of the current tag
    std::ostringstream entity;    // raw characters of the current entity

    // Stop as soon as a character cannot be extracted, so that a stale
    // character is never processed.
    while (input.get(carLu))
    {
        LDEBUG << carLu;
        switch (etat)
        {
            case TEXT:
                switch (carLu)
                {
                    case '<':
                        output << txt.str();
                        txt.str("");
                        // NOTE(review): two replacement characters are emitted
                        // for a single '<' here, unlike the other branches
                        // where the underscore is commented out - confirm.
                        tag << '_';
                        tag << ' ';
                        tagValue << carLu;
                        LDEBUG << "TEXT-> DEBCOL" << LENDL;
                        etat = DEBCOL;
                    break;
                    case '>':
                        txt << carLu;
                        status << "Invalid '>' character at " << position << std::endl;
                        retVal = INVALID_CLOSING_TAG_CHAR;
                    break;
                    case '&':
                        output << txt.str();
                        txt.str("");
                        entity << carLu;
                        LDEBUG << "TEXT -> BEGENTITY;" << LENDL;
                        etat = BEGENTITY;
                    break;
                    case ' ': case '\t': case '\n':
                        output << txt.str();
                        output << carLu;
                        txt.str("");
                        LDEBUG << "TEXT-> BLANC" << LENDL;
                        etat = BLANC;
                    break;
                    default:
                        txt << carLu;
                    break;
                }
            break;
            case BEGENTITY:
                switch (carLu)
                {
                    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
                    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
                    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
                    case 'V': case 'W': case 'X': case 'Y': case 'Z':
                    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
                    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
                    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
                    case 'v': case 'w': case 'x': case 'y': case 'z':
                        entity << carLu;
                        LDEBUG << "BEGENTITY-> ENTITY" << LENDL;
                        etat = ENTITY;
                    break;
                    case '<':
                        // NOTE(review): as in the TEXT branch, both '_' and ' '
                        // are emitted, and the '<' is not recorded in tagValue
                        // - confirm this is intended.
                        output.put('_');
                        output.put(' ');
                        entity.str("");
                        tag << '_';
                        tag << ' ';
                        LDEBUG << "BEGENTITY-> DEBCOL" << LENDL;
                        etat = DEBCOL;
                    break;
                    case ' ': case '\t': case '\n':
//                        output.put('_');
                        output.put(' ');
                        output << carLu;
                        entity.str("");
                        LDEBUG << "BEGENTITY-> BLANC" << LENDL;
                        etat = BLANC;
                    break;
                    default:
//                        output.put('_');
                        output.put(' ');
                        output << carLu;
                        entity.str("");
                        LDEBUG << "BEGENTITY-> TEXT" << LENDL;
                        etat = TEXT;
                    break;
                }
            break;
            case ENTITY:
                switch (carLu)
                {
                    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
                    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
                    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
                    case 'V': case 'W': case 'X': case 'Y': case 'Z':
                    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
                    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
                    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
                    case 'v': case 'w': case 'x': case 'y': case 'z':
                        entity << carLu;
                    break;
                    case '<':
                        // One blank per character of the abandoned entity.
                        for (uint64_t i = 0; i < entity.str().size(); i++)
                        {
//                            output << '_';
                            output << ' ';
                        }
                        entity.str("");
//                        tag << '_';
                        tag << ' ';
                        LDEBUG << "ENTITY-> DEBCOL" << LENDL;
                        etat = DEBCOL;
                    break;
                    case ' ': case '\t': case '\n':
                        for (uint64_t i = 0; i < entity.str().size(); i++)
                        {
//                            output << '_';
                            output << ' ';
                        }
                        output << carLu;
                        entity.str("");
                        LDEBUG << "ENTITY-> BLANC" << LENDL;
                        etat = BLANC;
                    break;
                    case ';':
                        // The terminating ';' is blanked too, hence the +1.
                        for (uint64_t i = 0; i < entity.str().size()+1; i++)
                        {
//                            output << '_';
                            output << ' ';
                        }
                        entity.str("");
                        LDEBUG << "ENTITY-> TEXT" << LENDL;
                        etat = TEXT;
                    break;
                    default:
                        for (uint64_t i = 0; i < entity.str().size(); i++)
                        {
//                            output << '_';
                            output << ' ';
                        }
                        output << carLu;
                        entity.str("");
                        LDEBUG << "ENTITY-> TEXT" << LENDL;
                        etat = TEXT;
                    break;
                }
            break;
            case DEBCOL:
                tagValue << carLu;
                switch (carLu)
                {
                    case '<':
//                        tag << '_';
                        tag << ' ';
                        status << "Invalid '<' character at " << position << std::endl;
                        retVal = DUPLICATED_OPENING_TAG_CHAR;
                    break;
                    case '>':
//                        tag << '_';
                        tag << ' ';
                        LDEBUG << "DEBCOL-> FINCOL" << LENDL;
                        etat = FINCOL;
                    break;
                    case ' ': case '\t': case '\n':
                        tag << carLu;
                    break;
                    default:
                    {
                        LDEBUG << "Looking at " << carLu << LENDL;
                        wchar_t mbc = 0;
                        // Look ahead without consuming: only the continuation
                        // bytes of a multibyte character are skipped below.
                        const int transRes = untaggingPeekMultibyte(input, carLu, mbc);
                        LDEBUG << "transres value is " << transRes << LENDL;
                        if (transRes > 1)
                        {
                            LDEBUG << "Got a multibyte char inside tag: " << mbc << LENDL;
                            for (int i = 1; i < transRes; i++)
                            {
                                input.get(carLu);
                            }
                        }
                        // A multibyte character counts as a single blank.
//                        tag << '_';
                        tag << ' ';
                    }
                    break;
                }
            break;
            case FINCOL:
                switch (carLu)
                {
                    case '<':
//                        tag << '_';
                        tag << ' ';
                        tagValue << carLu;
                        LDEBUG << "FINCOL-> DEBCOL" << LENDL;
                        etat = DEBCOL;
                    break;
                    case '>':
//                        tag << '_';
                        tag << ' ';
                        tagValue << carLu;
                        status << "Invalid '>' character at " << position << std::endl;
                        retVal = DUPLICATED_CLOSING_TAG_CHAR;
                    break;
                    case ' ': case '\t': case '\n':
                        putTag(status, output, wide, endSentenceTag, tag, tagValue);
                        output << carLu;
                        LDEBUG << "FINCOL-> BLANC" << LENDL;
                        etat = BLANC;
                    break;
                    case '&':
                        putTag(status, output, wide, endSentenceTag, tag, tagValue);
                        entity << carLu;
                        LDEBUG << "FINCOL -> BEGENTITY;" << LENDL;
                        etat = BEGENTITY;
                    break;
                    default:
                        putTag(status, output, wide, endSentenceTag, tag, tagValue);
                        txt << carLu;
                        tagValue << carLu;
                        LDEBUG << "FINCOL-> TEXT" << LENDL;
                        etat = TEXT;
                    break;
                }
            break;
            case BLANC:
                switch (carLu)
                {
                    case '&':
                        entity << carLu;
                        LDEBUG << "BLANC -> BEGENTITY;" << LENDL;
                        etat = BEGENTITY;
                    break;
                    case '<':
                        nb = 1;
                        nbNewLines = 0;
                        tagValue << carLu;
                        LDEBUG << "BLANC-> DEBBLANC" << LENDL;
                        etat = DEBBLANC;
                    break;
                    case '>':
                        output << '>';
                        status << "Invalid '>' character at " << position << std::endl;
                        retVal = INVALID_CLOSING_TAG_CHAR;
                        LDEBUG << "BLANC-> TEXT" << LENDL;
                        etat = TEXT;
                    break;
                    case ' ': case '\t': case '\n':
                        output << carLu;
                    break;
                    default:
                        txt << carLu;
                        LDEBUG << "BLANC-> TEXT" << LENDL;
                        etat = TEXT;
                    break;
                }
            break;
            case DEBBLANC:
                tagValue << carLu;
                switch (carLu)
                {
                    case '<':
                        nb++;
                        status << "Duplicated '<' character at " << position << std::endl;
                        retVal = DUPLICATED_OPENING_TAG_CHAR;
                    break;
                    case '>':
                        nb++;
                        LDEBUG << "DEBBLANC-> FINBLANC" << LENDL;
                        etat = FINBLANC;
                    break;
                    case '\n':
                        nbNewLines++;
                    break;
                    default:
                    {
                        LDEBUG << "Looking at " << carLu << LENDL;
                        wchar_t mbc = 0;
                        const int transRes = untaggingPeekMultibyte(input, carLu, mbc);
                        LDEBUG << "transres  is " << transRes << LENDL;
                        if (transRes > 1)
                        {
                            LDEBUG << "Got a multibyte char inside tag: " << mbc << LENDL;
                            for (int i = 1; i < transRes; i++)
                            {
                                input.get(carLu);
                            }
                        }
                        // A multibyte character counts as a single character.
                        nb++;
                    }
                    break;
                }
            break;
            case FINBLANC:
                switch (carLu)
                {
                    case '<':
                        nb++;
                        tagValue << carLu;
                        LDEBUG << "FINBLANC-> DEBBLANC" << LENDL;
                        etat = DEBBLANC;
                    break;
                    case '>':
                        status << "Duplicated '>' character at " << position << std::endl;
                        retVal = DUPLICATED_CLOSING_TAG_CHAR;
                        putWhites(status, output, wide,endSentenceTag, tagValue,
                                output, carLu, ' ', nb, nbNewLines);
                        LDEBUG << "FINBLANC-> TEXT" << LENDL;
                        etat = TEXT;
                    break;
                    case ' ': case '\t': case '\n':
                        putWhites(status, output, wide,endSentenceTag, tagValue,
                                output, carLu, ' ', nb, nbNewLines);
                        LDEBUG << "FINBLANC-> BLANC" << LENDL;
                        etat = BLANC;
                    break;
                    case '&':
//                        putWhites(status, output, wide,endSentenceTag, tagValue,
//                                entity, carLu, '_', nb, nbNewLines);
                        putWhites(status, output, wide,endSentenceTag, tagValue,
                                entity, carLu, ' ', nb, nbNewLines);
                        LDEBUG << "FINBLANC -> BEGENTITY;" << LENDL;
                        etat = BEGENTITY;
                    break;
                    default:
//                        putWhites(status, output, wide,endSentenceTag, tagValue,
//                                txt, carLu, '_', nb, nbNewLines);
                        putWhites(status, output, wide,endSentenceTag, tagValue,
                                txt, carLu, ' ', nb, nbNewLines);
                        LDEBUG << "FINBLANC-> TEXT" << LENDL;
                        etat = TEXT;
                    break;
                }
            break;
            default:
            {
                // Build the message with a stream: std::string(const char*, n)
                // would only copy the first n characters of the literal.
                std::ostringstream msg;
                msg << "unknown state " << int(etat) << ".\n";
                throw std::runtime_error(msg.str());
            }
        }
        ++position;
    }

    // Flush whatever is still pending, depending on the final state.
    if ( (etat == DEBCOL) || (etat == DEBBLANC) )
    {
        status << "Unclosed tag at EOF (" << position << ")" << std::endl;
        retVal = UNCLOSED_OPENING_TAG_CHAR;
        if (etat == DEBCOL)
        {
            output << tag.str();
        }
        else
        {
            // nb blanks in total, with the newlines of the tag placed after
            // the first half (rounded down).
            std::string s(nb/2, ' ');
            s.append(nbNewLines, '\n');
            s.append(nb - nb/2, ' ');
            output << s;
        }
    }
    else if ((etat == FINCOL) || (etat == FINBLANC))
    {
        output << tag.str();
    }
    else if (etat == TEXT)
    {
        output << txt.str();
    }
    // Any other state (BLANC, BEGENTITY, ENTITY): nothing is written.

    return int(retVal);
}