Пример #1
0
/*
 * Recursively walk the subtree rooted at node. In every DOM_TEXT node whose
 * parent is a WORD_T element, each space that immediately follows another
 * space is replaced with DFNbspChar. Reaching a WORD_P element sets
 * *havePrecedingSpace, so the first space seen afterwards is replaced too.
 *
 * havePrecedingSpace is shared across the whole traversal, so a run of spaces
 * is tracked across text node boundaries. inParagraph is set on WORD_P and
 * forwarded to the recursive calls; it is not otherwise read here.
 */
static void addNbsps(DFNode *node, int inParagraph, int *havePrecedingSpace)
{
    if (node->tag == WORD_P) {
        inParagraph = 1;
        *havePrecedingSpace = 1;
    }
    else if ((node->tag == DOM_TEXT) && (node->parent->tag == WORD_T)) {
        // Work on UTF-32 code points so each character is a single array slot
        uint32_t *codepoints = DFUTF8To32(node->value);
        size_t count = DFUTF32Length(codepoints);
        for (size_t idx = 0; idx < count; idx++) {
            if (codepoints[idx] != ' ') {
                *havePrecedingSpace = 0;
                continue;
            }
            if (*havePrecedingSpace)
                codepoints[idx] = DFNbspChar;
            else
                *havePrecedingSpace = 1;
        }
        char *encoded = DFUTF32to8(codepoints);
        DFSetNodeValue(node,encoded);
        free(encoded);
        free(codepoints);
    }

    DFNode *child = node->first;
    while (child != NULL) {
        addNbsps(child,inParagraph,havePrecedingSpace);
        child = child->next;
    }
}
Пример #2
0
/*
 * Recursively walk the subtree rooted at node and, in every DOM_TEXT node,
 * replace each DFNbspChar code point with an ordinary space. The node's value
 * is re-encoded and stored back via DFSetNodeValue even if nothing changed.
 */
static void removeNbsps(DFNode *node)
{
    if (node->tag == DOM_TEXT) {
        uint32_t *codepoints = DFUTF8To32(node->value);
        size_t remaining = DFUTF32Length(codepoints);
        for (uint32_t *cp = codepoints; remaining > 0; cp++, remaining--) {
            if (*cp == DFNbspChar)
                *cp = ' ';
        }
        char *encoded = DFUTF32to8(codepoints);
        DFSetNodeValue(node,encoded);
        free(encoded);
        free(codepoints);
    }

    DFNode *child = node->first;
    while (child != NULL) {
        removeNbsps(child);
        child = child->next;
    }
}
Пример #3
0
/*
 * Recursively trim whitespace from the text nodes below (and including) node.
 *
 * - A DOM_TEXT node has its value replaced by the result of
 *   DFStringTrimWhitespace; if that result is empty and the node has a parent,
 *   the node is removed instead.
 * - Any other node is descended into, unless its tag is at least
 *   MIN_ELEMENT_TAG and its XML_SPACE attribute equals "preserve", in which
 *   case its whole subtree is left untouched.
 */
void DFStripWhitespace(DFNode *node)
{
    if (node->tag != DOM_TEXT) {
        if (node->tag >= MIN_ELEMENT_TAG) {
            const char *space = DFGetAttribute(node,XML_SPACE);
            if ((space != NULL) && (strcmp(space,"preserve") == 0))
                return;
        }

        // Fetch the sibling before recursing: the child may be removed
        DFNode *child = node->first;
        while (child != NULL) {
            DFNode *following = child->next;
            DFStripWhitespace(child);
            child = following;
        }
        return;
    }

    char *trimmed = DFStringTrimWhitespace(node->value);
    int isBlank = (trimmed[0] == '\0');
    if (isBlank && (node->parent != NULL))
        DFRemoveNode(node);
    else
        DFSetNodeValue(node,trimmed);
    free(trimmed);
}
Пример #4
0
/*
 * Recursively walk the subtree rooted at node, moving leading "prefix"
 * material out of the tree and into result as a space-separated list of
 * items. Consumed nodes (or the consumed part of a text node) are removed
 * from the tree as a side effect.
 *
 * node         - subtree to examine; may be removed from its parent
 * counterName  - name written inside the "counter(...)" item
 * result       - buffer the items are appended to
 * foundSeq     - in/out flag; set to 1 once a node for which isSeqField()
 *                returns true has been encountered
 * foundContent - in/out flag; set to 1 once a text node has been consumed
 *                (fully or partially) after foundSeq was set; once set, no
 *                further children are visited
 *
 * NOTE(review): isSeqField, DFQuote, DFNextChar and DFSubstring are defined
 * elsewhere; the comments below describe how they are used here, not their
 * exact contracts.
 */
static void extractPrefixRecursive(DFNode *node, const char *counterName, DFBuffer *result,
                                   int *foundSeq, int *foundContent)
{
    // A sequence field becomes a "counter(NAME)" item and is dropped from the tree
    if (isSeqField(node)) {
        if (result->len > 0)
            DFBufferFormat(result," ");
        DFBufferFormat(result,"counter(%s)",counterName);
        *foundSeq = 1;
        DFRemoveNode(node);
        return;
    }

    if (node->tag == DOM_TEXT) {
        size_t valueLen = strlen(node->value);
        // pos is the byte offset up to which this text node is taken as prefix
        size_t pos = 0;

        if (*foundSeq) {
            // After the sequence field, only leading whitespace/punctuation is
            // taken: advance until a character that is neither, or until
            // DFNextChar yields 0 (presumably end of string - confirm). pos is
            // left at the offset of the character that stopped the scan.
            size_t offset = 0;
            uint32_t ch;
            do {
                pos = offset;
                ch = DFNextChar(node->value,&offset);
            } while ((ch != 0) && (DFCharIsWhitespaceOrNewline(ch) || DFCharIsPunctuation(ch)));
        }
        else {
            // Before the sequence field, the whole text node is part of the prefix
            pos = valueLen;
        }

        if (pos == valueLen) {
            // Entire text consumed: emit it quoted and remove the node
            if (result->len > 0)
                DFBufferFormat(result," ");
            char *quotedValue = DFQuote(node->value);
            DFBufferFormat(result,"%s",quotedValue);
            free(quotedValue);
            DFRemoveNode(node);
            if (*foundSeq)
                *foundContent = 1;
            return;
        }
        else if (pos > 0) {
            // Partial match: emit the leading part quoted and keep the
            // remainder as the node's new value
            char *first = DFSubstring(node->value,0,pos);
            char *rest = DFSubstring(node->value,pos,valueLen);
            if (result->len > 0)
                DFBufferFormat(result," ");
            char *quotedFirst = DFQuote(first);
            DFBufferFormat(result,"%s",quotedFirst);
            free(quotedFirst);
            DFSetNodeValue(node,rest);
            if (*foundSeq)
                *foundContent = 1;
            free(first);
            free(rest);
            return;
        }
        // pos == 0 with a non-empty value: nothing is taken from this node and
        // control falls through to the child loop below
    }

    // Remember whether the node had children so that only spans emptied by
    // this call are removed afterwards
    int wasEmpty = (node->first == NULL);
    DFNode *next;
    for (DFNode *child = node->first; child != NULL; child = next) {
        // Save the sibling first: the recursive call may remove child
        next = child->next;
        if (*foundContent)
            break;
        extractPrefixRecursive(child,counterName,result,foundSeq,foundContent);
    }
    int isEmpty = (node->first == NULL);
    // Drop an HTML_SPAN whose children were all consumed by the loop above
    if ((node->tag == HTML_SPAN) && isEmpty && !wasEmpty)
        DFRemoveNode(node);
}