Beispiel #1
0
static int parsePackage(TextPackage *package, const char *string, const char *path, DFError **error)
{
    DFBuffer *replaced = DFBufferNew();
    if (!strcmp(path,""))
        path = ".";

    if (!processIncludes(package,string,replaced,path,error)) {
        DFBufferRelease(replaced);
        return 0;
    }


    char *currentKey = strdup("");
    DFBuffer *currentValue = DFBufferNew();
    const char **lines = DFStringSplit(replaced->data,"\n",0);
    for (int lineno = 0; lines[lineno]; lineno++) {
        const char *line = lines[lineno];

        if (!DFStringHasPrefix(line,"#")) {
            DFBufferFormat(currentValue,"%s\n",line);
        }
        else if (DFStringHasPrefix(line,"#item ")) {
            package->keys = (char **)realloc(package->keys,(package->nkeys+2)*sizeof(char *));
            package->keys[package->nkeys++] = strdup(currentKey);
            package->keys[package->nkeys] = NULL;
            DFHashTableAdd(package->items,currentKey,currentValue->data);
            free(currentKey);
            DFBufferRelease(currentValue);
            currentKey = DFSubstring(line,6,strlen(line));
            currentValue = DFBufferNew();
        }
        else if (DFStringHasPrefix(line,"##")) {
            DFBufferFormat(currentValue,"%s\n",&line[1]);
        }
        else {
            DFErrorFormat(error,"Unknown command: %s on line %d",line,(lineno+1));
            return 0;
        }
    }
    package->keys = (char **)realloc(package->keys,(package->nkeys+2)*sizeof(char *));
    package->keys[package->nkeys++] = strdup(currentKey);
    package->keys[package->nkeys] = NULL;
    DFHashTableAdd(package->items,currentKey,currentValue->data);

    free(lines);
    free(currentKey);
    DFBufferRelease(currentValue);
    DFBufferRelease(replaced);
    return 1;
}
Beispiel #2
0
// Copy `input` to `output` line by line, replacing every line of the form
//
//     #include "<relative path>"
//
// with the (recursively expanded) content of the named file. The path is resolved against
// `path`; nested includes are resolved against the directory of the file that contains them.
// All other lines are written to `output` unchanged, each followed by a newline.
//
// Processing stops at the first include that cannot be read or expanded. Returns 1 if the
// whole input was processed, 0 otherwise (with a message stored via `error`).
static int processIncludes(TextPackage *package, const char *input, DFBuffer *output, const char *path, DFError **error)
{
    const char **inputLines = DFStringSplit(input,"\n",0);
    int success = 1;
    int index = 0;

    while (success && (inputLines[index] != NULL)) {
        const char *text = inputLines[index++];

        int isInclude = DFStringHasPrefix(text,"#include \"") && DFStringHasSuffix(text,"\"");
        if (!isInclude) {
            DFBufferFormat(output,"%s\n",text);
            continue;
        }

        // The file name sits between the opening quote (offset 10) and the closing quote
        char *relPath = DFSubstring(text,10,strlen(text)-1);
        char *absPath = DFAppendPathComponent(path,relPath);
        char *dirName = DFPathDirName(absPath);
        char *content = DFStringReadFromFile(absPath,error);

        if (content == NULL) {
            // Prefix the read error with the name of the file that was requested
            DFErrorFormat(error,"%s: %s",relPath,DFErrorMessage(error));
            success = 0;
        }
        else {
            if (!processIncludes(package,content,output,dirName,error))
                success = 0;
        }

        free(relPath);
        free(absPath);
        free(dirName);
        free(content);
    }

    free(inputLines);
    return success;
}
Beispiel #3
0
// Find the node in the concrete document that corresponds to an abstract (HTML) node.
//
// The lookup is driven entirely by the abstract node's id attribute, which must have the form
//
//     <prefix><seqno>(-<docname>)?
//
// where <prefix> is put->conv->idPrefix, <seqno> is a decimal sequence number, and the
// optional <docname> is "footnotes" or "endnotes". Without a <docname>, the package's main
// document is used. A sequence number is only meaningful within one document, which is why
// the id carries both pieces of information.
//
// Returns the matching element, or NULL if the abstract node is NULL or not an element, has
// no id with the expected prefix, names an unknown document, or if the sequence number does
// not resolve to an element.
DFNode *WordConverterGetConcrete(WordPutData *put, DFNode *abstract)
{
    if (abstract == NULL)
        return NULL;
    if (abstract->tag < MIN_ELEMENT_TAG)
        return NULL;

    const char *id = DFGetAttribute(abstract,HTML_ID);
    if (id == NULL)
        return NULL;
    if (!DFStringHasPrefix(id,put->conv->idPrefix))
        return NULL;

    // Read the run of decimal digits that follows the prefix. The terminating NUL is not a
    // digit, so the scan cannot run past the end of the string.
    const char *cursor = id + strlen(put->conv->idPrefix);
    unsigned int seqNo = 0;
    for (; (*cursor >= '0') && (*cursor <= '9'); cursor++)
        seqNo = seqNo*10 + (*cursor - '0');

    // Anything after a '-' separator is the document name
    const char *docName = (*cursor == '-') ? (cursor + 1) : NULL;

    DFDocument *doc = NULL;
    if (docName == NULL) {
        doc = put->conv->package->document;
    }
    else if (strcmp(docName,"footnotes") == 0) {
        doc = put->conv->package->footnotes;
    }
    else if (strcmp(docName,"endnotes") == 0) {
        doc = put->conv->package->endnotes;
    }
    else {
        return NULL;
    }

    // The sequence number may resolve to nothing, or to a non-element node; neither counts
    DFNode *found = DFNodeForSeqNo(doc,seqNo);
    if ((found != NULL) && (found->tag >= MIN_ELEMENT_TAG))
        return found;
    return NULL;
}
Beispiel #4
0
static void removeRedundantProperties(CSSSheet *sheet)
{
    // Remove any properties set on a style that have the same value as the corresponding property
    // on the parent style. This is necessary because CSS doesn't support style inheritance (in
    // the sense of Word & ODF's styles), so when we save out a HTML file, every style has all
    // properties of its ancestors. After reading in a HTML file for the purposes of updating the
    // original Word or ODF style, we don't want these extra property settings to remain, so that
    // we can avoid adding spurious extra redundant property settings to the original file.

    breakCycles(sheet);
    const char **sortedSelectors = reverseTopologicalSortedSelectors(sheet);

    for (size_t selIndex = 0; sortedSelectors[selIndex]; selIndex++) {
        const char *selector = sortedSelectors[selIndex];
        CSSStyle *child = CSSSheetLookupSelector(sheet,selector,0,0);
        CSSStyle *parent = CSSSheetGetStyleParent(sheet,child);
        if (parent == NULL)
            continue;
        const char **allSuffixes = CSSStyleCopySuffixes(child);
        for (int suffixIndex = 0; allSuffixes[suffixIndex]; suffixIndex++) {
            const char *suffix = allSuffixes[suffixIndex];
            int isCell = !strcmp(suffix," > * > tr > td");
            CSSProperties *childProperties = CSSStyleRuleForSuffix(child,suffix);
            CSSProperties *parentProperties = CSSStyleRuleForSuffix(parent,suffix);

            const char **allNames = CSSPropertiesCopyNames(childProperties);
            for (int nameIndex = 0; allNames[nameIndex]; nameIndex++) {
                const char *name = allNames[nameIndex];

                // In docx's styles.xml, the tblCellMar values in table styles are not inherited
                // (this seems like a bug in word, as isn't inconsistent with all other properties)
                // So keep these ones.
                if (isCell && DFStringHasPrefix(name,"padding-"))
                    continue;

                const char *childVal = CSSGet(childProperties,name);
                const char *parentVal = CSSGet(parentProperties,name);
                if ((childVal != NULL) && (parentVal != NULL) && DFStringEquals(childVal,parentVal))
                    CSSPut(childProperties,name,NULL);
            }
            free(allNames);
        }
        free(allSuffixes);
    }
    free(sortedSelectors);
}
Beispiel #5
0
// Compute the Word style id that corresponds to a CSS style.
//
// The rules are tried in this order:
//
//   1. A few fixed selectors map to fixed built-in style ids.
//   2. If the selector has a heading level N and the class name starts with "heading_N", the
//      id is "HeadingN" followed by the remainder of the class name.
//   3. "span.Heading1Char" .. "span.Heading9Char" map to the same text without "span.".
//   4. Otherwise the id depends on the selector's tag: fixed names for figure and caption,
//      the class name for p, span, table and classed headings, "HeadingN" for h1-h6 headings
//      with no class name.
//   5. If none of the above produced an id, the selector itself is used.
//
// The result is a newly allocated string that the caller must free.
char *WordStyleIdForStyle(CSSStyle *style)
{
    static const struct {
        const char *selector;
        const char *styleId;
    } fixedIds[] = {
        { "table.Normal_Table", "TableNormal" },
        { "table.Table_Grid", "TableGrid" },
        { "span.Default_Paragraph_Font", "DefaultParagraphFont" },
        { "p.List_Paragraph", "ListParagraph" },
    };
    static const char *const headingCharSelectors[] = {
        "span.Heading1Char", "span.Heading2Char", "span.Heading3Char",
        "span.Heading4Char", "span.Heading5Char", "span.Heading6Char",
        "span.Heading7Char", "span.Heading8Char", "span.Heading9Char",
    };

    const char *selector = style->selector;

    for (size_t i = 0; i < sizeof(fixedIds)/sizeof(fixedIds[0]); i++) {
        if (strcmp(selector,fixedIds[i].selector) == 0)
            return strdup(fixedIds[i].styleId);
    }

    int headingLevel = CSSSelectorHeadingLevel(selector);
    if (headingLevel != 0) {
        char *prefix = DFFormatString("heading_%d",headingLevel);
        int matched = (style->className != NULL) && DFStringHasPrefix(style->className,prefix);
        char *headingId = NULL;
        if (matched) {
            // Keep whatever follows "heading_N" in the class name as a suffix of the id
            char *rest = DFSubstring(style->className,strlen(prefix),strlen(style->className));
            headingId = DFFormatString("Heading%d%s",headingLevel,rest);
            free(rest);
        }
        free(prefix);
        if (matched)
            return headingId;
    }

    for (size_t i = 0; i < sizeof(headingCharSelectors)/sizeof(headingCharSelectors[0]); i++) {
        // The style id is the selector with its leading "span." removed
        if (strcmp(selector,headingCharSelectors[i]) == 0)
            return strdup(headingCharSelectors[i] + strlen("span."));
    }

    char *resStyleId = NULL;
    char *className = CSSSelectorCopyClassName(selector);
    switch (CSSSelectorGetTag(selector)) {
        case HTML_FIGURE:
            resStyleId = DFStrDup("Figure");
            break;
        case HTML_CAPTION:
            resStyleId = DFStrDup("Caption");
            break;
        case HTML_H1:
        case HTML_H2:
        case HTML_H3:
        case HTML_H4:
        case HTML_H5:
        case HTML_H6:
            if ((className != NULL) && (className[0] != '\0')) {
                resStyleId = DFStrDup(className);
            }
            else {
                int level = CSSSelectorHeadingLevel(selector);
                // FIXME: we shouldn't rely on the specific word "Heading" here - instead using the localised name
                // FIXME: not covered by tests
                if ((level >= 1) && (level <= 6))
                    resStyleId = DFFormatString("Heading%d",level);
            }
            break;
        case HTML_P:
        case HTML_SPAN:
        case HTML_TABLE:
            resStyleId = DFStrDup(className);
            break;
        default:
            break;
    }
    free(className);

    if (resStyleId != NULL)
        return resStyleId;

    // Note: selector here may start with . (i.e. applies to all elements)
    // FIXME: not covered by tests
    return strdup(selector);
}