static int parsePackage(TextPackage *package, const char *string, const char *path, DFError **error) { DFBuffer *replaced = DFBufferNew(); if (!strcmp(path,"")) path = "."; if (!processIncludes(package,string,replaced,path,error)) { DFBufferRelease(replaced); return 0; } char *currentKey = strdup(""); DFBuffer *currentValue = DFBufferNew(); const char **lines = DFStringSplit(replaced->data,"\n",0); for (int lineno = 0; lines[lineno]; lineno++) { const char *line = lines[lineno]; if (!DFStringHasPrefix(line,"#")) { DFBufferFormat(currentValue,"%s\n",line); } else if (DFStringHasPrefix(line,"#item ")) { package->keys = (char **)realloc(package->keys,(package->nkeys+2)*sizeof(char *)); package->keys[package->nkeys++] = strdup(currentKey); package->keys[package->nkeys] = NULL; DFHashTableAdd(package->items,currentKey,currentValue->data); free(currentKey); DFBufferRelease(currentValue); currentKey = DFSubstring(line,6,strlen(line)); currentValue = DFBufferNew(); } else if (DFStringHasPrefix(line,"##")) { DFBufferFormat(currentValue,"%s\n",&line[1]); } else { DFErrorFormat(error,"Unknown command: %s on line %d",line,(lineno+1)); return 0; } } package->keys = (char **)realloc(package->keys,(package->nkeys+2)*sizeof(char *)); package->keys[package->nkeys++] = strdup(currentKey); package->keys[package->nkeys] = NULL; DFHashTableAdd(package->items,currentKey,currentValue->data); free(lines); free(currentKey); DFBufferRelease(currentValue); DFBufferRelease(replaced); return 1; }
static int processIncludes(TextPackage *package, const char *input, DFBuffer *output, const char *path, DFError **error) { int ok = 1; const char **lines = DFStringSplit(input,"\n",0); for (int lineno = 0; lines[lineno] && ok; lineno++) { const char *line = lines[lineno]; if (DFStringHasPrefix(line,"#include \"") && DFStringHasSuffix(line,"\"")) { char *inclRelPath = DFSubstring(line,10,strlen(line)-1); char *inclAbsPath = DFAppendPathComponent(path,inclRelPath); char *inclDirName = DFPathDirName(inclAbsPath); char *inclContent = DFStringReadFromFile(inclAbsPath,error); if (inclContent == NULL) { DFErrorFormat(error,"%s: %s",inclRelPath,DFErrorMessage(error)); ok = 0; } else if (!processIncludes(package,inclContent,output,inclDirName,error)) { ok = 0; } free(inclRelPath); free(inclAbsPath); free(inclDirName); free(inclContent); } else { DFBufferFormat(output,"%s\n",line); } } free(lines); return ok; }
// Finds the node in the concrete (Word) document that corresponds to a node in the abstract
// (HTML) document, based on the id attribute of the abstract node.
//
// Returns the concrete element, or NULL if the abstract node is NULL or not an element, has no
// id, has an id that is not in the format described below, refers to a document that is not
// available, or refers to a sequence number for which there is no element.
DFNode *WordConverterGetConcrete(WordPutData *put, DFNode *abstract)
{
    // Is the abstract node an element, and does it have an id that matches the prefix used for
    // conversion? That is, does it look like it has a corresponding node in the concrete document?
    if ((abstract == NULL) || (abstract->tag < MIN_ELEMENT_TAG))
        return NULL;
    const char *idStr = DFGetAttribute(abstract,HTML_ID);
    if ((idStr == NULL) || !DFStringHasPrefix(idStr,put->conv->idPrefix))
        return NULL;

    // Determine the node sequence number and the document based on the id attribute.
    // The format of the attribute is <prefix><seqno>(-<docname>)?, where
    //
    // <prefix>  is the prefix we use to identify nodes that match the original document
    // <seqno>   is an integer uniquely identifying a node in a given document
    // <docname> is the name of the document, either footnotes or endnotes. If absent, it is
    //           the main content document (that is, document.xml)
    //
    // Note that the sequence number only makes sense within the context of a specific document.
    // It is possible to have two different nodes in different documents that have the same
    // sequence number. It is for this reason that the id string identifies both the node and
    // the document.
    //
    // Ids that merely start with the prefix but do not follow this format (no digits, or
    // anything other than -<docname> after the digits) are treated as having no concrete node,
    // rather than being mapped to whatever number happens to be at the front.
    const char *cursor = idStr + strlen(put->conv->idPrefix);
    if ((*cursor < '0') || (*cursor > '9'))
        return NULL;

    const unsigned int maxSeqNo = ~0u;
    unsigned int seqNo = 0;
    while ((*cursor >= '0') && (*cursor <= '9')) {
        unsigned int digit = (unsigned int)(*cursor - '0');
        // A number too large for an unsigned int cannot be a sequence number we generated;
        // reject it instead of letting it wrap around to some unrelated value
        if (seqNo > (maxSeqNo - digit)/10)
            return NULL;
        seqNo = seqNo*10 + digit;
        cursor++;
    }

    DFDocument *doc = NULL;
    if (*cursor == '\0')
        doc = put->conv->package->document;
    else if (!strcmp(cursor,"-footnotes"))
        doc = put->conv->package->footnotes;
    else if (!strcmp(cursor,"-endnotes"))
        doc = put->conv->package->endnotes;
    else
        return NULL;

    // The package may not have the document in question; DFNodeForSeqNo is not known to accept
    // a NULL document, so don't pass one
    if (doc == NULL)
        return NULL;

    // Check to see if we have a node in the concrete document matching that sequence number
    DFNode *node = DFNodeForSeqNo(doc,seqNo);

    // Only return the node if it's actually an element
    if ((node == NULL) || (node->tag < MIN_ELEMENT_TAG))
        return NULL;
    return node;
}
// Strips from every style in the sheet those properties whose value is identical to the value
// of the same property (under the same suffix) on the style's parent.
//
// CSS has no notion of style inheritance in the sense that Word and ODF styles do, so a saved
// HTML file repeats all of a style's ancestors' properties on the style itself. When a HTML file
// is read back in to update the original Word or ODF styles, those repeated settings have to be
// dropped again, otherwise they would be written to the original file as spurious, redundant
// property settings.
//
// Styles are visited in the order given by reverseTopologicalSortedSelectors(), after
// breakCycles() has been applied to the sheet.
static void removeRedundantProperties(CSSSheet *sheet)
{
    breakCycles(sheet);

    const char **selectors = reverseTopologicalSortedSelectors(sheet);
    for (const char **selPtr = selectors; *selPtr != NULL; selPtr++) {
        CSSStyle *style = CSSSheetLookupSelector(sheet,*selPtr,0,0);
        CSSStyle *base = CSSSheetGetStyleParent(sheet,style);
        if (base != NULL) {
            const char **suffixes = CSSStyleCopySuffixes(style);
            for (const char **sufPtr = suffixes; *sufPtr != NULL; sufPtr++) {
                const char *suffix = *sufPtr;
                CSSProperties *own = CSSStyleRuleForSuffix(style,suffix);
                CSSProperties *inherited = CSSStyleRuleForSuffix(base,suffix);

                // In docx's styles.xml, the tblCellMar values in table styles are not inherited
                // (this seems like a bug in Word, as it is inconsistent with all other
                // properties), so padding on the cell rule is always kept.
                int keepPadding = (strcmp(suffix," > * > tr > td") == 0);

                // Iterate over a copy of the names, since properties are removed along the way
                const char **names = CSSPropertiesCopyNames(own);
                for (const char **namePtr = names; *namePtr != NULL; namePtr++) {
                    const char *name = *namePtr;
                    if (keepPadding && DFStringHasPrefix(name,"padding-"))
                        continue;

                    const char *ownVal = CSSGet(own,name);
                    const char *inheritedVal = CSSGet(inherited,name);
                    if ((ownVal == NULL) || (inheritedVal == NULL))
                        continue;

                    // Same value as the parent: the setting on this style is redundant
                    if (DFStringEquals(ownVal,inheritedVal))
                        CSSPut(own,name,NULL);
                }
                free(names);
            }
            free(suffixes);
        }
    }
    free(selectors);
}
// Determines the Word style id to use for a CSS style, based on its selector and class name.
//
// The checks are made in this order:
//
//   1. A few selectors that correspond to fixed Word style ids (e.g. table.Table_Grid)
//   2. Heading selectors whose class name starts with heading_<level>, which become
//      Heading<level> followed by the rest of the class name
//   3. The span.Heading<n>Char selectors, for n from 1 to 9
//   4. A rule based on the element type of the selector
//   5. If none of the above produced a result, the selector itself
//
// The result is a newly allocated string which the caller must free.
char *WordStyleIdForStyle(CSSStyle *style)
{
    static const struct {
        const char *selector;
        const char *styleId;
    } fixedStyles[] = {
        { "table.Normal_Table", "TableNormal" },
        { "table.Table_Grid", "TableGrid" },
        { "span.Default_Paragraph_Font", "DefaultParagraphFont" },
        { "p.List_Paragraph", "ListParagraph" },
    }, headingCharStyles[] = {
        { "span.Heading1Char", "Heading1Char" },
        { "span.Heading2Char", "Heading2Char" },
        { "span.Heading3Char", "Heading3Char" },
        { "span.Heading4Char", "Heading4Char" },
        { "span.Heading5Char", "Heading5Char" },
        { "span.Heading6Char", "Heading6Char" },
        { "span.Heading7Char", "Heading7Char" },
        { "span.Heading8Char", "Heading8Char" },
        { "span.Heading9Char", "Heading9Char" },
    };
    const size_t fixedCount = sizeof(fixedStyles)/sizeof(fixedStyles[0]);
    const size_t headingCharCount = sizeof(headingCharStyles)/sizeof(headingCharStyles[0]);

    const char *selector = style->selector;

    for (size_t i = 0; i < fixedCount; i++) {
        if (strcmp(selector,fixedStyles[i].selector) == 0)
            return strdup(fixedStyles[i].styleId);
    }

    // Heading whose class is heading_<level><rest>: maps to Heading<level><rest>
    int headingLevel = CSSSelectorHeadingLevel(selector);
    if (headingLevel != 0) {
        char *prefix = DFFormatString("heading_%d",headingLevel);
        if ((style->className != NULL) && DFStringHasPrefix(style->className,prefix)) {
            char *rest = DFSubstring(style->className,strlen(prefix),strlen(style->className));
            char *result = DFFormatString("Heading%d%s",headingLevel,rest);
            free(rest);
            free(prefix);
            return result;
        }
        free(prefix);
    }

    for (size_t i = 0; i < headingCharCount; i++) {
        if (strcmp(selector,headingCharStyles[i].selector) == 0)
            return strdup(headingCharStyles[i].styleId);
    }

    char *className = CSSSelectorCopyClassName(selector);
    char *styleId = NULL;
    switch (CSSSelectorGetTag(selector)) {
        case HTML_FIGURE:
            styleId = DFStrDup("Figure");
            break;
        case HTML_CAPTION:
            styleId = DFStrDup("Caption");
            break;
        case HTML_H1:
        case HTML_H2:
        case HTML_H3:
        case HTML_H4:
        case HTML_H5:
        case HTML_H6:
            if ((className != NULL) && (strlen(className) != 0)) {
                styleId = DFStrDup(className);
            }
            else {
                int level = CSSSelectorHeadingLevel(selector);
                if ((level >= 1) && (level <= 6)) {
                    // FIXME: we shouldn't rely on the specific word "Heading" here - instead
                    // using the localised name
                    // FIXME: not covered by tests
                    styleId = DFFormatString("Heading%d",level);
                }
            }
            break;
        case HTML_P:
        case HTML_SPAN:
        case HTML_TABLE:
            // Paragraph, character and table styles all use the class name as the id
            styleId = DFStrDup(className);
            break;
        default:
            break;
    }
    free(className);

    if (styleId == NULL) {
        // Note: selector here may start with . (i.e. applies to all elements)
        // FIXME: not covered by tests
        styleId = strdup(selector);
    }
    return styleId;
}