CSSStyle *WordSetupTableGridStyle(CSSSheet *styleSheet, int *changed) { CSSStyle *style = CSSSheetLookupElement(styleSheet,"table","Table_Grid",0,0); if (style == NULL) { style = CSSSheetLookupElement(styleSheet,"table","Table_Grid",1,0); CSSProperties *border = CSSPropertiesNewWithString("border: 1px solid black"); DFHashTable *collapsed = CSSCollapseProperties(border); CSSPropertiesUpdateFromRaw(CSSStyleRule(style),collapsed); CSSPropertiesUpdateFromRaw(CSSStyleCell(style),collapsed); DFHashTableRelease(collapsed); CSSPut(CSSStyleRule(style),"margin-left","auto"); CSSPut(CSSStyleRule(style),"margin-right","auto"); // These must be set last, as updateRaw clears them when modifying rule CSSStyleSetParent(style,"table.Normal_Table"); CSSStyleSetDisplayName(style,"Table Grid"); if (changed != NULL) *changed = 1; CSSPropertiesRelease(border); } return style; }
CSSSheet *WordParseStyles(WordConverter *converter) { CSSSheet *styleSheet = CSSSheetNew(); CSSStyle *bodyStyle = CSSSheetLookupElement(styleSheet,"body",NULL,1,0); CSSPut(CSSStyleRule(bodyStyle),"counter-reset","h1 h2 h3 h4 h5 h6 figure table"); parseBody(converter,styleSheet); if (converter->package->styles == NULL) return styleSheet;; DFNode *root = converter->package->styles->root; if (root == NULL) return styleSheet; if (root->tag != WORD_STYLES) return styleSheet;; WordSheet *sheet = converter->styles; const char **allIdents = WordSheetCopyIdents(sheet); for (int i = 0; allIdents[i]; i++) { WordStyle *wordStyle = WordSheetStyleForIdent(sheet,allIdents[i]); if ((wordStyle->selector == NULL) || WordStyleIsProtected(wordStyle)) continue; CSSStyle *style = CSSSheetLookupSelector(styleSheet,wordStyle->selector,1,0); styleParse(wordStyle,converter,style); const char *defaultVal = DFGetAttribute(wordStyle->element,WORD_DEFAULT); if ((defaultVal != NULL) && Word_parseOnOff(defaultVal)) { StyleFamily family = WordStyleFamilyForSelector(style->selector); CSSSheetSetDefaultStyle(styleSheet,style,family); CSSSetDefault(CSSStyleRule(style),1); if (family == StyleFamilyParagraph) fixParagraphSpacing(style,wordStyle->element); } } free(allIdents); DFNode *docDefaults = DFChildWithTag(root,WORD_DOCDEFAULTS); DFNode *pPrDefault = DFChildWithTag(docDefaults,WORD_PPRDEFAULT); DFNode *pPr = DFChildWithTag(pPrDefault,WORD_PPR); if (pPr != NULL) { CSSStyle *body = CSSSheetLookupElement(styleSheet,"body",NULL,1,0); const char *styleId = NULL; WordGetPPr(pPr,CSSStyleRule(body),&styleId,converter->mainSection); } // Special case for figure style: set left and right margin to auto, if not already set CSSStyle *figure = CSSSheetLookupElement(styleSheet,"figure",NULL,0,0); if (figure != NULL) { if (CSSGet(CSSStyleRule(figure),"margin-left") == NULL) CSSPut(CSSStyleRule(figure),"margin-left","auto"); if (CSSGet(CSSStyleRule(figure),"margin-right") == NULL) CSSPut(CSSStyleRule(figure),"margin-right","auto"); } return styleSheet; }
static void updateDefaults(WordConverter *converter, CSSSheet *styleSheet) { DFNode *root = converter->package->styles->root; CSSStyle *bodyStyle = CSSSheetLookupElement(converter->styleSheet,"body",NULL,0,0); if (bodyStyle != NULL) { // Remove margin properties DFHashTable *collapsed = CSSCollapseProperties(CSSStyleRule(bodyStyle)); CSSProperties *copy = CSSPropertiesNewWithRaw(collapsed); DFHashTableRelease(collapsed); CSSPut(copy,"margin-top",NULL); CSSPut(copy,"margin-bottom",NULL); CSSPut(copy,"margin-left",NULL); CSSPut(copy,"margin-right",NULL); DFNode *docDefaults = DFChildWithTag(root,WORD_DOCDEFAULTS); DFNode *rPrDefault = DFChildWithTag(docDefaults,WORD_RPRDEFAULT); DFNode *pPrDefault = DFChildWithTag(docDefaults,WORD_PPRDEFAULT); DFNode *rPr = DFChildWithTag(rPrDefault,WORD_RPR); DFNode *pPr = DFChildWithTag(pPrDefault,WORD_PPR); int hadEmptyRPrDefault = ((rPrDefault != NULL) && (rPrDefault->first == NULL)); int hadEmptyPPrDefault = ((pPrDefault != NULL) && (pPrDefault->first == NULL)); if (docDefaults == NULL) docDefaults = DFCreateElement(converter->package->styles,WORD_DOCDEFAULTS); if (rPrDefault == NULL) rPrDefault = DFCreateElement(converter->package->styles,WORD_RPRDEFAULT); if (pPrDefault == NULL) pPrDefault = DFCreateElement(converter->package->styles,WORD_PPRDEFAULT); if (rPr == NULL) rPr = DFCreateChildElement(rPrDefault,WORD_RPR); if (pPr == NULL) pPr = DFCreateChildElement(pPrDefault,WORD_PPR); DFAppendChild(docDefaults,rPrDefault); DFAppendChild(docDefaults,pPrDefault); DFInsertBefore(root,docDefaults,root->first); WordPutPPr(pPr,copy,NULL,converter->mainSection,-1); if (rPr->first == NULL) DFRemoveNode(rPr); if (pPr->first == NULL) DFRemoveNode(pPr); if ((rPrDefault->first == NULL) && !hadEmptyRPrDefault) DFRemoveNode(rPrDefault); if ((pPrDefault->first == NULL) && !hadEmptyPPrDefault) DFRemoveNode(pPrDefault); if (docDefaults->first == NULL) DFRemoveNode(docDefaults); CSSPropertiesRelease(copy); } }
static void addMissingDefaultStyles(WordConverter *converter) { if (CSSSheetDefaultStyleForFamily(converter->styleSheet,StyleFamilyParagraph) == NULL) { CSSStyle *style = CSSSheetLookupElement(converter->styleSheet,"p","Normal",1,0); CSSSheetSetDefaultStyle(converter->styleSheet,style,StyleFamilyParagraph); } if (CSSSheetDefaultStyleForFamily(converter->styleSheet,StyleFamilyCharacter) == NULL) { CSSStyle *style = CSSSheetLookupElement(converter->styleSheet,"span","DefaultParagraphFont",1,0); CSSStyleSetDisplayName(style,"Default Paragraph Font"); CSSSheetSetDefaultStyle(converter->styleSheet,style,StyleFamilyCharacter); } if (CSSSheetDefaultStyleForFamily(converter->styleSheet,StyleFamilyTable) == NULL) { CSSStyle *style = CSSSheetLookupElement(converter->styleSheet,"table","Normal_Table",1,0); CSSStyleSetDisplayName(style,"Normal Table"); CSSPut(CSSStyleCell(style),"padding-left","5.4pt"); CSSPut(CSSStyleCell(style),"padding-right","5.4pt"); CSSPut(CSSStyleCell(style),"padding-top","0pt"); CSSPut(CSSStyleCell(style),"padding-bottom","0pt"); CSSSheetSetDefaultStyle(converter->styleSheet,style,StyleFamilyTable); } }
static void WordPutStyle(DFNode *concrete, CSSStyle *style, WordConverter *converter) { // If we find a style with a counter-increment value of "<elementname> 0", this means that it's // an explicitly unnumbered paragraph. So we set the numbering id to 0, so word doesn't increment // the corresponding counter when encountering this style if (CSSGet(CSSStyleRule(style),"counter-increment") != NULL) { char *zeroIncrement = DFFormatString("%s 0",style->elementName); if (DFStringEquals(CSSGet(CSSStyleRule(style),"counter-increment"),zeroIncrement)) CSSPut(CSSStyleRule(style),"-word-numId","0"); free(zeroIncrement); } int outlineLvl = -1; if ((style->headingLevel >= 1) && (style->headingLevel <= 6)) { outlineLvl = style->headingLevel-1; char *nextSelector = CSSStyleCopyNext(style); if (nextSelector == NULL) { CSSStyle *def = CSSSheetDefaultStyleForFamily(converter->styleSheet,StyleFamilyParagraph); if (def != NULL) CSSStyleSetNext(style,def->selector); } free(nextSelector); } DFNode *children[PREDEFINED_TAG_COUNT]; childrenToArray(concrete,children); char *parentSelector = CSSStyleCopyParent(style); if (parentSelector == NULL) { if (style->className != NULL) { if ((style->tag != HTML_P) && (style->tag != HTML_SPAN) && (style->tag != HTML_TABLE)) { CSSStyle *parentStyle = CSSSheetLookupElement(converter->styleSheet,style->elementName,NULL,0,0); if ((parentStyle != NULL) && !parentStyle->latent) parentSelector = xstrdup(style->elementName); } } } // Based on const char *basedOn = WordSheetStyleIdForSelector(converter->styles,parentSelector); if (basedOn == NULL) { children[WORD_BASEDON] = NULL; } else { children[WORD_BASEDON] = DFCreateElement(concrete->doc,WORD_BASEDON); DFSetAttribute(children[WORD_BASEDON],WORD_VAL,basedOn); } // Style for next paragraph children[WORD_NEXT] = NULL; char *nextSelector = CSSStyleCopyNext(style); if (nextSelector != NULL) { StyleFamily thisFamily = WordStyleFamilyForSelector(style->selector); StyleFamily nextFamily = WordStyleFamilyForSelector(nextSelector); if (nextFamily == thisFamily) { children[WORD_NEXT] = DFCreateElement(concrete->doc,WORD_NEXT); const char *nextStyleId = WordSheetStyleIdForSelector(converter->styles,nextSelector); DFSetAttribute(children[WORD_NEXT],WORD_VAL,nextStyleId); } } if (WordStyleFamilyForSelector(style->selector) == StyleFamilyTable) { // Table properties if (children[WORD_TBLPR] == NULL) children[WORD_TBLPR] = DFCreateElement(concrete->doc,WORD_TBLPR);; const char *oldJc = DFGetChildAttribute(children[WORD_TBLPR],WORD_JC,WORD_VAL); CSSProperties *wholeTable = CSSStyleRuleForSuffix(style,DFTableSuffixWholeTable); WordPutTblPr(children[WORD_TBLPR],wholeTable,CSSStyleCell(style),converter->mainSection,NULL); const char *newJc = DFGetChildAttribute(children[WORD_TBLPR],WORD_JC,WORD_VAL); if (children[WORD_TBLPR]->first == NULL) children[WORD_TBLPR] = NULL; if (children[WORD_TRPR] == NULL) children[WORD_TRPR] = DFCreateElement(concrete->doc,WORD_TRPR); WordPutTrPr(children[WORD_TRPR],oldJc,newJc); if (children[WORD_TRPR]->first == NULL) children[WORD_TRPR] = NULL; } else { // Paragraph properties if (children[WORD_PPR] == NULL) children[WORD_PPR] = DFCreateElement(concrete->doc,WORD_PPR); WordPutPPr(children[WORD_PPR],CSSStyleRule(style),NULL,converter->mainSection,outlineLvl); if (children[WORD_PPR]->first == NULL) children[WORD_PPR] = NULL; // Run properties if (children[WORD_RPR] == NULL) children[WORD_RPR] = DFCreateElement(concrete->doc,WORD_RPR); WordPutRPr(children[WORD_RPR],CSSStyleRule(style),NULL,converter->theme); if (children[WORD_RPR]->first == NULL) children[WORD_RPR] = NULL; } replaceChildrenFromArray(concrete,children,WordStyle_Children); free(parentSelector); free(nextSelector); }
int WordConverterUpdateFromHTML(WordConverter *converter, DFError **error) { if (converter->package->document == NULL) { DFErrorFormat(error,"document.xml not found"); return 0; } DFNode *wordDocument = DFChildWithTag(converter->package->document->docNode,WORD_DOCUMENT); if (wordDocument == NULL) { DFErrorFormat(error,"word:document not found"); return 0; } // FIXME: Need a more reliable way of telling whether this is a new document or not - it could be that the // document already existed (with styles set up) but did not have any content DFNode *wordBody = DFChildWithTag(wordDocument,WORD_BODY); int creating = ((wordBody == NULL) || (wordBody->first == NULL)); converter->haveFields = Word_simplifyFields(converter->package); Word_mergeRuns(converter->package); assert(converter->package->styles); CSSSheetRelease(converter->styleSheet); converter->styleSheet = CSSSheetNew(); char *cssText = HTMLCopyCSSText(converter->html); CSSSheetUpdateFromCSSText(converter->styleSheet,cssText); free(cssText); addMissingDefaultStyles(converter); CSSEnsureReferencedStylesPresent(converter->html,converter->styleSheet); if (creating) CSSSetHTMLDefaults(converter->styleSheet); CSSEnsureUnique(converter->styleSheet,converter->html,creating); CSSStyle *pageStyle = CSSSheetLookupElement(converter->styleSheet,"@page",NULL,0,0); CSSStyle *bodyStyle = CSSSheetLookupElement(converter->styleSheet,"body",NULL,1,0); CSSProperties *page = (pageStyle != NULL) ? CSSPropertiesRetain(CSSStyleRule(pageStyle)) : CSSPropertiesNew(); CSSProperties *body = (bodyStyle != NULL) ? CSSPropertiesRetain(CSSStyleRule(bodyStyle)) : CSSPropertiesNew(); if (CSSGet(body,"margin-left") == NULL) CSSPut(body,"margin-left","10%"); if (CSSGet(body,"margin-right") == NULL) CSSPut(body,"margin-right","10%"); if (CSSGet(body,"margin-top") == NULL) CSSPut(body,"margin-top","10%"); if (CSSGet(body,"margin-bottom") == NULL) CSSPut(body,"margin-bottom","10%"); WordSectionUpdateFromCSSPage(converter->mainSection,page,body); WordPutData put; put.conv = converter; put.numIdByHtmlId = DFHashTableNew((DFCopyFunction)strdup,free); put.htmlIdByNumId = DFHashTableNew((DFCopyFunction)strdup,free); // Make sure we update styles.xml from the CSS stylesheet *before* doing any conversion of the content, // since the latter requires a full mapping of CSS selectors to styleIds to be in place. WordUpdateStyles(converter,converter->styleSheet); Word_preProcessHTMLDoc(converter,converter->html); buildListMapFromHTML(&put,converter->html->docNode); updateListTypes(&put); WordBookmarks_removeCaptionBookmarks(converter->package->document); WordObjectsCollapseBookmarks(converter->objects); WordObjectsScan(converter->objects); Word_setupBookmarkLinks(&put); WordObjectsAnalyzeBookmarks(converter->objects,converter->styles); WordDocumentLens.put(&put,converter->html->root,wordDocument); WordObjectsExpandBookmarks(converter->objects); WordRemoveNbsps(converter->package->document); // Make sure the updateFields flag is set Word_updateSettings(converter->package,converter->haveFields); // Remove any abstract numbering definitions that are no longer referenced from concrete // numbering definitions WordNumberingRemoveUnusedAbstractNums(converter->numbering); // Remove any relationships and images that have been removed from the HTML file and no longer // have any other references pointing to them WordGarbageCollect(converter->package); CSSPropertiesRelease(page); CSSPropertiesRelease(body); DFHashTableRelease(put.numIdByHtmlId); DFHashTableRelease(put.htmlIdByNumId); return 1; }
static void Word_postProcessHTML(WordConverter *conv, DFNode *node) { DFNode *next; for (DFNode *child = node->first; child != NULL; child = next) { next = child->next; switch (child->tag) { case HTML_SPAN: { const char *className = DFGetAttribute(child,HTML_CLASS); if (DFStringEquals(className,DFBookmarkClass)) { if (child->first != NULL) next = child->first; DFRemoveNodeButKeepChildren(child); } break; } case HTML_CAPTION: { const char *counterName = NULL; if ((child->prev != NULL) && (child->prev->tag == HTML_FIGURE) && (DFChildWithTag(child->prev,HTML_FIGCAPTION) == NULL)) { child->tag = HTML_FIGCAPTION; counterName = "figure"; DFAppendChild(child->prev,child); } else if ((child->prev != NULL) && (child->prev->tag == HTML_TABLE) && (DFChildWithTag(child->prev,HTML_CAPTION) == NULL)) { counterName = "table"; DFInsertBefore(child->prev,child,child->prev->first); } else if ((child->next != NULL) && (child->next->tag == HTML_FIGURE) && (DFChildWithTag(child->next,HTML_FIGCAPTION) == NULL)) { child->tag = HTML_FIGCAPTION; counterName = "figure"; DFInsertBefore(child->next,child,child->next->first); } else if ((child->next != NULL) && (child->next->tag == HTML_TABLE) && (DFChildWithTag(child->next,HTML_CAPTION) == NULL)) { counterName = "table"; DFSetAttribute(child,HTML_STYLE,"caption-side: top"); DFInsertBefore(child->next,child,child->next->first); } if (counterName != NULL) { char *beforeText = extractPrefix(child,counterName); if (beforeText != NULL) { CSSStyle *style = CSSSheetLookupElement(conv->styleSheet,DFNodeName(child),NULL,1,0); if (CSSGet(CSSStyleBefore(style),"content") == NULL) { CSSPut(CSSStyleRule(style),"counter-increment",counterName); CSSPut(CSSStyleBefore(style),"content",beforeText); } } free(beforeText); } break; } case HTML_NAV: { if (HTML_isParagraphTag(node->tag)) { if (child->prev != NULL) { DFNode *beforeP = DFCreateElement(conv->package->document,node->tag); while (child->prev != NULL) DFInsertBefore(beforeP,child->prev,beforeP->first); DFInsertBefore(node->parent,beforeP,node); } DFInsertBefore(node->parent,child,node); if ((node->first == NULL) || ((node->first->tag == HTML_BR) && (node->first->next == NULL))) { DFRemoveNode(node); return; } next = NULL; } break; } } } for (DFNode *child = node->first; child != NULL; child = next) { next = child->next; Word_postProcessHTML(conv,child); } }
static void Word_preProcessHTML(WordConverter *word, DFNode *node) { switch (node->tag) { case HTML_TABLE: case HTML_FIGURE: { DFNode *next; for (DFNode *child = node->first; child != NULL; child = next) { next = child->next; if ((child->tag != HTML_CAPTION) && (child->tag != HTML_FIGCAPTION)) continue; WordCaption *caption = WordCaptionNew(child); WordObjectsSetCaption(word->objects,caption,node); caption->contentStart = child->first; WordCaptionRelease(caption); const char *className = DFGetAttribute(child,HTML_CLASS); CSSStyle *style; if (child->tag == HTML_CAPTION) style = CSSSheetLookupElement(word->styleSheet,"caption",className,0,0); else style = CSSSheetLookupElement(word->styleSheet,"figcaption",className,0,0); CSSProperties *before = CSSStyleBefore(style); if (CSSGet(before,"content") != NULL) Word_addContentParts(child,CSSGet(before,"content"),caption); child->tag = HTML_P; DFSetAttribute(child,HTML_CLASS,"Caption"); DFInsertBefore(node->parent,child,node->next); Word_preProcessHTML(word,child); } // The HTML normalization process ensures that apart from the <figcaption> element, // all children of a <figure> are paragraphs or containers. Currently the editor only // lets you create figures that contain a single image, so it's always a single // paragraph. Since the HTML <figure> element gets mapped to a single <w:p> element // by WordParagraphLens, we want to make sure it only contains inline children. for (DFNode *child = node->first; child != NULL; child = next) { next = child->next; if (HTML_isParagraphTag(child->tag)) DFRemoveNodeButKeepChildren(child); } // FIXME: Handle <div>, <pre>, lists, tables etc which could also theoretically // exist inside the <figure> element break; } case HTML_NAV: { const char *className = DFGetAttribute(node,HTML_CLASS); const char *instr = NULL; if (DFStringEquals(className,DFTableOfContentsClass)) instr = " TOC \\o \"1-3\" "; else if (DFStringEquals(className,DFListOfFiguresClass)) instr = " TOC \\c \"Figure\" "; else if (DFStringEquals(className,DFListOfTablesClass)) instr = " TOC \\c \"Table\" "; if (instr != NULL) { DFNode *p = DFCreateElement(word->html,HTML_P); DFNode *field = DFCreateChildElement(p,HTML_SPAN); DFSetAttribute(field,HTML_CLASS,DFFieldClass); DFCreateChildTextNode(field,instr); DFInsertBefore(node->parent,p,node); DFRemoveNode(node); } break; } } DFNode *next; for (DFNode *child = node->first; child != NULL; child = next) { next = child->next; Word_preProcessHTML(word,child); } }
// FIXME: This won't work now for Word documents, where styles always have class names // FIXME: Not covered by tests int CSSSheetIsNumberingUsed(CSSSheet *sheet) { CSSStyle *style = CSSSheetLookupElement(sheet,"body",NULL,0,0); return (CSSGet(CSSStyleRule(style),"counter-reset") != NULL); }
// FIXME: This won't work now for Word documents, where styles always have class names // FIXME: Not covered by tests void CSSSheetSetHeadingNumbering(CSSSheet *sheet, int enabled) { CSSStyle *defaultStyle = CSSSheetDefaultStyleForFamily(sheet,StyleFamilyParagraph); if (enabled) { DFHashTable *stylesByHeadingLevel = getStylesByHeadingLevel(sheet); for (int level = 1; level <= 6; level++) { StyleList *styles = DFHashTableLookupInt(stylesByHeadingLevel,level); int haveClassless = 0; for (StyleList *item = styles; item != NULL; item = item->next) { if (item->style->className == NULL) haveClassless = 1; } // If there exists a style at level n which has no class name, then that is the parent of all other // heading styles. Set the numbering on that. Alternatively, if there no styles exist at level n, then // create a new one with no class name, and set the numbering information on that. if ((styles == NULL) || haveClassless) { char *elementName = DFFormatString("h%d",level); CSSStyle *style = CSSSheetLookupElement(sheet,elementName,NULL,1,0); enableNumberingForStyle(style); free(elementName); } else { // There are multiple heading styles at level n, all of which have a class name associated with them. // Set the numbering information on those which either have no parentId, or whose parentId is the // default paragraph style. This caters for situations like the "HeadingXUnnumbered" styles we used // to have, which were based on the corresponding "HeadingX" style. for (StyleList *item = styles; item != NULL; item = item->next) { char *parentSelector = CSSStyleCopyParent(item->style); if ((parentSelector == NULL) || !strcmp(parentSelector,defaultStyle->selector)) { enableNumberingForStyle(item->style); } else { disableNumberingForStyle(item->style,1); } free(parentSelector); } } StyleList *next; for (; styles != NULL; styles = next) { next = styles->next; CSSStyleRelease(styles->style); free(styles); } } DFHashTableRelease(stylesByHeadingLevel); } else { // Clear all numbering information on all heading styles const char **allSelectors = CSSSheetCopySelectors(sheet); for (int i = 0; allSelectors[i]; i++) { CSSStyle *style = CSSSheetLookupSelector(sheet,allSelectors[i],0,0); if (style->headingLevel > 0) { CSSPut(CSSStyleRule(style),"counter-increment",NULL); CSSPut(CSSStyleRule(style),"counter-reset",NULL); CSSPut(CSSStyleBefore(style),"content",NULL); } } free(allSelectors); } }
CSSProperties *CSSSheetBodyProperties(CSSSheet *sheet) { return CSSStyleRule(CSSSheetLookupElement(sheet,"body",NULL,1,0)); }
CSSProperties *CSSSheetPageProperties(CSSSheet *sheet) { return CSSStyleRule(CSSSheetLookupElement(sheet,"@page",NULL,1,0)); }