/*
 * Rewrite the WORD_DOCDEFAULTS element of the styles part from the CSS "body" rule.
 *
 * A copy of the collapsed body rule is made, its four margin-* properties are cleared, and the
 * copy is written into the default paragraph properties via WordPutPPr(). Any of the
 * docDefaults / rPrDefault / pPrDefault / rPr / pPr elements that are missing are created first;
 * afterwards, elements left without children are removed again, except an rPrDefault or
 * pPrDefault that already existed and was empty before this call.
 *
 * Nothing happens when the sheet lookup for "body" returns NULL.
 *
 * Note: the styleSheet parameter is not referenced by this function; the sheet consulted is
 * converter->styleSheet.
 */
static void updateDefaults(WordConverter *converter, CSSSheet *styleSheet)
{
    DFNode *stylesRoot = converter->package->styles->root;
    CSSStyle *body = CSSSheetLookupElement(converter->styleSheet,"body",NULL,0,0);
    if (body == NULL)
        return;

    // Private copy of the body rule, with the margin properties removed
    DFHashTable *flattened = CSSCollapseProperties(CSSStyleRule(body));
    CSSProperties *defaults = CSSPropertiesNewWithRaw(flattened);
    DFHashTableRelease(flattened);
    CSSPut(defaults,"margin-top",NULL);
    CSSPut(defaults,"margin-bottom",NULL);
    CSSPut(defaults,"margin-left",NULL);
    CSSPut(defaults,"margin-right",NULL);

    // Locate whatever already exists in the document
    DFNode *docDefaults = DFChildWithTag(stylesRoot,WORD_DOCDEFAULTS);
    DFNode *rPrDefault = DFChildWithTag(docDefaults,WORD_RPRDEFAULT);
    DFNode *pPrDefault = DFChildWithTag(docDefaults,WORD_PPRDEFAULT);
    DFNode *rPr = DFChildWithTag(rPrDefault,WORD_RPR);
    DFNode *pPr = DFChildWithTag(pPrDefault,WORD_PPR);

    // Remember pre-existing empty containers so the cleanup below leaves them in place
    int keepEmptyRPrDefault = 0;
    int keepEmptyPPrDefault = 0;
    if ((rPrDefault != NULL) && (rPrDefault->first == NULL))
        keepEmptyRPrDefault = 1;
    if ((pPrDefault != NULL) && (pPrDefault->first == NULL))
        keepEmptyPPrDefault = 1;

    // Create the missing pieces
    if (docDefaults == NULL)
        docDefaults = DFCreateElement(converter->package->styles,WORD_DOCDEFAULTS);
    if (rPrDefault == NULL)
        rPrDefault = DFCreateElement(converter->package->styles,WORD_RPRDEFAULT);
    if (pPrDefault == NULL)
        pPrDefault = DFCreateElement(converter->package->styles,WORD_PPRDEFAULT);
    if (rPr == NULL)
        rPr = DFCreateChildElement(rPrDefault,WORD_RPR);
    if (pPr == NULL)
        pPr = DFCreateChildElement(pPrDefault,WORD_PPR);

    // Fixed layout: rPrDefault, then pPrDefault, with docDefaults as the root's first child
    DFAppendChild(docDefaults,rPrDefault);
    DFAppendChild(docDefaults,pPrDefault);
    DFInsertBefore(stylesRoot,docDefaults,stylesRoot->first);

    WordPutPPr(pPr,defaults,NULL,converter->mainSection,-1);

    // Prune anything that ended up empty, innermost elements first
    if (rPr->first == NULL)
        DFRemoveNode(rPr);
    if (pPr->first == NULL)
        DFRemoveNode(pPr);
    if (!keepEmptyRPrDefault && (rPrDefault->first == NULL))
        DFRemoveNode(rPrDefault);
    if (!keepEmptyPPrDefault && (pPrDefault->first == NULL))
        DFRemoveNode(pPrDefault);
    if (docDefaults->first == NULL)
        DFRemoveNode(docDefaults);

    CSSPropertiesRelease(defaults);
}
/*
 * Depth-first pass that relocates bookmark start/end markers which are not direct children of
 * a paragraph (WORD_P).
 *
 * Children are processed before the node itself. A misplaced marker is moved into the paragraph
 * returned by findParagraphForwards() - directly after that paragraph's WORD_PPR child if it has
 * one, otherwise at the very start. Failing that, it is appended to the paragraph returned by
 * findParagraphBackwards(). If neither lookup yields a paragraph the marker is removed.
 */
static void putInParagraphsRecursive(DFNode *node)
{
    // The next pointer is captured up front because the recursive call may move or remove the child
    DFNode *child = node->first;
    while (child != NULL) {
        DFNode *following = child->next;
        putInParagraphsRecursive(child);
        child = following;
    }

    int isMarker = (node->tag == WORD_BOOKMARKSTART) || (node->tag == WORD_BOOKMARKEND);
    if (!isMarker || (node->parent->tag == WORD_P))
        return;

    DFNode *paragraph = findParagraphForwards(node);
    if (paragraph != NULL) {
        DFNode *pPr = DFChildWithTag(paragraph,WORD_PPR);
        DFNode *insertBefore = (pPr != NULL) ? pPr->next : paragraph->first;
        DFInsertBefore(paragraph,node,insertBefore);
        return;
    }

    paragraph = findParagraphBackwards(node);
    if (paragraph != NULL) {
        DFAppendChild(paragraph,node);
        return;
    }

    DFRemoveNode(node);
}
/*
 * Merge adjacent runs (WORD_R) whose WORD_RPR children compare equal under nodesEqual(), then
 * recurse into every child of node.
 *
 * When two neighbouring runs match, the second run's WORD_RPR children are removed, its
 * remaining children are appended to the first run, and the second run itself is removed. The
 * first run is then compared against its new neighbour, so a whole sequence of matching runs
 * collapses into one.
 */
static void Word_mergeRunsRecursive(DFNode *node)
{
    for (DFNode *run = node->first; run != NULL; ) {
        DFNode *neighbour = run->next;

        int mergeable = 0;
        if ((run->tag == WORD_R) && (neighbour != NULL) && (neighbour->tag == WORD_R)) {
            DFNode *runProps = DFChildWithTag(run,WORD_RPR);
            DFNode *neighbourProps = DFChildWithTag(neighbour,WORD_RPR);
            mergeable = nodesEqual(runProps,neighbourProps);
        }

        if (!mergeable) {
            run = neighbour;
            continue;
        }

        // Drain the neighbour: discard its properties, adopt everything else
        DFNode *item;
        while ((item = neighbour->first) != NULL) {
            if (item->tag == WORD_RPR)
                DFRemoveNode(item);
            else
                DFAppendChild(run,item);
        }
        DFRemoveNode(neighbour);
        // Deliberately stay on the same run so it is compared with its new next sibling
    }

    for (DFNode *child = node->first; child != NULL; child = child->next)
        Word_mergeRunsRecursive(child);
}
/*
 * Create a new document (DFDocumentNew) whose root is a freshly created element with the given
 * tag. The element is stored in the document's root field and appended to its docNode.
 * Returns the new document.
 */
DFDocument *DFDocumentNewWithRoot(Tag rootTag)
{
    DFDocument *document = DFDocumentNew();

    document->root = DFCreateElement(document,rootTag);
    DFAppendChild(document->docNode,document->root);

    return document;
}
/*
 * Build an HTML span, created through WordConverterCreateAbstract() for the given concrete
 * node, with class DFPlaceholderClass and a single text child holding placeholderText. The
 * text node is created in get->conv->html. Returns the span.
 */
static DFNode *createAbstractPlaceholder(WordGetData *get, const char *placeholderText, DFNode *concrete)
{
    DFNode *placeholder = WordConverterCreateAbstract(get,HTML_SPAN,concrete);
    DFSetAttribute(placeholder,HTML_CLASS,DFPlaceholderClass);
    DFAppendChild(placeholder,DFCreateTextNode(get->conv->html,placeholderText));
    return placeholder;
}
/*
 * SAX comment callback: append a comment node carrying value to the element currently being
 * built (parser->parent). Skipped while the parser's ignoreDepth is positive.
 *
 * ctx is the DFSAXParser passed through as the SAX user data.
 */
static void SAXComment(void *ctx, const xmlChar *value)
{
    DFSAXParser *state = (DFSAXParser *)ctx;
    if (state->ignoreDepth > 0)
        return;

    DFNode *node = DFCreateComment(state->document,(const char *)value);
    assert(state->parent != NULL);
    DFAppendChild(state->parent,node);
}
/*
 * SAX processing-instruction callback: append a processing-instruction node built from target
 * and data to the element currently being built (parser->parent). Skipped while the parser's
 * ignoreDepth is positive.
 *
 * ctx is the DFSAXParser passed through as the SAX user data.
 */
static void SAXProcessingInstruction(void *ctx, const xmlChar *target, const xmlChar *data)
{
    DFSAXParser *state = (DFSAXParser *)ctx;
    if (state->ignoreDepth > 0)
        return;

    DFNode *node = DFCreateProcessingInstruction(state->document,
                                                 (const char *)target,
                                                 (const char *)data);
    assert(state->parent != NULL);
    DFAppendChild(state->parent,node);
}
/*
 * Replace all children of node.
 *
 * Every existing child is removed. Then, for each entry of the 0-terminated tags array, in
 * order, the node stored at children[tag] is appended if it is non-NULL. The children array is
 * therefore indexed by tag value.
 */
void replaceChildrenFromArray(DFNode *node, DFNode **children, Tag *tags)
{
    DFNode *existing;
    while ((existing = node->first) != NULL)
        DFRemoveNode(existing);

    for (Tag *cursor = tags; *cursor != 0; cursor++) {
        DFNode *replacement = children[*cursor];
        if (replacement)
            DFAppendChild(node,replacement);
    }
}
/*
 * Lens "get" for the document element: build the abstract <html> tree for a WORD_DOCUMENT node.
 *
 * The result always contains a <head> holding a <meta charset="utf-8">. If the concrete
 * document has a WORD_BODY child, whatever WordBodyLens.get() produces for it is appended after
 * the head. Returns NULL when concrete is not a WORD_DOCUMENT element.
 */
static DFNode *WordDocumentGet(WordGetData *get, DFNode *concrete)
{
    if (concrete->tag != WORD_DOCUMENT)
        return NULL;

    DFNode *htmlElem = WordConverterCreateAbstract(get,HTML_HTML,concrete);

    DFNode *headElem = WordConverterCreateAbstract(get,HTML_HEAD,NULL);
    DFAppendChild(htmlElem,headElem);

    DFNode *metaElem = WordConverterCreateAbstract(get,HTML_META,NULL);
    DFAppendChild(headElem,metaElem);
    DFSetAttribute(metaElem,HTML_CHARSET,"utf-8");

    DFNode *concreteBody = DFChildWithTag(concrete,WORD_BODY);
    if (concreteBody != NULL)
        DFAppendChild(htmlElem,WordBodyLens.get(get,concreteBody));

    return htmlElem;
}
/*
 * SAX character-data callback: append a text node containing the len bytes at ch to the element
 * currently being built (parser->parent). Skipped while the parser's ignoreDepth is positive.
 *
 * The bytes are copied into a temporary NUL-terminated buffer because DFCreateTextNode() takes a
 * C string while this callback receives a pointer plus an explicit length. The buffer comes from
 * xmalloc(), matching the other SAX callbacks in this file, rather than an unchecked malloc()
 * whose NULL result would have been handed straight to memcpy().
 *
 * ctx is the DFSAXParser passed through as the SAX user data.
 */
static void SAXCharacters(void *ctx, const xmlChar *ch, int len)
{
    DFSAXParser *parser = (DFSAXParser *)ctx;
    if (parser->ignoreDepth > 0)
        return;

    char *data = (char *)xmalloc(len+1);
    memcpy(data,ch,len);
    data[len] = '\0';

    DFNode *text = DFCreateTextNode(parser->document,data);
    assert(parser->parent != NULL);
    DFAppendChild(parser->parent,text);
    free(data); // freed right after node creation, as in the sibling callbacks
}
/*
 * SAX CDATA callback: the len bytes at value are added to the element currently being built
 * (parser->parent) as an ordinary text node, so CDATA sections are not distinguished from plain
 * character data in the resulting tree. Skipped while the parser's ignoreDepth is positive.
 *
 * ctx is the DFSAXParser passed through as the SAX user data.
 */
static void SAXCDATABlock(void *ctx, const xmlChar *value, int len)
{
    DFSAXParser *state = (DFSAXParser *)ctx;
    if (state->ignoreDepth > 0)
        return;

    // Temporary NUL-terminated copy; released again once the node has been created
    char *buffer = (char *)xmalloc(len+1);
    memcpy(buffer,value,len);
    buffer[len] = '\0';

    DFNode *node = DFCreateTextNode(state->document,buffer);
    assert(state->parent != NULL);
    DFAppendChild(state->parent,node);

    free(buffer);
}
/*
 * Find the WORD_SECTPR element that is a direct child of the main document's WORD_BODY.
 *
 * converter  Converter whose package->document is searched.
 * add        Treated as a boolean: if non-zero and no sectPr exists, a new empty one is created
 *            and appended to the body.
 *
 * Returns the sectPr element, or NULL if the document root is not WORD_DOCUMENT, there is no
 * body, or no sectPr exists and add is zero.
 */
static DFNode *findSectPr(WordConverter *converter, int add)
{
    DFNode *root = converter->package->document->root;
    if (root->tag != WORD_DOCUMENT)
        return NULL;

    DFNode *body = DFChildWithTag(root,WORD_BODY);
    if (body == NULL)
        return NULL;

    DFNode *sectPr = DFChildWithTag(body,WORD_SECTPR);
    // Logical AND: a bitwise '&' here would ignore any truthy add value whose low bit is clear
    if ((sectPr == NULL) && add) {
        sectPr = DFCreateElement(converter->package->document,WORD_SECTPR);
        DFAppendChild(body,sectPr);
    }
    return sectPr;
}
/*
 * Convert a Tidy node, and recursively its children, into nodes of htmlDoc.
 *
 *  - Text nodes become text nodes holding a copy of the Tidy node's value.
 *  - CDATA, comment and root nodes produce NULL (the root case also prints "Have root").
 *  - Every other node type is treated as an element: its name and each attribute name are
 *    mapped to tags in the HTML namespace; a NULL attribute value is stored as "". Children
 *    that convert to NULL are skipped.
 *
 * Returns the new node, or NULL when nothing was produced (including an element-like node with
 * no name, which is reported on stdout).
 */
DFNode *fromTidyNode(DFDocument *htmlDoc, TidyDoc tdoc, TidyNode tnode)
{
    switch (tidyNodeGetType(tnode)) {
        case TidyNode_Text: {
            char *textValue = copyTidyNodeValue(tnode,tdoc);
            DFNode *textNode = DFCreateTextNode(htmlDoc,textValue);
            free(textValue);
            return textNode;
        }
        case TidyNode_CDATA:
        case TidyNode_Comment:
            return NULL;
        case TidyNode_Root:
            printf("Have root\n");
            return NULL;
        default:
            break;
    }

    // Everything else is handled as an element
    const char *elemName = tidyNodeGetName(tnode);
    if (elemName == NULL) {
        printf("NULL name for %p, type %d\n",tnode,tidyNodeGetType(tnode));
        return NULL;
    }

    const NamespaceDecl *htmlNS = DFNameMapNamespaceForID(htmlDoc->map,NAMESPACE_HTML);
    Tag elemTag = DFNameMapTagForName(htmlDoc->map,htmlNS->namespaceURI,elemName);
    DFNode *element = DFCreateElement(htmlDoc,elemTag);

    TidyAttr attr = tidyAttrFirst(tnode);
    while (attr != NULL) {
        const char *attrName = tidyAttrName(attr);
        const char *attrValue = tidyAttrValue(attr);
        if (attrValue == NULL)
            attrValue = ""; // Can happen in case of the empty string
        Tag attrTag = DFNameMapTagForName(htmlDoc->map,htmlNS->namespaceURI,attrName);
        DFSetAttribute(element,attrTag,attrValue);
        attr = tidyAttrNext(attr);
    }

    TidyNode tidyChild = tidyGetChild(tnode);
    while (tidyChild != NULL) {
        DFNode *converted = fromTidyNode(htmlDoc,tdoc,tidyChild);
        if (converted != NULL)
            DFAppendChild(element,converted);
        tidyChild = tidyGetNext(tidyChild);
    }

    return element;
}
static void SAXStartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts) { DFSAXParser *parser = (DFSAXParser *)ctx; const NamespaceDecl *namespaceDecl = DFNameMapNamespaceForID(parser->document->map,NAMESPACE_HTML); Tag tag = DFNameMapTagForName(parser->document->map,namespaceDecl->namespaceURI,(const char *)fullname); DFNode *element = DFCreateElement(parser->document,tag); if (atts != NULL) { for (int i = 0; atts[i] != NULL; i += 2) { const xmlChar *name = atts[i]; const xmlChar *value = atts[i+1]; Tag attrTag = DFNameMapTagForName(parser->document->map,namespaceDecl->namespaceURI,(const char *)name); DFSetAttribute(element,attrTag,(const char *)value); } } DFAppendChild(parser->parent,element); parser->parent = element; if (parser->document->root == NULL) parser->document->root = element; }
/*
 * Expand a CSS "content" value into spans inserted at the start of child.
 *
 * content is parsed with CSSParseContent(). For each part, in order:
 *  - a string part with non-empty text becomes <span>text</span>;
 *  - a counter part named "figure" or "table" becomes a span of class DFFieldClass containing
 *    the matching " SEQ ... \* ARABIC " field code, and that span is recorded in
 *    caption->number;
 *  - any other part is ignored.
 * All spans are inserted before the node that was child's first child on entry, so they appear
 * in parse order ahead of the existing content.
 *
 * Does nothing when content is NULL.
 */
static void Word_addContentParts(DFNode *child, const char *content, WordCaption *caption)
{
    if (content == NULL)
        return;

    DFNode *insertionPoint = child->first;
    DFArray *parsed = CSSParseContent(content);

    for (int index = 0; index < DFArrayCount(parsed); index++) {
        ContentPart *part = DFArrayItemAt(parsed,index);

        if (part->type == ContentPartString) {
            // The text node is created even for an empty string, but only attached when non-empty
            DFNode *textNode = DFCreateTextNode(child->doc,part->value);
            if (strlen(part->value) > 0) {
                DFNode *wrapper = DFCreateElement(child->doc,HTML_SPAN);
                DFAppendChild(wrapper,textNode);
                DFInsertBefore(child,wrapper,insertionPoint);
            }
        }
        else if (part->type == ContentPartCounter) {
            const char *fieldCode = NULL;
            if (DFStringEquals(part->value,"figure"))
                fieldCode = " SEQ Figure \\* ARABIC ";
            else if (DFStringEquals(part->value,"table"))
                fieldCode = " SEQ Table \\* ARABIC ";

            if (fieldCode != NULL) {
                DFNode *field = DFCreateElement(child->doc,HTML_SPAN);
                DFSetAttribute(field,HTML_CLASS,DFFieldClass);
                DFCreateChildTextNode(field,fieldCode);
                DFInsertBefore(child,field,insertionPoint);
                caption->number = field;
            }
        }
    }

    DFArrayRelease(parsed);
}
/*
 * Create the concrete run-content element for an abstract (HTML) node:
 *
 *   text node            -> WORD_T, or WORD_DELTEXT when any ancestor is an HTML_DEL element,
 *                           wrapping a copy of the text; xml:space="preserve" is set when
 *                           trimming whitespace would change the text
 *   HTML_IMG             -> result of WordDrawingCreate()
 *   HTML_BR              -> WORD_BR
 *   HTML_SPAN, tab class -> WORD_TAB
 *
 * Returns NULL for anything else. New nodes are created in put->contentDoc.
 */
static DFNode *WordRunContentCreate(WordPutData *put, DFNode *abstract)
{
    switch (abstract->tag) {
        case HTML_IMG:
            return WordDrawingCreate(put,abstract);
        case HTML_BR:
            return DFCreateElement(put->contentDoc,WORD_BR);
        case HTML_SPAN:
            if (DFStringEquals(DFGetAttribute(abstract,HTML_CLASS),DFTabClass))
                return DFCreateElement(put->contentDoc,WORD_TAB);
            return NULL;
        case DOM_TEXT:
            break;
        default:
            return NULL;
    }

    DFNode *textCopy = DFCreateTextNode(put->contentDoc,abstract->value);

    // Text inside a <w:del> element must be stored in a <w:delText> element
    // Text *not* inside a <w:del> element is stored in a <w:t> element
    Tag holderTag = WORD_T;
    for (DFNode *ancestor = abstract->parent; ancestor != NULL; ancestor = ancestor->parent) {
        if (ancestor->tag == HTML_DEL) {
            holderTag = WORD_DELTEXT;
            break;
        }
    }

    DFNode *holder = DFCreateElement(put->contentDoc,holderTag);
    DFAppendChild(holder,textCopy);

    char *trimmed = DFStringTrimWhitespace(abstract->value);
    int hasOuterWhitespace = !DFStringEquals(trimmed,abstract->value);
    free(trimmed);
    if (hasOuterWhitespace)
        DFSetAttribute(holder,XML_SPACE,"preserve");

    return holder;
}
int WordConverterConvertToHTML(WordConverter *converter, DFError **error) { converter->haveFields = Word_simplifyFields(converter->package); Word_mergeRuns(converter->package); if (converter->package->document == NULL) { DFErrorFormat(error,"document.xml not found"); return 0; } DFNode *wordDocument = DFChildWithTag(converter->package->document->docNode,WORD_DOCUMENT); if (wordDocument == NULL) { DFErrorFormat(error,"word:document not found"); return 0; } WordAddNbsps(converter->package->document); WordFixLists(converter); CSSSheetRelease(converter->styleSheet); converter->styleSheet = WordParseStyles(converter); WordObjectsCollapseBookmarks(converter->objects); WordObjectsScan(converter->objects); WordObjectsAnalyzeBookmarks(converter->objects,converter->styles); WordGetData get; get.conv = converter; DFNode *abstract = WordDocumentLens.get(&get,wordDocument); DFAppendChild(converter->html->docNode,abstract); Word_postProcessHTMLDoc(converter); HTMLAddExternalStyleSheet(converter->html,"reset.css"); char *cssText = CSSSheetCopyCSSText(converter->styleSheet); HTMLAddInternalStyleSheet(converter->html,cssText); free(cssText); return 1; }
/*
 * Create a text node holding data in parent's document, append it as parent's last child, and
 * return it.
 */
DFNode *DFCreateChildTextNode(DFNode *parent, const char *data)
{
    DFNode *node = DFCreateTextNode(parent->doc,data);

    DFAppendChild(parent,node);

    return node;
}
/*
 * Create an element with the given tag in parent's document, append it as parent's last child,
 * and return it.
 */
DFNode *DFCreateChildElement(DFNode *parent, Tag tag)
{
    DFNode *element = DFCreateElement(parent->doc,tag);

    DFAppendChild(parent,element);

    return element;
}
/*
 * Lens "get" for tables: build the abstract <table> for a WORD_TBL element.
 *
 * Width handling: if the table properties carry no width and the table's cell width type is
 * "auto", the width is left unset. Otherwise, when the table has a positive total width a
 * <colgroup> is added and - if the main section has a positive content width and no width is
 * set yet - the width becomes the table's share of that content width as a percentage. If
 * there is still no width after that, "100%" is used.
 *
 * The collapsed table properties are serialised into the style attribute (omitted when empty).
 * The class attribute comes from the table's own style selector or, failing that, from the
 * sheet's default table style if there is one.
 *
 * Rows are built from the WORD_TR children using the table structure: positions with no cell
 * get an empty <td>; a cell is emitted only in the row it starts in, with colspan/rowspan
 * attributes when they differ from 1.
 *
 * Returns NULL when concrete is not a WORD_TBL element.
 */
static DFNode *WordTblGet(WordGetData *get, DFNode *concrete)
{
    if (concrete->tag != WORD_TBL)
        return NULL;

    DFNode *htmlTable = WordConverterCreateAbstract(get,HTML_TABLE,concrete);
    ConcreteInfo *info = getConcreteInfo(get->conv,concrete);
    calcTotals(get,info);

    const char *widthType = cellWidthTypeForTable(concrete);
    int isAutoWidth = DFStringEquals(widthType,"auto");

    if ((CSSGet(info->tableProperties,"width") == NULL) && isAutoWidth) {
        CSSPut(info->tableProperties,"width",NULL);
    }
    else {
        // Determine column widths and table width
        if (info->totalWidthPts > 0) {
            DFAppendChild(htmlTable,HTML_createColgroup(get->conv->html,info->structure));

            if (WordSectionContentWidth(get->conv->mainSection) > 0) {
                // The section width is divided by 20 to obtain points
                double sectionWidthPts = WordSectionContentWidth(get->conv->mainSection)/20.0;
                double widthPct = 100.0*info->totalWidthPts/sectionWidthPts;
                if (CSSGet(info->tableProperties,"width") == NULL) {
                    char pctText[100];
                    CSSPut(info->tableProperties,"width",DFFormatDoublePct(pctText,100,widthPct));
                }
            }
        }

        if (CSSGet(info->tableProperties,"width") == NULL)
            CSSPut(info->tableProperties,"width","100%");
    }

    // Inline style
    DFHashTable *flattened = CSSCollapseProperties(info->tableProperties);
    char *inlineStyle = CSSSerializeProperties(flattened);
    DFHashTableRelease(flattened);
    if (inlineStyle[0] != '\0')
        DFSetAttribute(htmlTable,HTML_STYLE,inlineStyle);
    free(inlineStyle);

    // Class name
    if ((info->style != NULL) && (info->style->selector != NULL)) {
        char *ownClass = CSSSelectorCopyClassName(info->style->selector);
        DFSetAttribute(htmlTable,HTML_CLASS,ownClass);
        free(ownClass);
    }
    else {
        CSSStyle *fallback = CSSSheetDefaultStyleForFamily(get->conv->styleSheet,StyleFamilyTable);
        if (fallback != NULL)
            DFSetAttribute(htmlTable,HTML_CLASS,fallback->className);
    }

    // Create rows and cells
    int rowIndex = 0;
    for (DFNode *wordRow = concrete->first; wordRow != NULL; wordRow = wordRow->next) {
        if (wordRow->tag != WORD_TR)
            continue;

        DFNode *htmlRow = WordConverterCreateAbstract(get,HTML_TR,wordRow);
        DFAppendChild(htmlTable,htmlRow);

        for (unsigned int colIndex = 0; colIndex < info->structure->cols; ) {
            DFCell *cell = DFTableGetCell(info->structure,rowIndex,colIndex);
            if (cell == NULL) {
                // Hole in the grid: emit an empty placeholder cell
                DFAppendChild(htmlRow,DFCreateElement(get->conv->html,HTML_TD));
                colIndex++;
                continue;
            }

            // Cells spanning several rows are only emitted in the row where they begin
            if (rowIndex == cell->row) {
                DFNode *htmlCell = WordTcGet(get,cell->element);
                DFAppendChild(htmlRow,htmlCell);
                if (cell->colSpan != 1)
                    DFFormatAttribute(htmlCell,HTML_COLSPAN,"%d",cell->colSpan);
                if (cell->rowSpan != 1)
                    DFFormatAttribute(htmlCell,HTML_ROWSPAN,"%d",cell->rowSpan);
            }
            colIndex += cell->colSpan;
        }
        rowIndex++;
    }

    ConcreteInfoFree(info);
    return htmlTable;
}
/*
 * Lens "put" for tables: rebuild the WORD_TBL element concrete from the HTML table abstract.
 *
 * The existing tblPr and tblGrid children are reused (or created), all other children are
 * detached, and the table is reassembled as: tblPr, tblGrid, then one row per abstract row.
 *
 *  - tblPr is updated from the table's inline style and the Word style matching its class.
 *  - tblGrid gets one gridCol per column. Widths are in twentieths of a point, derived from
 *    the section content width plus left/right cell padding, scaled by the table's percentage
 *    width (100 unless the inline "width" is a valid percentage).
 *  - Each row comes from concreteRowForAbstractRow() and has its jc adjusted via updateTrJc()
 *    using the table's jc value before and after the tblPr update.
 *  - For each cell position, the concrete cell mapped to the abstract cell is reused only in
 *    the row where the cell starts; continuation rows of a row-spanning cell get a fresh
 *    WORD_TC. tcPr is forced to be the first child and receives the column span, the vMerge
 *    state ("restart" / "continue") and the inline cell properties, plus a percentage width in
 *    the starting row when the table width is positive. A cell lacking any block-level child
 *    gets an empty paragraph.
 *
 * Does nothing unless abstract is HTML_TABLE and concrete is WORD_TBL.
 */
static void WordTblPut(WordPutData *put, DFNode *abstract, DFNode *concrete)
{
    if ((abstract->tag != HTML_TABLE) || (concrete->tag != WORD_TBL))
        return;

    DFTable *grid = HTML_tableStructure(abstract);
    const char *tableCSSText = DFGetAttribute(abstract,HTML_STYLE);
    CSSProperties *tableProps = CSSPropertiesNewWithString(tableCSSText);
    CSSProperties *paddingProps = CSSPropertiesNew();
    const char *tableClass = DFGetAttribute(abstract,HTML_CLASS);
    char *tableSelector = CSSMakeSelector("table",tableClass);
    WordStyle *wordStyle = WordSheetStyleForSelector(put->conv->styles,tableSelector);
    CellPadding cellPadding = getPadding(put->conv->styleSheet,wordStyle,paddingProps);

    // Keep (or create) the two structural children, then empty the table
    DFNode *tblPr = DFChildWithTag(concrete,WORD_TBLPR);
    if (tblPr == NULL)
        tblPr = DFCreateElement(concrete->doc,WORD_TBLPR);

    DFNode *tblGrid = DFChildWithTag(concrete,WORD_TBLGRID);
    if (tblGrid == NULL)
        tblGrid = DFCreateElement(concrete->doc,WORD_TBLGRID);

    while (concrete->first != NULL)
        DFRemoveNode(concrete->first);

    // Table properties; the jc value is sampled before and after for the row update below
    const char *jcBefore = DFGetChildAttribute(tblPr,WORD_JC,WORD_VAL);
    WordPutTblPr(tblPr,tableProps,NULL,put->conv->mainSection,
                 (wordStyle != NULL) ? wordStyle->styleId : NULL);
    const char *jcAfter = DFGetChildAttribute(tblPr,WORD_JC,WORD_VAL);

    // Overall table width in points
    double widthPct = 100;
    if (CSSGet(tableProps,"width") != NULL) {
        CSSLength parsed = CSSLengthFromString(CSSGet(tableProps,"width"));
        if (CSSLengthIsValid(parsed) && (parsed.units == UnitsPct))
            widthPct = parsed.value;
    }

    double sectionWidthPts = WordSectionContentWidthPts(put->conv->mainSection);
    double tableWidthPts = (sectionWidthPts+cellPadding.leftPts+cellPadding.rightPts)*(widthPct/100.0);

    // Column grid
    while (tblGrid->first != NULL)
        DFRemoveNode(tblGrid->first);
    for (unsigned int gridIndex = 0; gridIndex < grid->cols; gridIndex++) {
        DFNode *gridCol = DFCreateChildElement(tblGrid,WORD_GRIDCOL);
        double pct = DFTablePctWidthForCol(grid,gridIndex);
        double pts = tableWidthPts*pct/100.0;
        int twips = (int)round(pts*20);
        DFFormatAttribute(gridCol,WORD_W,"%d",twips);
    }

    DFAppendChild(concrete,tblPr);
    DFAppendChild(concrete,tblGrid);

    // Rows and cells
    for (unsigned int row = 0; row < grid->rows; row++) {
        DFNode *abstractRow = DFTableGetRowElement(grid,row);
        DFNode *concreteRow = concreteRowForAbstractRow(put,abstractRow);
        updateTrJc(concreteRow,jcBefore,jcAfter);
        DFAppendChild(concrete,concreteRow);

        for (unsigned int col = 0; col < grid->cols; ) {
            DFCell *cell = DFTableGetCell(grid,row,col);
            assert(cell != NULL);

            DFNode *tc = WordConverterGetConcrete(put,cell->element);
            if ((tc == NULL) || (row != cell->row))
                tc = DFCreateElement(concrete->doc,WORD_TC);
            DFAppendChild(concreteRow,tc);

            if (cell->row == row)
                WordTcPut(put,cell->element,tc);

            const char *vMerge = NULL;
            if (cell->rowSpan > 1)
                vMerge = (row == cell->row) ? "restart" : "continue";

            DFNode *tcPr = DFChildWithTag(tc,WORD_TCPR);
            if (tcPr == NULL)
                tcPr = DFCreateElement(concrete->doc,WORD_TCPR);
            // Make sure tcPr comes first
            DFInsertBefore(tc,tcPr,tc->first);
            WordPutTcPr2(tcPr,cell->colSpan,vMerge);

            const char *cellCSSText = DFGetAttribute(cell->element,HTML_STYLE);
            CSSProperties *cellProps = CSSPropertiesNewWithString(cellCSSText);

            if ((row == cell->row) && (tableWidthPts > 0)) {
                // Width of a spanning cell is the sum of the columns it covers
                double spanPct = 0;
                for (unsigned int covered = col; covered < col + cell->colSpan; covered++)
                    spanPct += DFTablePctWidthForCol(grid,covered);
                char pctText[100];
                CSSPut(cellProps,"width",DFFormatDoublePct(pctText,100,spanPct));
            }
            WordPutTcPr1(tcPr,cellProps);

            // Every cell must contain at least one block-level element
            int hasBlock = 0;
            for (DFNode *content = tc->first; content != NULL; content = content->next) {
                if (WordBlockLevelLens.isVisible(put,content))
                    hasBlock = 1;
            }
            if (!hasBlock) {
                DFNode *emptyParagraph = DFCreateElement(concrete->doc,WORD_P);
                DFAppendChild(tc,emptyParagraph);
            }

            col += cell->colSpan;
            CSSPropertiesRelease(cellProps);
        }
    }

    free(tableSelector);
    DFTableRelease(grid);
    CSSPropertiesRelease(tableProps);
    CSSPropertiesRelease(paddingProps);
}
/*
 * Clean up the generated HTML tree below node, then recurse into each of node's children.
 *
 * For every direct child of node:
 *  - HTML_SPAN with the bookmark class: the span is removed but its children are kept.
 *  - HTML_CAPTION: the caption is moved into an adjacent figure or table (previous sibling is
 *    tried first, then next sibling) provided that element has no caption of its own yet.
 *    Captions attached to figures are retagged HTML_FIGCAPTION. When a caption was attached,
 *    a prefix is extracted from it and, unless a ::before "content" is already defined for
 *    that element name, stored as the ::before content together with a counter-increment.
 *  - HTML_NAV inside a paragraph-type node: the nav is hoisted out so that it becomes a
 *    sibling placed before node; anything that preceded it inside node is moved into a new
 *    element of the same tag placed before the nav.
 */
static void Word_postProcessHTML(WordConverter *conv, DFNode *node)
{
    // next is captured before each child is processed since the child may be moved or removed
    DFNode *next;
    for (DFNode *child = node->first; child != NULL; child = next) {
        next = child->next;

        switch (child->tag) {
            case HTML_SPAN: {
                const char *className = DFGetAttribute(child,HTML_CLASS);
                if (DFStringEquals(className,DFBookmarkClass)) {
                    // Continue with the span's first child, if it has one, so that its former
                    // contents are visited by this loop as well
                    if (child->first != NULL)
                        next = child->first;
                    DFRemoveNodeButKeepChildren(child);
                }
                break;
            }
            case HTML_CAPTION: {
                // Stays NULL if the caption could not be attached to a figure or table
                const char *counterName = NULL;

                // Previous sibling is a figure without a figcaption: append the caption to it
                if ((child->prev != NULL) && (child->prev->tag == HTML_FIGURE) &&
                    (DFChildWithTag(child->prev,HTML_FIGCAPTION) == NULL)) {
                    child->tag = HTML_FIGCAPTION;
                    counterName = "figure";
                    DFAppendChild(child->prev,child);
                }
                // Previous sibling is a table without a caption: insert as its first child
                else if ((child->prev != NULL) && (child->prev->tag == HTML_TABLE) &&
                         (DFChildWithTag(child->prev,HTML_CAPTION) == NULL)) {
                    counterName = "table";
                    DFInsertBefore(child->prev,child,child->prev->first);
                }
                // Next sibling is a figure without a figcaption: insert as its first child
                else if ((child->next != NULL) && (child->next->tag == HTML_FIGURE) &&
                         (DFChildWithTag(child->next,HTML_FIGCAPTION) == NULL)) {
                    child->tag = HTML_FIGCAPTION;
                    counterName = "figure";
                    DFInsertBefore(child->next,child,child->next->first);
                }
                // Next sibling is a table without a caption: insert as its first child and
                // record via inline style that the caption belongs at the top
                else if ((child->next != NULL) && (child->next->tag == HTML_TABLE) &&
                         (DFChildWithTag(child->next,HTML_CAPTION) == NULL)) {
                    counterName = "table";
                    DFSetAttribute(child,HTML_STYLE,"caption-side: top");
                    DFInsertBefore(child->next,child,child->next->first);
                }

                if (counterName != NULL) {
                    // beforeText is released with free() below, so extractPrefix presumably
                    // returns a heap-allocated string or NULL
                    char *beforeText = extractPrefix(child,counterName);
                    if (beforeText != NULL) {
                        CSSStyle *style = CSSSheetLookupElement(conv->styleSheet,DFNodeName(child),NULL,1,0);
                        // Only the first prefix found for this element name is recorded
                        if (CSSGet(CSSStyleBefore(style),"content") == NULL) {
                            CSSPut(CSSStyleRule(style),"counter-increment",counterName);
                            CSSPut(CSSStyleBefore(style),"content",beforeText);
                        }
                    }
                    free(beforeText);
                }
                break;
            }
            case HTML_NAV: {
                if (HTML_isParagraphTag(node->tag)) {

                    if (child->prev != NULL) {
                        // NOTE(review): beforeP is created in conv->package->document although
                        // it is inserted into the tree that contains node - confirm intended
                        DFNode *beforeP = DFCreateElement(conv->package->document,node->tag);
                        // Move the preceding siblings one at a time, nearest first; inserting
                        // each at the front of beforeP keeps their original order
                        while (child->prev != NULL)
                            DFInsertBefore(beforeP,child->prev,beforeP->first);
                        DFInsertBefore(node->parent,beforeP,node);
                    }

                    DFInsertBefore(node->parent,child,node);

                    // If node is now empty, or holds nothing but a single <br>, drop it
                    if ((node->first == NULL) ||
                        ((node->first->tag == HTML_BR) && (node->first->next == NULL))) {
                        DFRemoveNode(node);
                        return;
                    }
                    // Stop scanning; what is left in node is handled by the recursion below
                    next = NULL;
                }
                break;
            }
        }
    }

    for (DFNode *child = node->first; child != NULL; child = next) {
        next = child->next;
        Word_postProcessHTML(conv,child);
    }
}
static void SAXStartElementNS(void *ctx, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes) { DFSAXParser *parser = (DFSAXParser *)ctx; if (parser->ignoreDepth > 0) { parser->ignoreDepth++; return; } for (int i = 0; i < nb_namespaces; i++) { const xmlChar *nsPrefix = namespaces[i*2]; const xmlChar *nsURI = namespaces[i*2+1]; DFNameMapFoundNamespace(parser->document->map,(const char *)nsURI,(const char *)nsPrefix); } Tag tag = DFNameMapTagForName(parser->document->map,(const char *)URI,(const char *)localname); if (parser->compatibility != NULL) { const TagDecl *tagDecl = DFNameMapNameForTag(parser->document->map,tag); MCAction action = DFMarkupCompatibilityLookup(parser->compatibility,tagDecl->namespaceID,tag,1); if (action == MCActionIgnore) { parser->ignoreDepth++; return; } } if (parser->compatibility != NULL) { DFMarkupCompatibilityPush(parser->compatibility,nb_namespaces,(const char **)namespaces,parser->document->map); } DFNode *element = DFCreateElement(parser->document,tag); for (int i = 0; i < nb_attributes; i++) { const xmlChar *attrLocalName = attributes[i*5+0]; const xmlChar *attrURI = attributes[i*5+2]; const xmlChar *attrValueStart = attributes[i*5+3]; const xmlChar *attrValueEnd = attributes[i*5+4]; unsigned long attrValueLen = (unsigned long)(attrValueEnd - attrValueStart); Tag attrTag = DFNameMapTagForName(parser->document->map,(const char *)attrURI,(const char *)attrLocalName); const TagDecl *attrTagDecl = DFNameMapNameForTag(parser->document->map,attrTag); char *attrValue = (char *)xmalloc(attrValueLen+1); memcpy(attrValue,attrValueStart,attrValueLen); attrValue[attrValueLen] = '\0'; if (parser->compatibility != NULL) { switch (attrTag) { case MC_IGNORABLE: case MC_PROCESSCONTENT: case MC_MUSTUNDERSTAND: DFMarkupCompatibilityProcessAttr(parser->compatibility,attrTag,attrValue,parser->document->map); break; default: { 
MCAction action = DFMarkupCompatibilityLookup(parser->compatibility,attrTagDecl->namespaceID,0,0); if (action != MCActionIgnore) DFSetAttribute(element,attrTag,attrValue); break; } } } else { DFSetAttribute(element,attrTag,attrValue); } free(attrValue); } DFAppendChild(parser->parent,element); parser->parent = element; if (parser->document->root == NULL) parser->document->root = element; }
/*
 * Convert flat bookmarkStart/bookmarkEnd markers below node into nested WORD_BOOKMARK
 * container elements.
 *
 * Children of node are scanned in order. Each maximal run of consecutive bookmark start/end
 * markers is handled as a group; any other child is simply recursed into. bookmarksById is
 * passed through to DFArraySort() as the context for compareStartElements.
 */
static void collapseRecursive(DFNode *node, DFHashTable *bookmarksById)
{
    DFNode *next;
    for (DFNode *child = node->first; child != NULL; child = next) {
        next = child->next;

        switch (child->tag) {
            case WORD_BOOKMARKSTART:
            case WORD_BOOKMARKEND: {
                // Collect the run of adjacent markers beginning at child, split into start and
                // end elements, along with the ids seen on each kind (a missing id counts as "")
                DFArray *startElements = DFArrayNew(NULL,NULL);
                DFArray *endElements = DFArrayNew(NULL,NULL);
                // startIds is filled below but not consulted anywhere in this function
                DFHashTable *startIds = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
                DFHashTable *endIds = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
                DFNode *n;
                for (n = child;
                     (n != NULL) && ((n->tag == WORD_BOOKMARKSTART) || (n->tag == WORD_BOOKMARKEND));
                     n = n->next) {

                    if (n->tag == WORD_BOOKMARKSTART) {
                        const char *idValue = DFGetAttribute(n,WORD_ID);
                        if (idValue == NULL)
                            idValue = "";

                        DFHashTableAdd(startIds,idValue,idValue);
                        DFArrayAppend(startElements,n);
                    }
                    else {
                        const char *idValue = DFGetAttribute(n,WORD_ID);
                        if (idValue == NULL)
                            idValue = "";

                        DFHashTableAdd(endIds,idValue,idValue);
                        DFArrayAppend(endElements,n);
                    }
                }
                // The outer loop resumes at the first node after the run
                next = n;

                DFArraySort(startElements,bookmarksById,compareStartElements);

                // For each end marker that closes a bookmark which is an ancestor of this
                // position (i.e. one already converted to WORD_BOOKMARK), move child and every
                // sibling following it out of that ancestor, placing them directly after it
                for (size_t endIndex = 0; endIndex < DFArrayCount(endElements); endIndex++) {
                    DFNode *elem = DFArrayItemAt(endElements,endIndex);
                    const char *endId = DFGetAttribute(elem,WORD_ID);
                    int found = 0;
                    DFNode *ancestor;
                    for (ancestor = elem->parent; (ancestor != NULL) && !found; ancestor = ancestor->parent) {
                        if ((ancestor->tag == WORD_BOOKMARK) && DFStringEquals(DFGetAttribute(ancestor,WORD_ID),endId)) {
                            found = 1;
                            break;
                        }
                    }

                    if (found) {
                        DFNode *before = ancestor->next;
                        DFNode *nnext;
                        for (DFNode *n = child; n != NULL; n = nnext) {
                            nnext = n->next;
                            DFInsertBefore(ancestor->parent,n,before);
                        }
                    }
                }

                // A start marker whose end marker is in the same run encloses no content: turn
                // it into a WORD_BOOKMARK in place and drop it from the list of starts that
                // still need content
                size_t x = 0;
                while (x < DFArrayCount(startElements)) {
                    DFNode *element = DFArrayItemAt(startElements,x);
                    const char *bookmarkId = DFGetAttribute(element,WORD_ID);
                    if (bookmarkId == NULL)
                        bookmarkId = "";
                    if (DFHashTableLookup(endIds,bookmarkId) != NULL) {
                        element->tag = WORD_BOOKMARK;
                        DFArrayRemove(startElements,x);
                    }
                    else {
                        x++;
                    }
                }

                if (DFArrayCount(startElements) > 0) {
                    // Nest the remaining start elements inside one another, in sorted order
                    for (size_t i = 1; i < DFArrayCount(startElements); i++) {
                        DFNode *tempParent = DFArrayItemAt(startElements,i-1);
                        DFNode *tempChild = DFArrayItemAt(startElements,i);
                        DFAppendChild(tempParent,tempChild);
                    }

                    // Everything after the run becomes content of the innermost start element.
                    // This leaves next == NULL, which ends the outer loop after this case.
                    DFNode *last = DFArrayItemAt(startElements,DFArrayCount(startElements)-1);
                    while (next != NULL) {
                        DFNode *tempChild = next;
                        next = next->next;
                        DFAppendChild(last,tempChild);
                    }
                }

                // The remaining start elements become bookmark containers
                for (size_t eIndex = 0; eIndex < DFArrayCount(startElements); eIndex++) {
                    DFNode *e = DFArrayItemAt(startElements,eIndex);
                    e->tag = WORD_BOOKMARK;
                }

                // End markers are not needed once nesting expresses the extent
                for (size_t eIndex = 0; eIndex < DFArrayCount(endElements); eIndex++) {
                    DFNode *e = DFArrayItemAt(endElements,eIndex);
                    DFRemoveNode(e);
                }

                // Continue processing inside the innermost new container, which now holds the
                // siblings that followed the run
                if (DFArrayCount(startElements) > 0) {
                    DFNode *last = DFArrayItemAt(startElements,DFArrayCount(startElements)-1);
                    collapseRecursive(last,bookmarksById);
                }

                DFArrayRelease(startElements);
                DFArrayRelease(endElements);
                DFHashTableRelease(startIds);
                DFHashTableRelease(endIds);
                break;
            }
            default:
                collapseRecursive(child,bookmarksById);
                break;
        }
    }
}
/*
 * Turn runs of numbered paragraphs among node's children into nested <ul>/<ol> lists.
 *
 * Each HTML_P child has its WORD_NUMID and WORD_ILVL attributes read and removed. A paragraph
 * with both attributes and a non-zero numId is a list item. A stack of open lists (one frame
 * per list element, recording numId and ilvl) is maintained while scanning:
 *  - frames are popped when the numbering changes or the level decreases;
 *  - a new list element is opened when the stack is empty, the numbering differs, or the level
 *    increases. It is <ul> for disc/circle/square list styles and <ol> otherwise, and is placed
 *    either in the last <li> of the enclosing list or, at top level, before the paragraph.
 * While any list is open, every child is moved into it: list items get a new <li>, other
 * children are appended to the current last <li> (or a new one if the list has none).
 */
static void Word_fixListSingle(WordConverter *conv, DFNode *node)
{
    ListStack stack;
    bzero(&stack,sizeof(ListStack));

    DFNode *child = node->first;
    while (child != NULL) {
        DFNode *following = child->next;
        int startsItem = 0;

        if (child->tag == HTML_P) {
            const char *numIdStr = DFGetAttribute(child,WORD_NUMID);
            const char *ilvlStr = DFGetAttribute(child,WORD_ILVL);
            DFRemoveAttribute(child,WORD_NUMID);
            DFRemoveAttribute(child,WORD_ILVL);

            // A numId of 0 means that there is no numbering applied to this paragraph
            int numbered = (numIdStr != NULL) && (ilvlStr != NULL) && (atoi(numIdStr) != 0);

            if (numbered) {
                startsItem = 1;
                int numId = atoi(numIdStr);
                int ilvl = atoi(ilvlStr);
                ListDimensions dimensions = listIndent(conv,numIdStr,ilvlStr);

                // Find the list at the same ilvl, and check if it has the same numId. If not,
                // we're starting a new list. (The scan runs to the bottom of the stack, so the
                // outermost matching frame wins.)
                ListFrame *sameLevel = NULL;
                for (ListFrame *frame = stack.top; frame != NULL; frame = frame->parent) {
                    if (frame->ilvl == ilvl)
                        sameLevel = frame;
                }

                int startsNewList = (sameLevel != NULL) && (sameLevel->numId != numId);
                fixTrailingParagraphs(&stack,startsNewList ? ilvl : ilvl+1);

                if (stack.top != NULL) {
                    if (stack.top->numId != numId)
                        ListStackPopToAboveIlvl(&stack,ilvl);
                    else if (stack.top->ilvl > ilvl)
                        ListStackPopToAboveIlvl(&stack,ilvl+1);
                }

                if ((stack.top == NULL) || (stack.top->numId != numId) || (stack.top->ilvl < ilvl)) {
                    WordConcreteNum *num = WordNumberingConcreteWithId(conv->numbering,numIdStr); // may be NULL
                    WordNumLevel *level = WordConcreteNumGetLevel(num,ilvl); // may be NULL
                    const char *styleType = WordNumLevelToListStyleType(level); // may be NULL

                    int isBulleted = DFStringEquals(styleType,"disc") ||
                                     DFStringEquals(styleType,"circle") ||
                                     DFStringEquals(styleType,"square");
                    Tag listTag = isBulleted ? HTML_UL : HTML_OL;

                    DFNode *list = DFCreateElement(conv->html,listTag);
                    if (styleType != NULL)
                        DFFormatAttribute(list,HTML_STYLE,"list-style-type: %s",styleType);

                    if (stack.top == NULL) {
                        DFInsertBefore(node,list,child);
                    }
                    else {
                        // Nested list: it lives inside the enclosing list's last item
                        DFNode *hostItem = stack.top->element->last;
                        if (hostItem == NULL)
                            hostItem = DFCreateChildElement(stack.top->element,HTML_LI);
                        DFAppendChild(hostItem,list);
                    }
                    ListStackPushFrame(&stack,list,numId,ilvl,dimensions);
                }
            }
        }

        if (stack.top != NULL) {
            DFNode *item;
            if (!startsItem && (stack.top->element->last != NULL))
                item = stack.top->element->last;
            else
                item = DFCreateChildElement(stack.top->element,HTML_LI);
            DFAppendChild(item,child);
        }

        child = following;
    }

    fixTrailingParagraphs(&stack,-1);
    while (stack.top != NULL)
        ListStackPop(&stack);
}
/*
 * Prepare the HTML document so that every element referenced by a link has bookmark markup.
 *
 * findReferences() yields, per target id, the array of referencing elements. Targets are
 * processed in case-sensitive id order; ids with no matching element are skipped.
 *
 *  - Headings (h1-h6): all of the heading's content is wrapped in a span of class
 *    DFBookmarkClass carrying WORD_NAME / WORD_ID. These are taken from the first WORD_BOOKMARK
 *    child (other than "_GoBack") of the heading's concrete paragraph when that bookmark has
 *    both attributes; otherwise a new bookmark is allocated. When an existing bookmark element
 *    was found, the span also gets an id built from the id prefix and that element's seqNo.
 *  - Tables and figures with a known caption: up to three bookmarks are created inside the
 *    caption, depending on the classes of the references - caption text (from contentStart to
 *    the end), label+number (from the start up to and including the number), and the full
 *    text. If no reference has one of the three classes, the full-text bookmark is created.
 *    The resulting bookmarks are stored on the caption.
 */
void Word_setupBookmarkLinks(WordPutData *put)
{
    DFHashTable *refsByTarget = findReferences(put->conv->html);
    const char **targetIds = DFHashTableCopyKeys(refsByTarget);
    DFSortStringsCaseSensitive(targetIds);

    for (const char **idPtr = targetIds; *idPtr; idPtr++) {
        const char *targetId = *idPtr;
        DFArray *refs = DFHashTableLookup(refsByTarget,targetId);
        DFNode *target = DFElementForIdAttr(put->conv->html,targetId);
        if (target == NULL)
            continue;

        // The following is only relevant for figures and tables
        int wantText = 0;
        int wantLabelNum = 0;
        int wantCaptionText = 0;
        for (int r = 0; r < DFArrayCount(refs); r++) {
            DFNode *link = DFArrayItemAt(refs,r);
            const char *linkClass = DFGetAttribute(link,HTML_CLASS);
            if (DFStringEquals(linkClass,DFRefTextClass))
                wantText = 1;
            else if (DFStringEquals(linkClass,DFRefLabelNumClass))
                wantLabelNum = 1;
            else if (DFStringEquals(linkClass,DFRefCaptionTextClass))
                wantCaptionText = 1;
        }

        DFNode *concrete = WordConverterGetConcrete(put,target);

        switch (target->tag) {
            case HTML_H1:
            case HTML_H2:
            case HTML_H3:
            case HTML_H4:
            case HTML_H5:
            case HTML_H6: {
                // FIXME: We only want to consider the bookmark to be the heading's "correct"
                // bookmark in the case where it contains all of the heading's content, though
                // excluding other bookmarks that might come before or after it.
                //
                // If the cursor is inside a heading bookmark when the document is saved, Word
                // puts a bookmark called _GoBack there, which must not be confused with the
                // heading's actual bookmark (if any). As a temporary hack _GoBack is filtered
                // out explicitly; a more general fix is needed, as other bookmarks may end up
                // in the heading.
                DFNode *existing = NULL;
                if ((concrete != NULL) && (concrete->tag == WORD_P)) {
                    for (DFNode *c = concrete->first; (c != NULL) && (existing == NULL); c = c->next) {
                        if (c->tag != WORD_BOOKMARK)
                            continue;
                        if (DFStringEquals(DFGetAttribute(c,WORD_NAME),"_GoBack"))
                            continue;
                        existing = c;
                    }
                }

                const char *bookmarkId = NULL;
                const char *bookmarkName = NULL;
                if (existing != NULL) {
                    bookmarkId = DFGetAttribute(existing,WORD_ID);
                    bookmarkName = DFGetAttribute(existing,WORD_NAME);
                }

                if ((existing == NULL) || (bookmarkId == NULL) || (bookmarkName == NULL)) {
                    // New bookmark
                    WordBookmark *fresh = WordObjectsAddBookmark(put->conv->objects);
                    bookmarkId = fresh->bookmarkId;
                    bookmarkName = fresh->bookmarkName;
                }

                DFNode *wrapper = DFCreateElement(put->conv->package->document,HTML_SPAN);
                DFSetAttribute(wrapper,HTML_CLASS,DFBookmarkClass);
                if (existing != NULL) {
                    // FIXME: Not covered by tests
                    DFFormatAttribute(wrapper,HTML_ID,"%s%u",put->conv->idPrefix,existing->seqNo);
                }
                DFSetAttribute(wrapper,WORD_NAME,bookmarkName);
                DFSetAttribute(wrapper,WORD_ID,bookmarkId);

                // Move the heading's entire content into the wrapper
                while (target->first != NULL)
                    DFAppendChild(wrapper,target->first);
                DFAppendChild(target,wrapper);
                break;
            }
            case HTML_TABLE:
            case HTML_FIGURE: {
                WordCaption *caption = WordObjectsCaptionForTarget(put->conv->objects,target);
                if (caption == NULL)
                    break;

                assert(caption->element != NULL);
                assert((caption->number == NULL) || (caption->number->parent == caption->element));
                assert((caption->contentStart == NULL) || (caption->contentStart->parent == caption->element));

                // Note: caption.number may be null (i.e. if the caption is unnumbered)
                //       caption.contentStart may be null (if there is no text in the caption)

                WordBookmark *captionTextMark = NULL;
                WordBookmark *labelNumMark = NULL;
                WordBookmark *textMark = NULL;

                if (!wantCaptionText && !wantLabelNum && !wantText)
                    wantText = 1;

                if (wantCaptionText) {
                    // Wrap everything from contentStart to the end of the caption
                    captionTextMark = createBookmark(put->conv);
                    DFNode *item = caption->contentStart;
                    while (item != NULL) {
                        DFNode *after = item->next;
                        DFAppendChild(captionTextMark->element,item);
                        item = after;
                    }
                    DFAppendChild(caption->element,captionTextMark->element);
                }

                if (wantLabelNum && (caption->number != NULL)) {
                    // Wrap everything from the start of the caption through the number
                    labelNumMark = createBookmark(put->conv);
                    DFNode *stopAt = caption->number->next;
                    DFNode *item = caption->element->first;
                    while ((item != NULL) && (item != stopAt)) {
                        DFNode *after = item->next;
                        DFAppendChild(labelNumMark->element,item);
                        item = after;
                    }
                    DFInsertBefore(caption->element,labelNumMark->element,caption->element->first);
                }

                if (wantText) {
                    // Wrap the caption's whole content, including any bookmarks added above
                    textMark = createBookmark(put->conv);
                    DFNode *item = caption->element->first;
                    while (item != NULL) {
                        DFNode *after = item->next;
                        DFAppendChild(textMark->element,item);
                        item = after;
                    }
                    DFAppendChild(caption->element,textMark->element);
                }

                caption->captionTextBookmark = captionTextMark;
                caption->labelNumBookmark = labelNumMark;
                caption->textBookmark = textMark;
                break;
            }
            default:
                break;
        }
    }

    free(targetIds);
    DFHashTableRelease(refsByTarget);
}
// Produce the abstract (HTML) node for a simple field (WORD_FLDSIMPLE) element.
//
// Returns NULL when concrete is not a WORD_FLDSIMPLE element, or when it has no WORD_INSTR
// attribute. Otherwise the instruction string is parsed into arguments and mapped as follows:
//
//   REF <name>      -> HTML_A whose href is "#" + idPrefix + the seqNo of the bookmark's
//                      target, with the class given by WordRefTypeClassName(). Only used
//                      when the named bookmark exists and has a target.
//   TOC \o ...      -> HTML_NAV with class DFTableOfContentsClass
//   TOC \c Figure   -> HTML_NAV with class DFListOfFiguresClass
//   TOC \c Table    -> HTML_NAV with class DFListOfTablesClass
//   anything else   -> HTML_SPAN with class DFFieldClass, containing the raw instruction
//                      string as a text node
static DFNode *WordFieldGet(WordGetData *get, DFNode *concrete)
{
    if (concrete->tag != WORD_FLDSIMPLE)
        return NULL;

    const char *instr = DFGetAttribute(concrete,WORD_INSTR);
    if (instr == NULL)
        return NULL;

    const char **fieldArgs = Word_parseField(instr);
    size_t numArgs = DFStringArrayCount(fieldArgs);
    DFNode *abstract = NULL;
    int recognised = 0; // set once one of the special field forms below has produced a node

    if ((numArgs >= 2) && (strcmp(fieldArgs[0],"REF") == 0)) {
        WordBookmark *referenced = WordObjectsBookmarkWithName(get->conv->objects,fieldArgs[1]);
        if ((referenced != NULL) && (referenced->target != NULL)) {
            WordRefType refType = WordRefTypeGet(fieldArgs,referenced);
            abstract = WordConverterCreateAbstract(get,HTML_A,concrete);
            DFFormatAttribute(abstract,HTML_HREF,"#%s%u",
                              get->conv->idPrefix,referenced->target->seqNo);
            DFSetAttribute(abstract,HTML_CLASS,WordRefTypeClassName(refType));
            recognised = 1;
        }
    }
    else if ((numArgs >= 1) && (strcmp(fieldArgs[0],"TOC") == 0)) {
        // Work out which kind of navigation list this is; stays NULL if it is none of them
        const char *navClass = NULL;

        if ((numArgs >= 2) && (strcmp(fieldArgs[1],"\\o") == 0)) {
            navClass = DFTableOfContentsClass;
        }
        else if ((numArgs >= 3) && (strcmp(fieldArgs[1],"\\c") == 0)) {
            // FIXME: The counter names "Figure" and "Table" are specific to documents created
            // in English. To handle other languages we need to either scan the document for
            // the counter names actually used in captions adjacent to figures and tables, or
            // keep a static list of the names used in all the major languages (which would
            // have to be checked against multiple versions of Word, since the names could in
            // theory change between releases).
            // We should keep track of a set of "document parameters" recording the names used
            // for figure and table counters, as well as the prefixes used on numbered figures
            // and tables. The latter would correspond to the content property of the
            // caption::before and figcaption::before CSS rules.
            if (strcmp(fieldArgs[2],"Figure") == 0)
                navClass = DFListOfFiguresClass;
            else if (strcmp(fieldArgs[2],"Table") == 0)
                navClass = DFListOfTablesClass;
        }

        if (navClass != NULL) {
            abstract = WordConverterCreateAbstract(get,HTML_NAV,concrete);
            DFSetAttribute(abstract,HTML_CLASS,navClass);
            recognised = 1;
        }
    }

    if (!recognised) {
        // Unsupported or unresolvable field: keep the instruction text in a generic field span
        abstract = WordConverterCreateAbstract(get,HTML_SPAN,concrete);
        DFSetAttribute(abstract,HTML_CLASS,DFFieldClass);
        DFNode *instrText = DFCreateTextNode(get->conv->html,instr);
        DFAppendChild(abstract,instrText);
    }

    // The argument array is no longer needed on any path once the node has been built
    free(fieldArgs);
    return abstract;
}