// Creates a placeholder <span> in the abstract (HTML) document for the given concrete node.
//
// The span is obtained from WordConverterCreateAbstract(), gets DFPlaceholderClass as its class
// attribute, and receives a single child text node containing placeholderText.
//
// Returns the newly created span.
static DFNode *createAbstractPlaceholder(WordGetData *get, const char *placeholderText, DFNode *concrete)
{
    DFNode *placeholder = WordConverterCreateAbstract(get,HTML_SPAN,concrete);
    DFSetAttribute(placeholder,HTML_CLASS,DFPlaceholderClass);
    DFAppendChild(placeholder,DFCreateTextNode(get->conv->html,placeholderText));
    return placeholder;
}
// SAX handler invoked for a CDATA block.
//
// ctx is the DFSAXParser driving the parse; value/len describe the block's content. Nothing is
// done while the parser is inside an ignored subtree (ignoreDepth > 0). Otherwise the content is
// added to the current parent as a text node.
static void SAXCDATABlock(void *ctx, const xmlChar *value, int len)
{
    DFSAXParser *state = (DFSAXParser *)ctx;
    if (state->ignoreDepth > 0)
        return;

    // The content arrives with an explicit length, so build a NUL-terminated copy of exactly
    // len bytes to hand to DFCreateTextNode().
    char *copy = (char *)xmalloc(len+1);
    memcpy(copy,value,len);
    copy[len] = '\0';

    DFNode *node = DFCreateTextNode(state->document,copy);
    assert(state->parent != NULL);
    DFAppendChild(state->parent,node);

    free(copy);
}
// SAX handler invoked for a run of character data.
//
// ctx is the DFSAXParser driving the parse; ch/len describe the characters. Nothing is done while
// the parser is inside an ignored subtree (ignoreDepth > 0). Otherwise the characters are added
// to the current parent as a text node.
static void SAXCharacters(void *ctx, const xmlChar *ch, int len)
{
    DFSAXParser *parser = (DFSAXParser *)ctx;
    if (parser->ignoreDepth > 0)
        return;

    // The characters arrive with an explicit length, so build a NUL-terminated copy of exactly
    // len bytes to hand to DFCreateTextNode().
    //
    // Allocate with xmalloc(), exactly as SAXCDATABlock() does, rather than with a bare malloc()
    // whose result was never checked before being written to.
    // NOTE(review): xmalloc() is presumably the project's checked allocator - confirm that it
    // never returns NULL.
    char *data = (char *)xmalloc(len+1);
    memcpy(data,ch,len);
    data[len] = '\0';

    DFNode *text = DFCreateTextNode(parser->document,data);
    assert(parser->parent != NULL);
    DFAppendChild(parser->parent,text);

    free(data);
}
// Recursively converts a Tidy node into a DFNode created in htmlDoc.
//
//  - Text nodes become DOM text nodes holding a copy of the Tidy node's value.
//  - CDATA, comment and root nodes produce no node (the root case also prints a diagnostic).
//  - Every other node type is treated as an element: its name is mapped to a tag in the HTML
//    namespace, all of its attributes are copied, and its children are converted recursively.
//    Children for which the conversion yields NULL are skipped.
//
// Returns the new node, or NULL if no node was produced (including the case of an element-like
// node that has no name).
DFNode *fromTidyNode(DFDocument *htmlDoc, TidyDoc tdoc, TidyNode tnode)
{
    switch (tidyNodeGetType(tnode)) {
        case TidyNode_Text: {
            char *value = copyTidyNodeValue(tnode,tdoc);
            DFNode *result = DFCreateTextNode(htmlDoc,value);
            free(value);
            return result;
        }
        case TidyNode_CDATA:
            break;
        case TidyNode_Comment:
            break;
        case TidyNode_Root:
            printf("Have root\n");
            break;
        default: {
            const char *name = tidyNodeGetName(tnode);
            if (name == NULL) {
                // %p requires a pointer to void and %d an int, so convert explicitly
                printf("NULL name for %p, type %d\n",(const void *)tnode,(int)tidyNodeGetType(tnode));
                return NULL;
            }

            const NamespaceDecl *namespaceDecl = DFNameMapNamespaceForID(htmlDoc->map,NAMESPACE_HTML);
            Tag tag = DFNameMapTagForName(htmlDoc->map,namespaceDecl->namespaceURI,name);
            DFNode *element = DFCreateElement(htmlDoc,tag);

            for (TidyAttr tattr = tidyAttrFirst(tnode); tattr != NULL; tattr = tidyAttrNext(tattr)) {
                // Distinct names here so the element's own name above is not shadowed
                const char *attrName = tidyAttrName(tattr);
                const char *attrValue = tidyAttrValue(tattr);
                if (attrValue == NULL) // Can happen in case of the empty string
                    attrValue = "";
                Tag attrTag = DFNameMapTagForName(htmlDoc->map,namespaceDecl->namespaceURI,attrName);
                DFSetAttribute(element,attrTag,attrValue);
            }

            for (TidyNode tchild = tidyGetChild(tnode); tchild != NULL; tchild = tidyGetNext(tchild)) {
                DFNode *child = fromTidyNode(htmlDoc,tdoc,tchild);
                if (child != NULL)
                    DFAppendChild(element,child);
            }
            return element;
        }
    }
    return NULL;
}
// Produces the abstract (HTML) node for one piece of Word run content.
//
//  - WORD_T / WORD_DELTEXT: a text node holding the concrete node's text.
//  - WORD_DRAWING / WORD_OBJECT / WORD_PICT: whatever WordDrawingGet() returns.
//  - WORD_TAB: a <span> with class DFTabClass.
//  - WORD_BR: a placeholder <span> ("[Column break]" or "[Page break]") when the break's type
//    attribute is "column" or "page"; otherwise a <br>.
//
// Returns NULL for any other tag.
static DFNode *WordRunContentGet(WordGetData *get, DFNode *concrete)
{
    switch (concrete->tag) {
        case WORD_T:
        case WORD_DELTEXT: {
            DFBuffer *contents = DFBufferNew();
            DFNodeTextToBuffer(concrete,contents);
            DFNode *textNode = DFCreateTextNode(get->conv->html,contents->data);
            DFBufferRelease(contents);
            return textNode;
        }
        case WORD_DRAWING:
        case WORD_OBJECT:
        case WORD_PICT:
            return WordDrawingGet(get,concrete);
        case WORD_TAB: {
            DFNode *tab = WordConverterCreateAbstract(get,HTML_SPAN,concrete);
            DFSetAttribute(tab,HTML_CLASS,DFTabClass);
            return tab;
        }
        case WORD_BR: {
            const char *breakType = DFGetAttribute(concrete,WORD_TYPE);

            // Column and page breaks are shown as a labelled placeholder; pick the label first
            const char *label = NULL;
            if (DFStringEquals(breakType,"column"))
                label = "[Column break]";
            else if (DFStringEquals(breakType,"page"))
                label = "[Page break]";

            // Any other kind of break maps directly to <br>
            if (label == NULL)
                return WordConverterCreateAbstract(get,HTML_BR,concrete);

            DFNode *placeholder = WordConverterCreateAbstract(get,HTML_SPAN,concrete);
            DFSetAttribute(placeholder,HTML_CLASS,DFPlaceholderClass);
            DFCreateChildTextNode(placeholder,label);
            return placeholder;
        }
        default:
            return NULL;
    }
}
// Materialises the parts of a CSS content value as <span> elements inside child.
//
// content is parsed with CSSParseContent(). Each resulting part is inserted, in order, before
// the node that was child's first child when the function was entered:
//
//  - A non-empty string part becomes a <span> containing that string. Empty string parts are
//    skipped, and no text node is created for them.
//  - A counter part named "figure" or "table" becomes a <span> with class DFFieldClass whose
//    text is the matching SEQ field instruction; caption->number is set to that span.
//  - Any other part is ignored.
//
// Does nothing if content is NULL.
static void Word_addContentParts(DFNode *child, const char *content, WordCaption *caption)
{
    if (content == NULL)
        return;

    // Captured once, so that every span lands in front of the original first child
    DFNode *nextSibling = child->first;
    DFArray *parts = CSSParseContent(content);

    for (int i = 0; i < DFArrayCount(parts); i++) {
        ContentPart *part = DFArrayItemAt(parts,i);
        switch (part->type) {
            case ContentPartString: {
                // Only build nodes that will actually be inserted
                if (strlen(part->value) > 0) {
                    DFNode *text = DFCreateTextNode(child->doc,part->value);
                    DFNode *span = DFCreateElement(child->doc,HTML_SPAN);
                    DFAppendChild(span,text);
                    DFInsertBefore(child,span,nextSibling);
                }
                break;
            }
            case ContentPartCounter: {
                // Select the field instruction for the counters we know about
                const char *fieldInstr = NULL;
                if (DFStringEquals(part->value,"figure"))
                    fieldInstr = " SEQ Figure \\* ARABIC ";
                else if (DFStringEquals(part->value,"table"))
                    fieldInstr = " SEQ Table \\* ARABIC ";

                if (fieldInstr != NULL) {
                    DFNode *span = DFCreateElement(child->doc,HTML_SPAN);
                    DFSetAttribute(span,HTML_CLASS,DFFieldClass);
                    DFCreateChildTextNode(span,fieldInstr);
                    DFInsertBefore(child,span,nextSibling);
                    caption->number = span;
                }
                break;
            }
            default:
                break;
        }
    }

    DFArrayRelease(parts);
}
// Creates the concrete (Word) node corresponding to one abstract (HTML) run-content node.
//
//  - HTML_IMG: whatever WordDrawingCreate() returns.
//  - HTML_BR: a WORD_BR element.
//  - HTML_SPAN: a WORD_TAB element if the span's class is DFTabClass, otherwise NULL.
//  - DOM_TEXT: a WORD_T or WORD_DELTEXT element wrapping a copy of the text.
//
// Returns NULL for any other tag.
static DFNode *WordRunContentCreate(WordPutData *put, DFNode *abstract)
{
    if (abstract->tag == HTML_IMG)
        return WordDrawingCreate(put,abstract);

    if (abstract->tag == HTML_BR)
        return DFCreateElement(put->contentDoc,WORD_BR);

    if (abstract->tag == HTML_SPAN) {
        const char *spanClass = DFGetAttribute(abstract,HTML_CLASS);
        return DFStringEquals(spanClass,DFTabClass) ? DFCreateElement(put->contentDoc,WORD_TAB) : NULL;
    }

    if (abstract->tag != DOM_TEXT)
        return NULL;

    DFNode *content = DFCreateTextNode(put->contentDoc,abstract->value);

    // Text inside a <w:del> element must be stored in a <w:delText> element
    // Text *not* inside a <w:del> element is stored in a <w:t> element
    Tag wrapperTag = WORD_T;
    for (DFNode *ancestor = abstract->parent; ancestor != NULL; ancestor = ancestor->parent) {
        if (ancestor->tag == HTML_DEL) {
            wrapperTag = WORD_DELTEXT;
            break; // one enclosing <del> is enough to decide
        }
    }

    DFNode *wrapper = DFCreateElement(put->contentDoc,wrapperTag);
    DFAppendChild(wrapper,content);

    // If trimming whitespace alters the text, mark the element xml:space="preserve"
    char *trimmed = DFStringTrimWhitespace(abstract->value);
    int trimChangedText = !DFStringEquals(trimmed,abstract->value);
    free(trimmed);
    if (trimChangedText)
        DFSetAttribute(wrapper,XML_SPACE,"preserve");

    return wrapper;
}
// Creates a text node containing data in parent's document and adds it to parent with
// DFAppendChild().
//
// Returns the new text node.
DFNode *DFCreateChildTextNode(DFNode *parent, const char *data)
{
    DFNode *child = DFCreateTextNode(parent->doc,data);
    DFAppendChild(parent,child);
    return child;
}
// Produces the abstract (HTML) node for a simple field (WORD_FLDSIMPLE).
//
// The field's instruction attribute is split into arguments with Word_parseField() and then
// mapped as follows:
//
//  - "REF <name>", where a bookmark with that name exists and has a target: an <a> whose href
//    is "#" followed by the converter's id prefix and the target's sequence number, and whose
//    class comes from the reference type.
//  - "TOC \o ...": a <nav> with class DFTableOfContentsClass.
//  - "TOC \c Figure ..." / "TOC \c Table ...": a <nav> with class DFListOfFiguresClass or
//    DFListOfTablesClass respectively.
//  - Anything else: a <span> with class DFFieldClass containing the raw instruction text.
//
// Returns NULL if concrete is not a WORD_FLDSIMPLE element or has no instruction attribute.
static DFNode *WordFieldGet(WordGetData *get, DFNode *concrete)
{
    if (concrete->tag != WORD_FLDSIMPLE)
        return NULL;

    const char *instr = DFGetAttribute(concrete,WORD_INSTR);
    if (instr == NULL)
        return NULL;

    const char **args = Word_parseField(instr);
    size_t argCount = DFStringArrayCount(args);
    DFNode *result = NULL;

    if ((argCount >= 2) && (strcmp(args[0],"REF") == 0)) {
        WordBookmark *bookmark = WordObjectsBookmarkWithName(get->conv->objects,args[1]);
        if ((bookmark != NULL) && (bookmark->target != NULL)) {
            WordRefType type = WordRefTypeGet(args,bookmark);
            result = WordConverterCreateAbstract(get,HTML_A,concrete);
            DFFormatAttribute(result,HTML_HREF,"#%s%u",get->conv->idPrefix,bookmark->target->seqNo);
            DFSetAttribute(result,HTML_CLASS,WordRefTypeClassName(type));
            goto done;
        }
    }
    else if ((argCount >= 1) && (strcmp(args[0],"TOC") == 0)) {
        // Work out which kind of listing this is; NULL means "not one we recognise"
        const char *navClass = NULL;

        if ((argCount >= 2) && (strcmp(args[1],"\\o") == 0)) {
            navClass = DFTableOfContentsClass;
        }
        else if ((argCount >= 3) && (strcmp(args[1],"\\c") == 0)) {
            // FIXME: The names "Figure" and "Table" here will be different if the document
            // was created in a language other than English. We need to look through the
            // document to figure out which counter names are used in captions adjacent to
            // figures and tables to know what the counter names used in the document
            // actually are.
            //
            // Another option might be just to collect a static list of names used in all the
            // major languages and base the detection on that. These would need to be checked
            // with multiple versions of Word, as the names used could in theory change
            // between releases.
            //
            // We should keep track of a set of "document parameters", which record the names
            // used for figure and table counters, as well as the prefixes used on numbered
            // figures and tables. The latter would correspond to the content property of the
            // caption::before and figcaption::before CSS rules.
            if (strcmp(args[2],"Figure") == 0)
                navClass = DFListOfFiguresClass;
            else if (strcmp(args[2],"Table") == 0)
                navClass = DFListOfTablesClass;
        }

        if (navClass != NULL) {
            result = WordConverterCreateAbstract(get,HTML_NAV,concrete);
            DFSetAttribute(result,HTML_CLASS,navClass);
            goto done;
        }
    }

    // Unrecognised field: expose the raw instruction text inside a field span
    result = WordConverterCreateAbstract(get,HTML_SPAN,concrete);
    DFSetAttribute(result,HTML_CLASS,DFFieldClass);
    DFAppendChild(result,DFCreateTextNode(get->conv->html,instr));

done:
    free(args);
    return result;
}