int textPackageList(const char *filename, DFError **error) { char *filePath = DFPathDirName(filename); char *value = DFStringReadFromFile(filename,error); int result = 0; if (value == NULL) DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error)); else if (!textPackageListRecursive(value,filePath,error,0)) DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error)); else result = 1; free(filePath); free(value); return result; }
static int fromPlain2(const char *inStr, const char *inPath, const char *outFilename, DFError **error) { char *outExtension = DFPathExtension(outFilename); int isDocx = DFStringEqualsCI(outExtension,"docx"); int ok = 0; if (!isDocx) { DFErrorFormat(error,"%s: Unknown extension",outFilename); goto end; } DFStorage *storage = NULL; storage = Word_fromPlain(inStr,inPath,error); if (storage == NULL) goto end; ok = DFZip(outFilename,storage,error); DFStorageRelease(storage); return ok; end: free(outExtension); return ok; }
static int processIncludes(TextPackage *package, const char *input, DFBuffer *output, const char *path, DFError **error) { int ok = 1; const char **lines = DFStringSplit(input,"\n",0); for (int lineno = 0; lines[lineno] && ok; lineno++) { const char *line = lines[lineno]; if (DFStringHasPrefix(line,"#include \"") && DFStringHasSuffix(line,"\"")) { char *inclRelPath = DFSubstring(line,10,strlen(line)-1); char *inclAbsPath = DFAppendPathComponent(path,inclRelPath); char *inclDirName = DFPathDirName(inclAbsPath); char *inclContent = DFStringReadFromFile(inclAbsPath,error); if (inclContent == NULL) { DFErrorFormat(error,"%s: %s",inclRelPath,DFErrorMessage(error)); ok = 0; } else if (!processIncludes(package,inclContent,output,inclDirName,error)) { ok = 0; } free(inclRelPath); free(inclAbsPath); free(inclDirName); free(inclContent); } else { DFBufferFormat(output,"%s\n",line); } } free(lines); return ok; }
static int prettyPrintODFFile(const char *filename, DFError **error) { int ok = 0; char *odf = NULL; DFStorage *storage = NULL; storage = DFStorageOpenZip(filename,error); if (storage == NULL) { DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error)); goto end; } /* odf = ODF_toPlain(storage,NULL); printf("%s",odt); */ printf("ODF file support has not been implemented yet.\n"); ok = 1; end: free(odf); DFStorageRelease(storage); return ok; }
static char *Word_toPlainFromDir(DFStorage *storage, DFHashTable *parts, DFError **error) { char *documentPath = NULL; DFHashTable *rels = DFHashTableNew((DFCopyFunction)xstrdup,(DFFreeFunction)free); DFBuffer *output = DFBufferNew(); char *relsPathRel = NULL; DFDocument *relsDoc = NULL; int ok = 0; documentPath = findDocumentPath(storage,error); if (documentPath == NULL) { DFErrorFormat(error,"findDocumentPath: %s",DFErrorMessage(error)); goto end; } relsPathRel = computeDocumentRelsPath(documentPath); if (DFStorageExists(storage,relsPathRel) && ((relsDoc = DFParseXMLStorage(storage,relsPathRel,error)) == NULL)) { DFErrorFormat(error,"%s: %s",relsPathRel,DFErrorMessage(error)); goto end; } parseDocumentRels(documentPath,relsDoc,rels,error); if (!processParts(parts,documentPath,relsDoc,rels,output,storage,error)) goto end; ok = 1; end: free(relsPathRel); free(documentPath); DFHashTableRelease(rels); DFDocumentRelease(relsDoc); if (!ok) { DFBufferRelease(output); return NULL; } else { char *result = xstrdup(output->data); DFBufferRelease(output); return result; } }
static DFBuffer *readData(const char *filename, DFError **error) { if ((filename == NULL) || !strcmp(filename,"-")) filename = "/dev/stdin";; DFBuffer *buffer = DFBufferReadFromFile(filename,error); if (buffer == NULL) { DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error)); return NULL; } return buffer; }
int DFZip(const char *zipFilename, DFStorage *storage, DFError **error) { const char **allPaths = NULL; DFBuffer *content = NULL; int ok = 0; DFextZipHandleP zipHandle = NULL; allPaths = DFStorageList(storage,error); if (allPaths == NULL || !(zipHandle = DFextZipCreate(zipFilename))) { DFErrorFormat(error,"Cannot create file"); } else { for (int i = 0; allPaths[i]; i++) { const char *path = allPaths[i]; DFBufferRelease(content); content = DFBufferReadFromStorage(storage,path,error); if (content == NULL) { DFErrorFormat(error,"%s: %s",path,DFErrorMessage(error)); goto end; } if (!zipAddFile(zipHandle, path, content, error)) goto end; } ok = 1; } end: DFBufferRelease(content); free(allPaths); if (zipHandle != NULL) DFextZipClose(zipHandle); return ok; }
static int parsePackage(TextPackage *package, const char *string, const char *path, DFError **error) { DFBuffer *replaced = DFBufferNew(); if (!strcmp(path,"")) path = "."; if (!processIncludes(package,string,replaced,path,error)) { DFBufferRelease(replaced); return 0; } char *currentKey = strdup(""); DFBuffer *currentValue = DFBufferNew(); const char **lines = DFStringSplit(replaced->data,"\n",0); for (int lineno = 0; lines[lineno]; lineno++) { const char *line = lines[lineno]; if (!DFStringHasPrefix(line,"#")) { DFBufferFormat(currentValue,"%s\n",line); } else if (DFStringHasPrefix(line,"#item ")) { package->keys = (char **)realloc(package->keys,(package->nkeys+2)*sizeof(char *)); package->keys[package->nkeys++] = strdup(currentKey); package->keys[package->nkeys] = NULL; DFHashTableAdd(package->items,currentKey,currentValue->data); free(currentKey); DFBufferRelease(currentValue); currentKey = DFSubstring(line,6,strlen(line)); currentValue = DFBufferNew(); } else if (DFStringHasPrefix(line,"##")) { DFBufferFormat(currentValue,"%s\n",&line[1]); } else { DFErrorFormat(error,"Unknown command: %s on line %d",line,(lineno+1)); return 0; } } package->keys = (char **)realloc(package->keys,(package->nkeys+2)*sizeof(char *)); package->keys[package->nkeys++] = strdup(currentKey); package->keys[package->nkeys] = NULL; DFHashTableAdd(package->items,currentKey,currentValue->data); free(lines); free(currentKey); DFBufferRelease(currentValue); DFBufferRelease(replaced); return 1; }
static int writeData(DFBuffer *buf, const char *filename, DFError **error) { if ((filename == NULL) || !strcmp(filename,"-")) { fwrite(buf->data,buf->len,1,stdout); return 1; } else if (!DFBufferWriteToFile(buf,filename,error)) { DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error)); return 0; } else { return 1; } }
TextPackage *TextPackageNewWithFile(const char *filename, DFError **error) { char *contents = DFStringReadFromFile(filename,error); if (contents == NULL) { DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error)); return NULL; } char *path = DFPathDirName(filename); TextPackage *result = TextPackageNewWithString(contents,path,error); free(path); free(contents); return result; }
int WordConverterConvertToHTML(WordConverter *converter, DFError **error) { converter->haveFields = Word_simplifyFields(converter->package); Word_mergeRuns(converter->package); if (converter->package->document == NULL) { DFErrorFormat(error,"document.xml not found"); return 0; } DFNode *wordDocument = DFChildWithTag(converter->package->document->docNode,WORD_DOCUMENT); if (wordDocument == NULL) { DFErrorFormat(error,"word:document not found"); return 0; } WordAddNbsps(converter->package->document); WordFixLists(converter); CSSSheetRelease(converter->styleSheet); converter->styleSheet = WordParseStyles(converter); WordObjectsCollapseBookmarks(converter->objects); WordObjectsScan(converter->objects); WordObjectsAnalyzeBookmarks(converter->objects,converter->styles); WordGetData get; get.conv = converter; DFNode *abstract = WordDocumentLens.get(&get,wordDocument); DFAppendChild(converter->html->docNode,abstract); Word_postProcessHTMLDoc(converter); HTMLAddExternalStyleSheet(converter->html,"reset.css"); char *cssText = CSSSheetCopyCSSText(converter->styleSheet); HTMLAddInternalStyleSheet(converter->html,cssText); free(cssText); return 1; }
DFDocument *DFParseXMLString(const char *str, DFError **error) { DFSAXParser *parser = DFSAXParserNew(); DFSAXParserParse(parser,str,strlen(str)); if (parser->fatalErrors->len > 0) { DFErrorFormat(error,"%s",parser->fatalErrors->data); DFSAXParserFree(parser); return NULL; } else if (parser->errors->len > 0) { DFErrorFormat(error,"%s",parser->errors->data); DFSAXParserFree(parser); return NULL; } else if (parser->document->root == NULL) { DFErrorFormat(error,"No root element"); DFSAXParserFree(parser); return NULL; } DFDocument *result = DFDocumentRetain(parser->document); DFSAXParserFree(parser); return result; }
int textPackageGet(const char *filename, const char *itemPath, DFError **error) { char *value = DFStringReadFromFile(filename,error); if (value == NULL) { DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error)); return 0; } const char **components = DFStringSplit(itemPath,"/",0); for (size_t i = 0; components[i]; i++) { const char *name = components[i]; char *filePath = DFPathDirName(filename); TextPackage *package = TextPackageNewWithString(value,filePath,error); free(filePath); if (package == NULL) { free(value); free(components); return 0; } free(value); value = xstrdup(DFHashTableLookup(package->items,name)); if (value == NULL) { DFErrorFormat(error,"%s: Item %s not found",filename,itemPath); TextPackageRelease(package); free(value); free(components); return 0; } TextPackageRelease(package); } free(components); printf("%s",value); free(value); return 1; }
int diffFiles(const char *filename1, const char *filename2, DFError **error) { DFDocument *doc1 = DFParseHTMLFile(filename1,0,error); if (doc1 == NULL) { DFErrorFormat(error,"%s: %s",filename1,DFErrorMessage(error)); return 0; } DFDocument *doc2 = DFParseHTMLFile(filename1,0,error); if (doc2 == NULL) { DFErrorFormat(error,"%s: %s",filename2,DFErrorMessage(error)); DFDocumentRelease(doc1); return 0; } DFComputeChanges(doc1->root,doc2->root,HTML_ID); char *changesStr = DFChangesToString(doc1->root); printf("%s",changesStr); free(changesStr); DFDocumentRelease(doc1); DFDocumentRelease(doc2); return 1; }
static int saveXMLDocument(DFStorage *storage, const char *filename, DFDocument *doc, NamespaceID defaultNS, DFError **error) { char *parentPath = DFPathDirName(filename); int ok = 0; if (!DFSerializeXMLStorage(doc,defaultNS,0,storage,filename,error)) { DFErrorFormat(error,"serialize %s: %s",filename,DFErrorMessage(error)); goto end; } ok = 1; end: free(parentPath); return ok; }
int normalizeFile(const char *filename, DFError **error) { DFDocument *doc = DFParseHTMLFile(filename,0,error); if (doc == NULL) { DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error)); return 0; } HTML_normalizeDocument(doc); HTML_safeIndent(doc->docNode,0); char *str = DFSerializeXMLString(doc,0,0); printf("%s",str); free(str); DFDocumentRelease(doc); return 1; }
int prettyPrintFile(const char *filename, DFError **error) { int ok; char *extension = DFPathExtension(filename); if (DFStringEqualsCI(extension,"xml")) ok = prettyPrintXMLFile(filename,0,error); else if (DFStringEqualsCI(extension,"html") || DFStringEqualsCI(extension,"htm")) ok = prettyPrintXMLFile(filename,1,error); else if (DFStringEqualsCI(extension,"docx")) ok = prettyPrintWordFile(filename,error); else if (DFStringEqualsCI(extension,"odt")) ok = prettyPrintODFFile(filename,error); else { DFErrorFormat(error,"Unknown file type"); ok = 0; } free(extension); return ok; }
static int addRelatedDoc(DFHashTable *parts, DFHashTable *documentRels, const char *relName, const char *filename, DFBuffer *output, DFHashTable *includeTypes, DFStorage *storage, DFError **error) { const char *relPath = DFHashTableLookup(documentRels,relName); if (relPath == NULL) return 1;; DFDocument *doc = DFParseXMLStorage(storage,relPath,error); if (doc == NULL) { DFErrorFormat(error,"%s: %s",relPath,DFErrorMessage(error)); return 0; } if (doc->root->first != NULL) { addStrippedSerializedDoc(output,doc,filename); DFHashTableAdd(includeTypes,relName,""); } DFDocumentRelease(doc); return 1; }
int testCSS(const char *filename, DFError **error) { char *input = DFStringReadFromFile(filename,error); if (input == NULL) { DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error)); return 0; } CSSSheet *styleSheet = CSSSheetNew(); CSSSheetUpdateFromCSSText(styleSheet,input); char *text = CSSSheetCopyText(styleSheet); printf("%s",text); free(text); printf("================================================================================\n"); char *cssText = CSSSheetCopyCSSText(styleSheet); printf("%s",cssText); free(cssText); CSSSheetRelease(styleSheet); free(input); return 1; }
static int prettyPrintWordFile(const char *filename, DFError **error) { int ok = 0; char *plain = NULL; DFStorage *storage = NULL; storage = DFStorageOpenZip(filename,error); if (storage == NULL) { DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error)); goto end; } plain = Word_toPlain(storage,NULL); printf("%s",plain); ok = 1; end: free(plain); DFStorageRelease(storage); return ok; }
static char *findDocumentPath(DFStorage *storage, DFError **error) { int ok = 0; DFDocument *relsDoc = NULL; char *result = NULL; relsDoc = DFParseXMLStorage(storage,"/_rels/.rels",error); if (relsDoc == NULL) { DFErrorFormat(error,"_rels/.rels: %s",DFErrorMessage(error)); goto end; } for (DFNode *child = relsDoc->root->first; child != NULL; child = child->next) { if (child->tag != REL_RELATIONSHIP) continue; const char *type = DFGetAttribute(child,NULL_Type); const char *target = DFGetAttribute(child,NULL_TARGET); if ((type == NULL) || (target == NULL)) continue; if (strcmp(type,"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")) continue; result = xstrdup(target); ok = 1; break; } end: DFDocumentRelease(relsDoc); if (ok) return result; free(result); return NULL; }
static int processParts(DFHashTable *parts, const char *documentPath, DFDocument *relsDoc, DFHashTable *documentRels, DFBuffer *output, DFStorage *storage, DFError **error) { int ok = 0; DFHashTable *includeTypes = DFHashTableNew((DFCopyFunction)xstrdup,free); DFHashTableAdd(includeTypes,WORDREL_HYPERLINK,""); DFHashTableAdd(includeTypes,WORDREL_IMAGE,""); if ((parts == NULL) || (DFHashTableLookup(parts,"document") != NULL)) { DFDocument *doc = DFParseXMLStorage(storage,documentPath,error); if (doc == NULL) goto end; addStrippedSerializedDoc(output,doc,"document.xml"); DFDocumentRelease(doc); } if ((parts == NULL) || (DFHashTableLookup(parts,"styles") != NULL)) { if (!addRelatedDoc(parts,documentRels,WORDREL_STYLES,"styles.xml",output,includeTypes,storage,error)) goto end; } if ((parts == NULL) || (DFHashTableLookup(parts,"numbering") != NULL)) { if (!addRelatedDoc(parts,documentRels,WORDREL_NUMBERING,"numbering.xml",output,includeTypes,storage,error)) goto end; } if ((parts == NULL) || (DFHashTableLookup(parts,"footnotes") != NULL)) { if (!addRelatedDoc(parts,documentRels,WORDREL_FOOTNOTES,"footnotes.xml",output,includeTypes,storage,error)) goto end; } if ((parts == NULL) || (DFHashTableLookup(parts,"endnotes") != NULL)) { if (!addRelatedDoc(parts,documentRels,WORDREL_ENDNOTES,"endnotes.xml",output,includeTypes,storage,error)) goto end; } if ((parts != NULL) && (DFHashTableLookup(parts,"settings") != NULL)) { if (!addRelatedDoc(parts,documentRels,WORDREL_SETTINGS,"settings.xml",output,includeTypes,storage,error)) goto end; } if ((parts != NULL) && (DFHashTableLookup(parts,"theme") != NULL)) { if (!addRelatedDoc(parts,documentRels,WORDREL_THEME,"theme.xml",output,includeTypes,storage,error)) goto end; } if ((DFHashTableLookup(documentRels,WORDREL_HYPERLINK) != NULL) || (DFHashTableLookup(documentRels,WORDREL_IMAGE) != NULL) || ((parts != NULL) && (DFHashTableLookup(parts,"documentRels") != NULL))) { if (relsDoc == NULL) { DFErrorFormat(error,"document.xml.rels does not exist"); goto end; } DFNode *next; for (DFNode *child = relsDoc->root->first; child != NULL; child = next) { next = child->next; if (child->tag != REL_RELATIONSHIP) continue; const char *type = DFGetAttribute(child,NULL_Type); if ((type != NULL) && (DFHashTableLookup(includeTypes,type) == NULL)) { DFRemoveNode(child); } } addSerializedDoc(output,relsDoc,"document.xml.rels"); } const char **entries = DFStorageList(storage,NULL); if (entries != NULL) { // FIXME: Should really report an error if this is not the case for (int i = 0; entries[i]; i++) { const char *filename = entries[i]; char *extension = DFPathExtension(filename); if (DFStringEqualsCI(extension,"png") || DFStringEqualsCI(extension,"jpg")) { char *absFilename; if (!DFStringHasSuffix(filename,"/")) absFilename = DFFormatString("/%s",filename); else absFilename = xstrdup(filename); DFBuffer *data = DFBufferReadFromStorage(storage,absFilename,NULL); addSerializedBinary(output,data,absFilename); DFBufferRelease(data); free(absFilename); } free(extension); } } free(entries); DFHashTableRelease(includeTypes); ok = 1; end: return ok; }
int WordConverterUpdateFromHTML(WordConverter *converter, DFError **error) { if (converter->package->document == NULL) { DFErrorFormat(error,"document.xml not found"); return 0; } DFNode *wordDocument = DFChildWithTag(converter->package->document->docNode,WORD_DOCUMENT); if (wordDocument == NULL) { DFErrorFormat(error,"word:document not found"); return 0; } // FIXME: Need a more reliable way of telling whether this is a new document or not - it could be that the // document already existed (with styles set up) but did not have any content DFNode *wordBody = DFChildWithTag(wordDocument,WORD_BODY); int creating = ((wordBody == NULL) || (wordBody->first == NULL)); converter->haveFields = Word_simplifyFields(converter->package); Word_mergeRuns(converter->package); assert(converter->package->styles); CSSSheetRelease(converter->styleSheet); converter->styleSheet = CSSSheetNew(); char *cssText = HTMLCopyCSSText(converter->html); CSSSheetUpdateFromCSSText(converter->styleSheet,cssText); free(cssText); addMissingDefaultStyles(converter); CSSEnsureReferencedStylesPresent(converter->html,converter->styleSheet); if (creating) CSSSetHTMLDefaults(converter->styleSheet); CSSEnsureUnique(converter->styleSheet,converter->html,creating); CSSStyle *pageStyle = CSSSheetLookupElement(converter->styleSheet,"@page",NULL,0,0); CSSStyle *bodyStyle = CSSSheetLookupElement(converter->styleSheet,"body",NULL,1,0); CSSProperties *page = (pageStyle != NULL) ? CSSPropertiesRetain(CSSStyleRule(pageStyle)) : CSSPropertiesNew(); CSSProperties *body = (bodyStyle != NULL) ? CSSPropertiesRetain(CSSStyleRule(bodyStyle)) : CSSPropertiesNew(); if (CSSGet(body,"margin-left") == NULL) CSSPut(body,"margin-left","10%"); if (CSSGet(body,"margin-right") == NULL) CSSPut(body,"margin-right","10%"); if (CSSGet(body,"margin-top") == NULL) CSSPut(body,"margin-top","10%"); if (CSSGet(body,"margin-bottom") == NULL) CSSPut(body,"margin-bottom","10%"); WordSectionUpdateFromCSSPage(converter->mainSection,page,body); WordPutData put; put.conv = converter; put.numIdByHtmlId = DFHashTableNew((DFCopyFunction)strdup,free); put.htmlIdByNumId = DFHashTableNew((DFCopyFunction)strdup,free); // Make sure we update styles.xml from the CSS stylesheet *before* doing any conversion of the content, // since the latter requires a full mapping of CSS selectors to styleIds to be in place. WordUpdateStyles(converter,converter->styleSheet); Word_preProcessHTMLDoc(converter,converter->html); buildListMapFromHTML(&put,converter->html->docNode); updateListTypes(&put); WordBookmarks_removeCaptionBookmarks(converter->package->document); WordObjectsCollapseBookmarks(converter->objects); WordObjectsScan(converter->objects); Word_setupBookmarkLinks(&put); WordObjectsAnalyzeBookmarks(converter->objects,converter->styles); WordDocumentLens.put(&put,converter->html->root,wordDocument); WordObjectsExpandBookmarks(converter->objects); WordRemoveNbsps(converter->package->document); // Make sure the updateFields flag is set Word_updateSettings(converter->package,converter->haveFields); // Remove any abstract numbering definitions that are no longer referenced from concrete // numbering definitions WordNumberingRemoveUnusedAbstractNums(converter->numbering); // Remove any relationships and images that have been removed from the HTML file and no longer // have any other references pointing to them WordGarbageCollect(converter->package); CSSPropertiesRelease(page); CSSPropertiesRelease(body); DFHashTableRelease(put.numIdByHtmlId); DFHashTableRelease(put.htmlIdByNumId); return 1; }