//-------------------------------------------------------------- bool ofxEditorSyntax::loadFile(const std::string& xmlFile) { std::string path = ofToDataPath(xmlFile); ofXml xml; if(!xml.load(path)) { ofLogError("ofxEditorSyntax") << "couldn't load \"" << ofFilePath::getFileName(xmlFile) << "\""; return false; } auto root = xml.getChild("syntax"); if(!root) { ofLogWarning("ofxEditorSyntax") << "root xml tag not \"syntax\", ignoring"; return false; } clear(); for(auto & child : root.getChildren()) { if(child.getName() == "lang") {setLang(child.getValue());} else if(child.getName() == "files") { for(auto & file : child.getChildren()) { if(file.getName() == "ext") {addFileExt(file.getValue());} else { ofLogWarning("ofxEditorSyntax") << "ignoring unknown files xml tag \"" << file.getName() << "\""; } } } else if(child.getName() == "singlecomment") {singleLineComment = string_to_wstring(child.getValue());} else if(child.getName() == "multicomment") { auto begin = child.getChild("begin"); auto end = child.getChild("end"); if(begin) {multiLineCommentBegin = string_to_wstring(begin.getValue());} if(end) {multiLineCommentBegin = string_to_wstring(end.getValue());} } else if(child.getName() == "stringliteral") { auto begin = child.getChild("begin"); auto end = child.getChild("end"); if(begin) {stringLiteralBegin = string_to_wstring(begin.getValue());} if(end) {stringLiteralEnd = string_to_wstring(end.getValue());} } else if(child.getName() == "preprocessor") {preprocessor = string_to_wstring(child.getValue());} else if(child.getName() == "hexliteral") { std::string b = child.getValue(); if(b == "true") {setHexLiteral(true);} else if(b == "false") {setHexLiteral(false);} else { ofLogWarning("ofxEditorSyntax") << "ignoring unknown xml bool string \"" << b << "\""; } } else if(child.getName() == "operator") {operatorChars = string_to_wstring(child.getValue());} else if(child.getName() == "punctuation") {punctuationChars = string_to_wstring(child.getValue());} else if(child.getName() == "words") { for(auto &word : child.getChildren()) { if(word.getName() == "keyword") {setWord(word.getValue(), KEYWORD);} else if(word.getName() == "typename") {setWord(word.getValue(), TYPENAME);} else if(word.getName() == "function") {setWord(word.getValue(), FUNCTION);} else { ofLogWarning("ofxEditorSyntax") << "ignoring unknown words xml tag \"" << word.getName() << "\""; } } } else { ofLogWarning("ofxEditorSyntax") << "ignoring unknown xml tag \"" << child.getName() << "\""; } } return true; }
//-------------------------------------------------------------- bool ofxEditorSyntax::loadFile(const string& xmlFile) { string path = ofToDataPath(xmlFile); ofXml xml; if(!xml.load(path)) { ofLogError("ofxEditorSyntax") << "couldn't load \"" << ofFilePath::getFileName(xmlFile) << "\""; return false; } xml.setToParent(); if(!xml.exists("syntax")) { ofLogWarning("ofxEditorSyntax") << "root xml tag not \"syntax\", ignoring"; return false; } xml.setTo("syntax"); int numTags = xml.getNumChildren(); clear(); for(int i = 0; i < numTags; ++i) { xml.setToChild(i); if(xml.getName() == "lang") {setLang(xml.getValue());} else if(xml.getName() == "files") { int numExts = xml.getNumChildren(); for(int e = 0; e < numExts; ++e) { xml.setToChild(e); if(xml.getName() == "ext") {addFileExt(xml.getValue());} else { ofLogWarning("ofxEditorSyntax") << "ignoring unknown files xml tag \"" << xml.getName() << "\""; } xml.setToParent(); } } else if(xml.getName() == "singlecomment") {singleLineComment = string_to_wstring(xml.getValue());} else if(xml.getName() == "multicomment") { if(xml.exists("begin")) {multiLineCommentBegin = string_to_wstring(xml.getValue("begin"));} if(xml.exists("end")) {multiLineCommentBegin = string_to_wstring(xml.getValue("end"));} } else if(xml.getName() == "preprocessor") {preprocessor = string_to_wstring(xml.getValue());} else if(xml.getName() == "hexliteral") { string b = xml.getValue(); if(b == "true") {setHexLiteral(true);} else if(b == "false") {setHexLiteral(false);} else { ofLogWarning("ofxEditorSyntax") << "ignoring unknown xml bool string \"" << b << "\""; } } else if(xml.getName() == "operator") {operatorChars = string_to_wstring(xml.getValue());} else if(xml.getName() == "punctuation") {punctuationChars = string_to_wstring(xml.getValue());} else if(xml.getName() == "words") { int numWords = xml.getNumChildren(); for(int w = 0; w < numWords; ++w) { xml.setToChild(w); if(xml.getName() == "keyword") {setWord(xml.getValue(), KEYWORD);} else if(xml.getName() == "typename") {setWord(xml.getValue(), TYPENAME);} else if(xml.getName() == "function") {setWord(xml.getValue(), FUNCTION);} else { ofLogWarning("ofxEditorSyntax") << "ignoring unknown words xml tag \"" << xml.getName() << "\""; } xml.setToParent(); } } else { ofLogWarning("ofxEditorSyntax") << "ignoring unknown xml tag \"" << xml.getName() << "\""; } xml.setToParent(); } xml.clear(); return true; }
//-------------------------------------------------------------- void ofxEditorSyntax::addFileExt(const std::vector<std::string> &exts) { for(int i = 0; i < exts.size(); ++i) { addFileExt(exts[i]); } }
char *uploadfile(char *filename, char *lookForSimilar, char *lang) { #ifndef CAN_MAGIC o_log(ERROR, "Unable to determin the file type, aborting."); return NULL; #else int width = 0, height = 0, itype = PLACE_HOLDER; char *final_filename, *ocrText = NULL, *tmp; #ifdef CAN_PDF char *thumbext = NULL; #else #ifdef CAN_READODF char *thumbext = NULL; #endif /* CAN_READODF */ #endif /* CAN_PDF */ char *docid; char *ftype; char *datafile; char *thumbfile = NULL; PIX *pix; datafile = o_printf("/tmp/%s.dat", filename); magic_t cookie = magic_open(MAGIC_MIME_TYPE); magic_load( cookie, NULL ); const char *t = magic_file( cookie, datafile ); ftype = o_strdup( t ); o_log( ERROR, "Uploaded file looks to be of type: %s", ftype ); magic_close( cookie ); // -------------------------------------- if( 0 == strcmp("application/pdf", ftype) ) { itype = PDF_FILETYPE; #ifdef CAN_PDF thumbfile = o_printf("/tmp/%s.thumb", filename); ocrText = parse_pdf( datafile, thumbfile ); // pdf_plug.cc [create thumbnail and return body text] thumbext = o_strdup("jpg"); #endif /* CAN_PDF */ o_log( INFORMATION, "Processed PDF"); } // -------------------------------------- else if( 0 == strcmp("application/vnd.oasis.opendocument.text", ftype) ) { itype = ODF_FILETYPE; #ifdef CAN_READODF thumbfile = o_printf("/tmp/%s.thumb", filename); get_odf_Thumb( datafile, thumbfile ); ocrText = get_odf_Text( datafile ); // odf_plug.c thumbext = o_strdup("png"); #endif /* CAN_READODF */ o_log( INFORMATION, "Processed ODF doc"); } // -------------------------------------- else if( 0 == strcmp("image/jpeg", ftype) ) { itype = JPG_FILETYPE; #ifdef CAN_OCR PIX *pix_l; if ( ( pix_l = pixRead( datafile ) ) == NULL) { o_log(ERROR, "Could not load the image data into a PIX"); return NULL; } int depth; pixGetDimensions( pix_l, &width, &height, &depth ); o_log(INFORMATION, "Convertion process: Loaded (depth: %d)", depth ); pix = pixScaleRGBToGrayFast( pix_l, 1, COLOR_GREEN ); pixDestroy( &pix_l ); if (pix == NULL ) { o_log(ERROR,"Conversion process failed pixScaleRGBToGrayFast! skip ocr"); } else { o_log(INFORMATION, "Convertion process: Reduced depth to %d", pixGetDepth(pix)); ocrText = getTextFromImage(pix, 0, "eng"); } #endif /* CAN_OCR */ o_log( INFORMATION, "Processed JPG doc"); } // -------------------------------------- else { free( ftype ); free( datafile ); o_log(ERROR, "unknown file type."); return NULL; } free( ftype ); // Set a default OCR text string if( ocrText == NULL ) { ocrText = o_strdup( getString("LOCAL_ocr_default_text", lang ) ); } // Save the record to the DB o_log(DEBUGM, "Saving doc import record"); docid = addNewFileDoc(itype, width, height, ocrText); // ocrText get freed in this method // Move the main datafile to the file store location final_filename = o_printf("%s/scans/%s", BASE_DIR, docid); // none image imported docs, are stored with no "_x" postfix. if( itype == JPG_FILETYPE ) { conCat(&final_filename, "_1"); } addFileExt(&final_filename, itype); fcopy(datafile, final_filename); o_log( DEBUGM, "Moved data file"); // The original file will be unlinked by the HTTPD process free(datafile); // Move any thumbnail image to the file store location if( thumbfile ) { free(final_filename); // This currently holds the main PDG or ODF file. final_filename = o_printf("%s/scans/%s_thumb.%s", BASE_DIR, docid, thumbext); // any thumbnails are postfixed with "_thumb" fcopy(thumbfile, final_filename); o_log( DEBUGM, "Moved thumbnail file"); unlink(thumbfile); free(thumbfile); free(thumbext); #ifdef CAN_PHASH o_log( DEBUGM, "About to perform pHash on file"); unsigned long long hash = getImagePhash_fn( final_filename ); savePhash( atoi(docid), hash ); #endif /* CAN_PHASH */ } else { #ifdef CAN_PHASH o_log( DEBUGM, "About to perform pHash on pix"); unsigned long long hash = getImagePhash_px( pix ); savePhash( atoi(docid), hash ); #endif /* CAN_PHASH */ pixDestroy( &pix ); } free(final_filename); // Should we look for a similar doc, on opening? char *findSim = ""; #ifdef CAN_PHASH if( lookForSimilar != (void *)NULL ) { findSim = "&findSimilar=1"; } #endif /* CAN_PHASH */ // Open the document for editing. tmp = o_printf("<html><HEAD><META HTTP-EQUIV=\"refresh\" CONTENT=\"0;URL=/opendias/docDetail.html?docid=%s%s\"></HEAD><body></body></html>", docid, findSim); free(docid); return tmp; #endif /* CAN_MAGIC */ }