// Look up (and cache) the DjVuFile that `id` designates inside the document
// located at `url`.
//
//   url  Location of the document.  Its string form is the key into m_docs.
//   id   Component selector, taken by value because it is rewritten here:
//          - an integer string greater than zero is treated as a 1-based page
//            number and replaced by doc->page_to_id(number-1);
//          - an integer string that is zero or negative is left untouched;
//          - an empty string is replaced by doc->page_to_id(0);
//          - anything else is used as a file id as-is.
//
// Returns the file stored in m_files under the string form of
// doc->id_to_url(id), fetching it with doc->get_djvu_file(id,false) and
// storing it on first use.
//
// Throws "XMLAnno.fail_init" (followed by a tab and the URL string) when a
// newly created document fails wait_for_complete_init(), and
// "XMLAnno.bad_page" when the resolved id is not in the document's id list or
// no file is returned for it.
//
// The GCriticalSectionLock on xmlparser_lock is created first and lives until
// the function returns, so both caches are only touched while it exists.
GP<DjVuFile>
lt_XMLParser::Impl::get_file(const GURL &url,GUTF8String id)
{
  GCriticalSectionLock lock(&xmlparser_lock);

  // Resolve the document: reuse the cached one or open and remember it.
  const GUTF8String dockey(url.get_string());
  GP<DjVuDocument> doc;
  GPosition docpos=m_docs.contains(dockey);
  if(docpos)
  {
    doc=m_docs[docpos];
  }
  else
  {
    doc=DjVuDocument::create_wait(url);
    if(! doc->wait_for_complete_init())
    {
      G_THROW(( ERR_MSG("XMLAnno.fail_init") "\t")+dockey );
    }
    m_docs[dockey]=doc;
  }

  // Turn page numbers / an empty selector into a real file id.
  if(id.is_int())
  {
    const int pageno=id.toInt();
    if(pageno>0)
    {
      id=doc->page_to_id(pageno-1);
    }
  }
  else if(id.length() == 0)
  {
    id=doc->page_to_id(0);
  }

  // Resolve the file: a cache hit returns immediately.
  const GURL fileurl(doc->id_to_url(id));
  const GUTF8String filekey(fileurl.get_string());
  GPosition filepos(m_files.contains(filekey));
  if(filepos)
  {
    return m_files[filepos];
  }

  // Cache miss: the id must be one the document actually knows about.
  if(!doc->get_id_list().contains(id))
  {
    G_THROW( ERR_MSG("XMLAnno.bad_page") );
  }
  const GP<DjVuFile> dfile(doc->get_djvu_file(id,false));
  if(!dfile)
  {
    G_THROW( ERR_MSG("XMLAnno.bad_page") );
  }
  m_files[filekey]=dfile;
  return dfile;
}
void lt_XMLParser::Impl::parse(const lt_XMLTags &tags, GURL *pdjvufile) { const GPList<lt_XMLTags> Body(tags.get_Tags(bodytag)); GPosition pos=Body; if(!pos || (pos != Body.lastpos())) { G_THROW( ERR_MSG("XMLAnno.extra_body") ); } const GP<lt_XMLTags> GBody(Body[pos]); if(!GBody) { G_THROW( ERR_MSG("XMLAnno.no_body") ); } GMap<GUTF8String,GP<lt_XMLTags> > Maps; lt_XMLTags::get_Maps(maptag,"name",Body,Maps); const GPList<lt_XMLTags> Objects(GBody->get_Tags(objecttag)); lt_XMLTags::get_Maps(maptag,"name",Objects,Maps); for(GPosition Objpos=Objects;Objpos;++Objpos) { lt_XMLTags &GObject=*Objects[Objpos]; // Map of attributes to value (e.g. "width" --> "500") const GMap<GUTF8String,GUTF8String> &args=GObject.get_args(); GURL codebase; { DEBUG_MSG("Setting up codebase... m_codebase = " << m_codebase << "\n"); GPosition codebasePos=args.contains("codebase"); // If user specified a codebase attribute, assume it is correct (absolute URL): // the GURL constructor will throw an exception if it isn't if(codebasePos) { codebase=GURL::UTF8(args[codebasePos]); }else if (m_codebase.is_dir()) { codebase=m_codebase; }else { codebase=GURL::Filename::UTF8(GOS::cwd()); } DEBUG_MSG("codebase = " << codebase << "\n"); } // the data attribute specifies the input file. This can be // either an absolute URL (starts with file:/) or a relative // URL (for now, just a path and file name). If it's absolute, // our GURL will adequately wrap it. If it's relative, we need // to use the codebase attribute to form an absolute URL first. GPosition datapos=args.contains("data"); if(datapos) { bool isDjVuType=false; GPosition typePos(args.contains("type")); if(typePos) { if(args[typePos] != mimetype) { // DjVuPrintErrorUTF8("Ignoring %s Object tag\n",mimetype); continue; } isDjVuType=true; } const GURL url = (pdjvufile) ? *pdjvufile : GURL::UTF8(args[datapos], (args[datapos][0] == '/') ? 
codebase.base() : codebase); int width; { GPosition widthPos=args.contains("width"); width=(widthPos)?args[widthPos].toInt():0; } int height; { GPosition heightPos=args.contains("height"); height=(heightPos)?args[heightPos].toInt():0; } GUTF8String gamma; GUTF8String dpi; GUTF8String page; GUTF8String do_ocr; { GPosition paramPos(GObject.contains(paramtag)); if(paramPos) { const GPList<lt_XMLTags> Params(GObject[paramPos]); for(GPosition loc=Params;loc;++loc) { const GMap<GUTF8String,GUTF8String> &pargs=Params[loc]->get_args(); GPosition namepos=pargs.contains("name"); if(namepos) { GPosition valuepos=pargs.contains("value"); if(valuepos) { const GUTF8String name=pargs[namepos].downcase(); const GUTF8String &value=pargs[valuepos]; if(name == "flags") { GMap<GUTF8String,GUTF8String> args; lt_XMLTags::ParseValues(value,args,true); if(args.contains("page")) { page=args["page"]; } if(args.contains("dpi")) { dpi=args["dpi"]; } if(args.contains("gamma")) { gamma=args["gamma"]; } if(args.contains("ocr")) { do_ocr=args["ocr"]; } }else if(name == "page") { page=value; }else if(name == "dpi") { dpi=value; }else if(name == "gamma") { gamma=value; }else if(name == "ocr") { do_ocr=value; } } } } } } const GP<DjVuFile> dfile(get_file(url,page)); if(dpi.is_int() || gamma.is_float()) { int pos=0; ChangeInfo(*dfile,dpi.toInt(),gamma.toDouble(pos,pos)); } parse_anno(width,height,GObject,Maps,*dfile); parse_meta(GObject,*dfile); parse_text(width,height,GObject,*dfile); ChangeTextOCR(do_ocr,width,height,dfile); } } }