/* long GetDocUrlDistance (in nsIDOMDocument doc); */
/**
 * Sums the Distance::LD value between each pair of consecutive link targets
 * found in a document.
 *
 * Every node matched by the XPath "//a[@href]" that is a DOM element
 * contributes its "href" attribute, in the order the XPath helper returns the
 * nodes. The first link only seeds the comparison; each later link adds
 * LD(previous href, current href) to the total.
 *
 * @param doc      Document handed to the XPath helper via SetDocument().
 * @param _retval  Receives the accumulated distance (0 when fewer than two
 *                 links are found), or -1 when the helper produced no node
 *                 array.
 * @return NS_ERROR_NULL_POINTER when _retval is null, the creation error when
 *         the XPath helper component cannot be instantiated, NS_OK otherwise.
 */
NS_IMETHODIMP nsDomAttUtil::GetDocUrlDistance(nsIDOMDocument *doc, PRInt32 *_retval)
{
    // The result is written unconditionally below, so a null out-pointer
    // must be rejected up front.
    if (!_retval)
    {
        return NS_ERROR_NULL_POINTER;
    }

    nsresult rv;
    nsCOMPtr<nsIXpathUtil> xpathUtil = do_CreateInstance("@nyapc.com/XPCOM/nsXpathUtil;1", &rv);
    if (NS_FAILED(rv))
    {
        return rv;
    }

    xpathUtil->SetDocument(doc);

    nsCOMPtr<nsISupportsArray> nodeArr(nsnull);
    nsCString xpath("//a[@href]");
    xpathUtil->GetNodes(xpath, getter_AddRefs(nodeArr));
    if (nodeArr == nsnull)
    {
        // No array at all is reported through the -1 sentinel, not as an error.
        *_retval = -1;
        return NS_OK;
    }

    PRUint32 arrlen = 0;
    nodeArr->Count(&arrlen);

    // The attribute name never changes, so convert it once for the whole loop.
    const nsString hrefName = NS_ConvertUTF8toUTF16("href");

    Distance ld;
    PRInt32 distValue = 0;
    std::string lastUrl;
    bool haveLast = false;

    for (PRUint32 i = 0; i < arrlen; i++)
    {
        nsCOMPtr<nsISupports> node;
        nodeArr->GetElementAt(i, getter_AddRefs(node));

        nsCOMPtr<nsIDOMElement> domele = do_QueryInterface(node);
        if (domele == nsnull)
        {
            // Entries that are not DOM elements do not take part in the chain.
            continue;
        }

        nsString nsValue;
        domele->GetAttribute(hrefName, nsValue);
        std::string curUrl(NS_ConvertUTF16toUTF8(nsValue).get());

        if (haveLast)
        {
            distValue += ld.LD(lastUrl, curUrl);
        }
        lastUrl = curUrl;
        haveLast = true;
    }

    *_retval = distValue;
    return NS_OK;
}
/* void GenVar (); */
/**
 * Selects one class from m_model and stores the XPaths of its members in
 * m_vars under the key "box".
 *
 * Steps, as implemented below:
 *  1. Every class i in [0, GetClassCount) is scored from its attribute vector:
 *       score = LD(link string of the vector's node, URL of its document)
 *               + value of dimension 0
 *               - log2(value of dimension 1)
 *     Classes whose dimension-0 value is exactly 0 are skipped. The ids of
 *     the classes with the lowest score are collected; a class whose score is
 *     not lower but lies within 0.1 of the current minimum is collected too.
 *  2. Among the collected ids, the class whose attribute set reports the
 *     greatest length is chosen.
 *  3. For every vector in the chosen set an XPath is requested from a fresh
 *     nsXpathUtil instance that has been given the attribute names listed in
 *     m_parm["PATH_ATTS"] (split on ";"). The distinct XPaths, in std::set
 *     order, are joined with "|" and inserted into m_vars as ("box", joined).
 *
 * Classes, documents or sets that come back null are skipped instead of being
 * dereferenced; when no usable class remains nothing is inserted.
 *
 * @return Always NS_OK.
 */
NS_IMETHODIMP nsChlVarGenerator_1::GenVar()
{
    // ---- Step 1: find the target class candidates (lowest score). ----
    PRInt32 len = 0;
    m_model->GetClassCount(&len);

    double minValue = INT_MAX;
    Distance ld;
    std::list<int> classids;

    for (int i = 0; i < len; i++)
    {
        nsCOMPtr<nsIAttVec> vec(nsnull);
        m_model->GetClassAtt(i, getter_AddRefs(vec));
        if (vec == nsnull)
        {
            continue;
        }

        nsCOMPtr<nsIDOMNode> node(nsnull);
        vec->GetNode(getter_AddRefs(node));
        nsCOMPtr<nsIDOMDocument> doc;
        vec->GetDoc(getter_AddRefs(doc));

        // The URL is only available through the HTML document interface;
        // a document that does not implement it cannot be scored.
        nsCOMPtr<nsIDOMHTMLDocument> html = do_QueryInterface(doc);
        if (html == nsnull)
        {
            continue;
        }

        nsString docurl;
        html->GetURL(docurl);
        nsCString url;
        dUtil->GetLinkStr(node, 0, url);

        std::string urlStr(url.get());
        std::string docurlStr(NS_ConvertUTF16toUTF8(docurl).get());
        LOG<<"url:"<<urlStr<<"\n";
        LOG<<"docurl:"<<docurlStr<<"\n";

        double tmp = ld.LD(urlStr, docurlStr);
        LOG<<"START Value:"<<tmp<<"\n";

        // Zero-initialised so a failed lookup reads as 0 (and is skipped
        // below) rather than as an indeterminate value.
        double attValue = 0;
        vec->GetValueByDim(0, &attValue);
        tmp += attValue;
        LOG<<i<<"Second value:"<<tmp<<" Plus:"<<attValue<<"\n";
        if (attValue == 0)
        {
            continue;
        }

        vec->GetValueByDim(1, &attValue);
        // For attValue <= 0 the logarithm yields -inf/NaN; both comparisons
        // below are then false, so such a class is never selected.
        tmp -= log(attValue) / log(2);
        LOG<<i<<"Third value:"<<tmp<<" SUB:"<<attValue<<"\n";
        LOG<<i<<" minValue:"<<minValue<<"\n";

        if (minValue > tmp)
        {
            classids.clear();
            minValue = tmp;
            classids.push_back(i);
        }
        else if (fabs(minValue - tmp) < 0.1)
        {
            classids.push_back(i);
        }
    }

    LOG<<"classid size:"<<classids.size()<<":"<<minValue<<"\n";
    if (classids.empty())
    {
        return NS_OK;
    }

    // ---- Step 2: among the candidates, prefer the largest attribute set. ----
    int MaxCount = INT_MIN;
    int classid = -1; // -1 means "no usable candidate found"
    for (std::list<int>::iterator it = classids.begin(); it != classids.end(); ++it)
    {
        nsCOMPtr<nsIAttSet> candidate(nsnull);
        m_model->GetClassSet(*it, getter_AddRefs(candidate));
        if (candidate == nsnull)
        {
            continue;
        }

        PRInt32 tmplen = 0;
        candidate->GetLength(&tmplen);
        if (tmplen > MaxCount)
        {
            MaxCount = tmplen;
            classid = *it;
        }
    }
    LOG<<"classid"<<classid<<"\n";
    if (classid < 0)
    {
        return NS_OK;
    }

    nsCOMPtr<nsIAttSet> cset(nsnull);
    m_model->GetClassSet(classid, getter_AddRefs(cset));
    if (cset == nsnull)
    {
        return NS_OK;
    }
    PRInt32 vlen = 0;
    cset->GetLength(&vlen);

    // ---- Step 3: collect the distinct XPaths of the chosen class. ----
    std::string attnames = m_parm["PATH_ATTS"];
    LOG<<"--------------------names:"<<attnames<<"\n";
    std::vector<std::string> nameVec;
    if (attnames.length() > 0)
    {
        split(attnames, ";", nameVec);
    }

    std::set<std::string> xpaths;
    for (int i = 0; i < vlen; i++)
    {
        nsCOMPtr<nsIAttVec> vec(nsnull);
        cset->GetVectorByIndex(i, getter_AddRefs(vec));
        if (vec == nsnull)
        {
            continue;
        }

        // A new helper per vector: attribute names are added to the instance,
        // so reusing one would add the same names again on every iteration.
        nsresult rv;
        nsCOMPtr<nsIXpathUtil> xpathUtil = do_CreateInstance("@nyapc.com/XPCOM/nsXpathUtil;1", &rv);
        if (NS_FAILED(rv))
        {
            continue;
        }

        for (std::vector<std::string>::iterator it = nameVec.begin(); it != nameVec.end(); ++it)
        {
            LOG<<"-----------------"<<*it<<"\n";
            xpathUtil->AddAttName(nsCString(it->c_str()));
        }

        nsCOMPtr<nsIDOMDocument> doc;
        nsCOMPtr<nsIDOMNode> node;
        vec->GetDoc(getter_AddRefs(doc));
        vec->GetNode(getter_AddRefs(node));
        xpathUtil->SetDocument(doc);

        nsCString xpath;
        xpathUtil->GetXpath(node, 1, xpath);
        xpaths.insert(std::string(xpath.get()));
    }

    // Join with "|" between consecutive entries (no leading/trailing bar).
    std::string name("box");
    std::string value;
    for (std::set<std::string>::iterator it = xpaths.begin(); it != xpaths.end(); ++it)
    {
        if (it != xpaths.begin())
        {
            value += "|";
        }
        value += *it;
    }
    m_vars.insert(std::make_pair(name, value));

    return NS_OK;
}
/**
 * Free-function convenience wrapper around Distance::LD.
 *
 * @param a  First string.
 * @param b  Second string.
 * @return Whatever Distance::LD reports for (a, b), converted to int.
 */
int editDistance(const std::string& a, const std::string& b)
{
    Distance dist;
    // Hand the std::string objects over directly, as the other LD call sites
    // in this file do: going through c_str() would cut the input at the first
    // embedded NUL and force the strings to be rebuilt from C strings.
    return dist.LD(a, b);
}