Beispiel #1
0
/* long GetDocUrlDistance (in nsIDOMDocument doc); */
/**
 * Sums Distance::LD over the href values of consecutive <a href> elements
 * of a document.
 *
 * The anchors are collected with the XPath "//a[@href]" through an
 * nsIXpathUtil instance. The first href only seeds the comparison; every
 * following href is compared with the one before it and the result is added
 * to the total. Nodes that do not implement nsIDOMElement are skipped.
 *
 * @param doc      document handed to the XPath helper.
 * @param _retval  receives the accumulated distance, or -1 when the XPath
 *                 helper produced no node array.
 * @return NS_ERROR_NULL_POINTER when _retval is null, the creation error
 *         when the XPath helper cannot be instantiated, NS_OK otherwise.
 */
NS_IMETHODIMP nsDomAttUtil::GetDocUrlDistance(nsIDOMDocument *doc, PRInt32 *_retval )
{
     // The result is written through _retval on every remaining path.
     if (!_retval)
     {
	  return NS_ERROR_NULL_POINTER;
     }

     nsresult rv;
     nsCOMPtr<nsIXpathUtil> xpathUtil=do_CreateInstance("@nyapc.com/XPCOM/nsXpathUtil;1",&rv);
     if (NS_FAILED(rv))
     {
	  return rv;
     }

     xpathUtil->SetDocument(doc);
     nsCOMPtr<nsISupportsArray> nodeArr(nsnull);
     nsCString xpath("//a[@href]");
     xpathUtil->GetNodes(xpath, getter_AddRefs(nodeArr));
     if(nodeArr==nsnull)
     {
	  // No node array at all is reported as -1 rather than as an error code.
	  *_retval=-1;
	  return NS_OK;
     }

     PRUint32 arrlen=0;
     nodeArr->Count(&arrlen);

     // The attribute name never changes, so convert it once instead of per node.
     const nsString hrefName=NS_ConvertUTF8toUTF16("href");

     Distance ld;
     PRInt32 distValue=0;
     std::string lastUrl;
     bool haveLast=false;
     for(PRUint32 i=0;i<arrlen;i++)
     {
	  nsCOMPtr<nsISupports> node;
	  nodeArr->GetElementAt(i,getter_AddRefs(node));

	  nsCOMPtr<nsIDOMElement> domele=do_QueryInterface(node);
	  if(domele==nsnull)
	  {
	       continue;
	  }

	  nsString hrefValue;
	  domele->GetAttribute(hrefName,hrefValue);
	  std::string curUrl(NS_ConvertUTF16toUTF8(hrefValue).get());

	  // The first usable href has no predecessor to be compared with.
	  if(haveLast)
	  {
	       distValue+=ld.LD(lastUrl,curUrl);
	  }
	  lastUrl=curUrl;
	  haveLast=true;
     }
     *_retval=distValue;
     return NS_OK;
}
/* void GenVar (); */
/**
 * Chooses a target class from m_model and stores the XPaths of its members
 * in m_vars under the key "box".
 *
 * 1. Every class i is scored as
 *        LD(link string of the class node, URL of its document)
 *        + value(dim 0) - log(value(dim 1))/log(2)
 *    using the class attribute vector. Classes whose dim-0 value is 0 are
 *    skipped. A strictly lower score replaces the candidate list; a score
 *    less than 0.1 above the current minimum is appended to it.
 * 2. Among the candidates, the class whose attribute set is longest is
 *    chosen (the first one wins on equal length).
 * 3. For every vector of the chosen set an XPath is requested from a fresh
 *    nsIXpathUtil configured with the names in m_parm["PATH_ATTS"] (passed
 *    to split() with ";" as separator). The distinct XPaths are joined with
 *    "|".
 *
 * Classes, sets and vectors that cannot be obtained from the model are
 * skipped instead of being dereferenced.
 *
 * @return NS_OK, including when no class qualifies (m_vars is then left
 *         unchanged); NS_ERROR_FAILURE when the attribute set of the chosen
 *         class cannot be fetched.
 */
NS_IMETHODIMP nsChlVarGenerator_1::GenVar()
{
     //Get the target class
     PRInt32 len=0;
     m_model->GetClassCount(&len);
     double minValue=INT_MAX;
     Distance ld;
     std::list<int> classids;
     for(int i=0;i<len;i++)
     {
	  nsCOMPtr<nsIAttVec> vec(nsnull);
	  m_model->GetClassAtt(i, getter_AddRefs(vec) );
	  if(vec==nsnull)
	       continue;

	  nsCOMPtr<nsIDOMNode> node(nsnull);
	  vec->GetNode(getter_AddRefs(node));
	  nsCOMPtr<nsIDOMDocument> doc;
	  vec->GetDoc(getter_AddRefs(doc));
	  nsCOMPtr<nsIDOMHTMLDocument> html=do_QueryInterface(doc);
	  // Without an HTML document there is no URL to compare the link with.
	  if(html==nsnull)
	       continue;
	  nsString docurl;
	  html->GetURL(docurl);

	  nsCString url;
	  dUtil->GetLinkStr(node,0,url);

	  std::string urlStr(url.get());
	  std::string docurlStr(NS_ConvertUTF16toUTF8(docurl).get());
	  LOG<<"url:"<<urlStr<<"\n";
	  LOG<<"docurl:"<<docurlStr<<"\n";
	  double tmp=ld.LD(urlStr,docurlStr);
	  LOG<<"START Value:"<<tmp<<"\n";
	  // Initialised so that a getter which leaves it untouched reads as 0.
	  double attValue=0;
	  vec->GetValueByDim(0,&attValue);
	  tmp+=attValue;

	  LOG<<i<<"Second value:"<<tmp<<" Plus:"<<attValue<<"\n";
	  if(attValue==0)
	       continue;
	  vec->GetValueByDim(1,&attValue);
	  tmp-=log(attValue)/log(2);

	  LOG<<i<<"Third value:"<<tmp<<" SUB:"<<attValue<<"\n";
	  LOG<<i<<" minValue:"<<minValue<<"\n";
	  if(minValue>tmp)
	  {
	       // New strict minimum: earlier candidates are dropped.
	       classids.clear();
	       minValue=tmp;
	       classids.push_back(i);
	  }else if(fabs(minValue-tmp)<0.1)
	  {
	       // Close enough to the current minimum to count as a tie.
	       classids.push_back(i);
	  }

     }

     LOG<<"classid size:"<<classids.size()<<":"<<minValue<<"\n";
     if(!classids.empty())
     {
	  int MaxCount=INT_MIN;
	  // Start from the first candidate so classid always holds a valid id.
	  int classid=classids.front();
	  for(std::list<int>::iterator it=classids.begin();it!=classids.end();++it)
	  {
	       nsCOMPtr<nsIAttSet> cset(nsnull);
	       m_model->GetClassSet(*it, getter_AddRefs(cset));
	       if(cset==nsnull)
		    continue;
	       PRInt32 tmplen=0;
	       cset->GetLength(&tmplen);
	       if(tmplen>MaxCount)
	       {
		    MaxCount=tmplen;
		    classid=*it;
	       }
	  }
	  LOG<<"classid"<<classid<<"\n";
	  nsCOMPtr<nsIAttSet> cset(nsnull);
	  m_model->GetClassSet(classid, getter_AddRefs(cset));
	  if(cset==nsnull)
	       return NS_ERROR_FAILURE;
	  PRInt32 vlen=0;
	  cset->GetLength(&vlen);

	  string attnames=m_parm["PATH_ATTS"];
	  LOG<<"--------------------names:"<<attnames<<"\n";
	  std::vector<std::string> nameVec;
	  if(attnames.length()>0)
	       split(attnames, ";",nameVec);

	  // A set keeps each XPath once and fixes the order of the joined value.
	  std::set<std::string> xpaths;
	  for(int i=0;i<vlen;i++)
	  {
	       nsCOMPtr<nsIAttVec> vec(nsnull);
	       cset->GetVectorByIndex(i, getter_AddRefs(vec));
	       if(vec==nsnull)
		    continue;
	       nsresult rv;
	       nsCOMPtr<nsIXpathUtil> xpathUtil=do_CreateInstance("@nyapc.com/XPCOM/nsXpathUtil;1",&rv);
	       if (NS_FAILED(rv))
	       {
		    continue;
	       }
	       for( std::vector<std::string>::iterator it=nameVec.begin();it!=nameVec.end();++it)
	       {
		    LOG<<"-----------------"<<*it<<"\n";
		    xpathUtil->AddAttName(nsCString(it->c_str()));
	       }
	       nsCOMPtr<nsIDOMDocument> doc;
	       nsCOMPtr<nsIDOMNode> node;
	       vec->GetDoc(getter_AddRefs(doc));
	       vec->GetNode(getter_AddRefs(node));
	       xpathUtil->SetDocument(doc);
	       nsCString xpath;
	       xpathUtil->GetXpath(node, 1,xpath);
	       xpaths.insert(std::string(xpath.get()));
	  }
	  std::string name("box");
	  std::string value;
	  for(std::set<std::string>::iterator it=xpaths.begin();it!=xpaths.end();++it)
	  {
	       // Separator goes between entries only, never before the first one.
	       if(it!=xpaths.begin())
		    value+="|";
	       value+=*it;
	  }

	  m_vars.insert(make_pair(name,value));
     }
     return NS_OK;
}
	int editDistance(const std::string& a, const std::string& b) {
		Distance dist;
		return dist.LD(a.c_str(), b.c_str());
	}