Exemplo n.º 1
0
double MultiProbeLshModel::recall (double x) const
{
    double x2 = W_ / x;
    double p = col_helper(x2);


    unsigned MT =  __probeSequenceTemplates[M_].size();
    if (MT > T_) MT = T_;
    
    double result = 0;
    for (unsigned i = 0; i < MT; i++)
    {
        double r = 1.0;
        for (unsigned j = 0; j < M_; j++)
        {
            Probe &probe = __probeSequenceTemplates[M_][i];
            if (probe.mask & leftshift(j))
            {
                double delta = (j + 1.0) / (M_ + 1.0) * 0.5; // expected value
                if (probe.shift & leftshift(j))
                {
                    r *= p_col_helper(x2, 1.0 - delta);
                }
                else
                {
                    r *= p_col_helper(x2, delta);
                }
            }
            else r *= p;
        }
        result += r;
    }
    return 1.0 - std::exp(std::log(1.0 - result) * L_);
}
Exemplo n.º 2
0
    /**
     * Fill seq with at most T bucket indices to probe for obj.
     *
     * Every component hash in lsh_ is evaluated once, yielding a base value
     * and a float delta.  Two score entries are recorded per component
     * (reserve = 1 with score delta, reserve = unsigned(-1) with score
     * 1 - delta) and the entries are sorted.  Each probe template for this
     * number of components is then turned into one combined hash: sorted
     * entry k contributes its component's base value, adjusted by the
     * entry's reserve when bit k of the template mask is set, multiplied by
     * the matching a_ coefficient.  The combined hash modulo H_ is appended
     * to seq.
     *
     * seq is cleared first; templates are consumed in order until T entries
     * have been produced or the templates run out.
     */
    void MultiProbeLsh::genProbeSequence (Domain obj, std::vector<unsigned>
            &seq, unsigned T) const
    {
        const unsigned M = lsh_.size();

        std::vector<unsigned> base;
        base.resize(M);
        ProbeSequence scores;
        scores.resize(2 * M);

        for (unsigned k = 0; k < M; ++k)
        {
            const unsigned first = 2 * k;
            const unsigned second = first + 1;
            float delta;
            base[k] = Super::lsh_[k](obj, &delta);

            scores[first].mask = k;
            scores[first].reserve = 1;    // direction
            scores[first].score = delta;

            scores[second].mask = k;
            scores[second].reserve = unsigned(-1);
            scores[second].score = 1.0 - delta;
        }
        std::sort(scores.begin(), scores.end());

        const ProbeSequence &tmpl = (*ProbeSequenceTemplates::get())[M];

        seq.clear();
        ProbeSequence::const_iterator cur = tmpl.begin();
        const ProbeSequence::const_iterator last = tmpl.end();
        for (; cur != last && seq.size() != T; ++cur)
        {
            const Probe &probe = *cur;
            unsigned hash = 0;
            for (unsigned k = 0; k < M; ++k)
            {
                const unsigned bit = leftshift(k);
                unsigned h = base[scores[k].mask];
                if (probe.mask & bit)
                {
                    if (probe.shift & bit)
                        h += scores[k].reserve;
                    else
                        h += unsigned(-1) * scores[k].reserve;
                }
                hash += h * a_[scores[k].mask];
            }
            seq.push_back(hash % H_);
        }
    }
Exemplo n.º 3
0
/*
 * Extract every hyperlink (href="...") from buf, queue the ones that
 * patchlink() accepts, then drop the shortest leading part of buf that
 * contains all of those links.
 *
 * buf:    NUL-terminated text to scan.
 * domain: forwarded to patchlink() together with each raw link.
 *
 * Returns the value of leftshift() applied to the text following the last
 * link (to buf itself when there is no link or the regex cannot be
 * compiled); per the original note this is the length of the remaining
 * substring.
 *
 * The scan calls regexec() repeatedly from an advancing cursor.  The pattern
 * has no sub-expressions, so a single regexec() call can only ever report
 * one link, no matter how large the match array is.
 */
int extractLink(char *buf, char *domain)
{
	const char *regex = "href=\"[^ >]*\"";
	regex_t preg;
	regmatch_t match;
	char tmp[MAX_LINK_LEN];
	char *cursor = buf;	/* first character not yet scanned */

	if (regcomp(&preg, regex, REG_EXTENDED|REG_ICASE) != 0) {	/* compiling the regex failed */
		debug_printf("%s %d init regex failed \n",__func__,__LINE__);
		return leftshift(buf);
	}

	while (regexec(&preg, cursor, 1, &match, 0) == 0) {
		/* Skip the leading href=" (6 chars) and the trailing quote. */
		char *link = cursor + match.rm_so + 6;
		int len = (int)(match.rm_eo - match.rm_so) - 7;

		/* Every match is at least 7 chars long; this only guards against
		 * a non-advancing cursor. */
		if (match.rm_eo <= 0)
			break;
		cursor += match.rm_eo;

		/* Skip links that would not fit into tmp with their terminator. */
		if (len < 0 || (size_t)len >= sizeof(tmp))
			continue;

		strncpy(tmp, link, len);
		tmp[len] = '\0';
		/* NOTE(review): %p prints the buffer address, not the link text -- confirm intent. */
		debug_printf("%s %d original link:[%p]\n",__func__,__LINE__,tmp);
		Url *temp = calloc(1, sizeof(Url));
		if(!temp)
			continue;
		if(patchlink(tmp, domain,temp)) {
			free(temp);
			continue;
		}
		debug_printf("%s %d whole link:[%p]\n",__func__,__LINE__,temp->str);
		enqueue(temp);	/* temp is handed over to enqueue(); it is not freed here */
	}
	regfree(&preg);

	/* cursor still equals buf when nothing matched. */
	return leftshift(cursor);
}
Exemplo n.º 4
0
 /**
  * Resize seq to 2 * M entries and fill them pairwise.
  *
  * For every l in [0, M) the entry at index l gets mask = leftshift(l) and
  * shift = 0, while its mirror at index 2*M - l - 1 gets both mask and
  * shift set to leftshift(l).  All reserve fields are zeroed.  The scores
  * are (l+1)(l+2) / ((M+1)(M+2)) / 4 for the lower entry and that value plus
  * 1 - 2*delta for the mirror, with delta = (l+1) / (M+1) / 2.
  *
  * M must not exceed the number of bits in the mask field (asserted).
  */
 void GenExpectScores (ProbeSequence &seq, unsigned M)
 {
     assert(M <= sizeof(seq[0].mask)* 8);
     seq.resize(2 * M);

     unsigned hi = 2 * M;
     for (unsigned lo = 0; lo < M; ++lo)
     {
         --hi;   // mirror slot: hi == 2 * M - lo - 1

         seq[hi].shift = leftshift(lo);
         seq[hi].mask = seq[hi].shift;
         seq[lo].mask = seq[hi].mask;
         seq[lo].shift = 0;

         seq[hi].reserve = 0;
         seq[lo].reserve = seq[hi].reserve;

         float delta = (lo + 1.0) / (M + 1.0) * 0.5;
         seq[lo].score = (lo + 1.0) * (lo + 2.0) / (M + 1.0) / (M + 2.0) * 0.25;
         seq[hi].score = 1.0 - 2.0 * delta + seq[lo].score;
     }
 }
Exemplo n.º 5
0
/*
 * Run one of the five shuffling primitives, selected by its position in the
 * ring: 0 cycling, 1 upshift, 2 rightshift, 3 downshift, 4 leftshift.
 */
static void randomization_step(int op)
{
    switch (op)
    {
    case 0 : cycling();    break;
    case 1 : upshift();    break;
    case 2 : rightshift(); break;
    case 3 : downshift();  break;
    case 4 : leftshift();  break;
    }
}

/*
 * Perform `times` rounds of shuffling.
 *
 * Every round executes nine primitives: five consecutive ring entries
 * followed by the first four of them again in reverse order, so the sequence
 * reads the same forwards and backwards.  The round number modulo 5 picks
 * the starting ring entry:
 *   0 -> cycling, 1 -> leftshift, 2 -> downshift, 3 -> rightshift,
 *   4 -> upshift.
 */
void randomization()
{
    int round;
    int step;
    int start;

    for (round = 1; round <= times; round++)
    {
        start = (5 - round % 5) % 5;

        /* forward: five consecutive ring entries */
        for (step = 0; step < 5; step++)
            randomization_step((start + step) % 5);

        /* backward: retrace the first four of them */
        for (step = 3; step >= 0; step--)
            randomization_step((start + step) % 5);
    }
}