コード例 #1
0
ファイル: Analyzer.cpp プロジェクト: kalinochkind/vkbot
// Returns the hash of the stemmed form of `wrd`, memoised in `stemmed`.
// The cache key is phash(wrd); on a miss the word is run through sstem(),
// the result is hashed with phash() and stored under that key.
long long stem(const wstring &wrd)
{
    const long long key = phash(wrd);

    auto cached = stemmed.find(key);
    if(cached != stemmed.end())
    {
        // Already stemmed once: reuse the stored hash.
        return cached->second;
    }

    const long long stemHash = phash(sstem(wrd));
    stemmed[key] = stemHash;
    return stemHash;
}
コード例 #2
0
std::ostream& operator<<(std::ostream& os, const CImagePatch& patch)
{
	os << "Patch:\n";
	os << "\tFrame:\n\t\t" << patch.GetFrame() << std::endl;
	os << "\tBlur value:\n\t\t" << patch.GetBlurValue() << std::endl;
	os << "\tStandart deviation:\n\t\t" << patch.GetStandartDeviation() << std::endl;
	
	os << "\tGrey image:\n" << patch.GrayImage() << std::endl;
	os << "\tBin image:\n" << patch.BinImage() << std::endl;
	
	std::bitset<sizeof(uint64) * 8> phash(patch.GetPHash());
	os << "\tPHash:\n\t\t" << phash << std::endl;
	
	std::bitset<sizeof(uint64) * 8> avgHash(patch.GetAvgHash());
	os << "\tAvgHash:\n\t\t" << avgHash << std::endl;
	
	return os;
}
コード例 #3
0
ファイル: storage.c プロジェクト: vi/forsnapshotfs
/*
 * Append the block `buf` (c->block_size bytes) to the file `c`, storing as
 * little data as possible. In order of preference:
 *
 *  1. If a dependency holds an identical block at position c->current_block,
 *     record a reference to that dependency (storage__append_block_dep with
 *     index i+1) instead of the data.
 *  2. If the block consists only of zero bytes, record the special marker
 *     -0x8000.
 *  3. Otherwise store the block itself via storage__append_block_simple.
 *
 * The c->writestat_* counters are updated to reflect which path was taken.
 * c->outbuf is used as scratch space for reading candidate blocks.
 */
void storage__append_block(struct storage__file* c, unsigned char* buf) {
    int i;
    unsigned char hash = phash(buf, c->block_size);
    
    /* Dependencies are scanned from the last one down to the first. */
    for(i=c->depscount-1; i>=0; --i) {
        unsigned char hc = storage__get_block_hash(c->deps[i], c->current_block);
        if(hc==hash) {
            /* Hash matches: fetch the candidate and compare byte by byte. */
            int ret = storage__read_block_nonrecursive(c->deps[i], c->outbuf, c->current_block);
            if(ret!=0){
                /* The dependency could not supply the block directly
                 * (presumably it is itself a reference -- TODO confirm);
                 * count it and try the next dependency. */
                ++c->writestat_dblrefs;
                continue;
            }
            
            if(!memcmp(c->outbuf, buf, c->block_size)) {
                storage__append_block_dep(c, i+1, hash);
                ++c->writestat_reused;
                return;
            } else {
                /* Same 8-bit hash, different content. */
                ++c->writestat_hashcoll;
            }
        }
    }
    
    if(hash==0) {
        // maybe the entire block is zero?
        int j;
        /* Start at the LAST valid byte: indexing buf[block_size] would read
         * one byte past the block, and a stray non-zero byte there would
         * also prevent zero blocks from being detected. */
        for (j=c->block_size-1; j>=0; --j) {
            if(buf[j])break;   
        }
        if(j==-1) {
            // the block is zero
            storage__append_block_dep(c, -0x8000, 0);
            ++c->writestat_zero;
            return;
        }
    }

    storage__append_block_simple(c, buf, hash);
}
コード例 #4
0
ファイル: Analyzer.cpp プロジェクト: kalinochkind/vkbot
// <hash, <start, len> >
vector<pair<long long, pair<int, int> > > splitWords(const wstring &s, vector<pair<long long, long long> > &fixedstem, vector<pair<long long, long long> > &replaced, set<long long> &names)
{
    vector<pair<long long, pair<int, int> > > ans;
    wstring word;
    int prevKind = 0;  // 1 - letter, 2 - digit
    wstring S = s + L' ';
    for(int j=0; j<(int)S.size(); j++)
    {
        wchar_t i = towupper(S[j]);
        if(isLetter(i) && prevKind != 2)
        {
            word.push_back(i);
            prevKind = 1;
        }
        else if(isDigit(i) && prevKind != 1)
        {
            word.push_back('0');
            prevKind = 2;
        }
        else
        {
            if(word.length())
            {
                bool st = 1;
                long long pw = phash(word);
                for(auto &t : fixedstem)
                {
                    if(t.first == pw)
                    {
                        pw = t.second;
//                        wcerr << pw << L" proc\n";
                        st = 0;
                        break;
                    }
                }
                if(names.count(pw))
                {
                    ans.push_back({phname, {j-word.length(), word.length()}});
                }
                else
                {
                    long long std = 0;
                    if(st)
                    {
                        std = stem(word);
                        for(auto &t : replaced)
                        {
                            if(std == t.first)
                            {
                                std = t.second;
                                break;
                            }
                        }
                    }
                    ans.push_back({st ? std : pw, {j-word.length(), word.length()}});
                }
            }
            word.clear();
            prevKind = 0;
            if(isLetter(i))
            {
                word.push_back(i);
                prevKind = 1;
            }
            else if(isDigit(i))
            {
                word.push_back('0');
                prevKind = 2;
            }
        }
    }
    return ans;
}
コード例 #5
0
ファイル: Analyzer.cpp プロジェクト: kalinochkind/vkbot
{
    return i == L'а' || i == L'я' || i == L'о' || i == L'у' || i == L'ю' || i == L'и' || i == L'е';
}

// Polynomial hash of `s` with multiplier 1000000007:
//   h = 0;  h = h * 1000000007 + c   for every character c, left to right.
// The arithmetic is meant to wrap around modulo 2^64. It is carried out in
// unsigned long long because overflow of a signed long long is undefined
// behaviour; the final value is converted back to long long, which yields
// the same numbers the wrapping signed computation produced.
// Returns 0 for an empty string.
long long phash(const wstring &s)
{
    unsigned long long acc = 0;
    for(wchar_t ch : s)
    {
        // Go through long long first so a negative wchar_t (where wchar_t is
        // signed) contributes the same value modulo 2^64 as it did before.
        acc = acc * 1000000007ULL
            + static_cast<unsigned long long>(static_cast<long long>(ch));
    }
    return static_cast<long long>(acc);
}

// Hash of the literal L"firstname", computed once when this global is
// initialised. splitWords() emits this value in place of the real hash for
// any word whose hash is found in its `names` set.
long long phname = phash(L"firstname");

// <hash, <start, len> >
vector<pair<long long, pair<int, int> > > splitWords(const wstring &s, vector<pair<long long, long long> > &fixedstem, vector<pair<long long, long long> > &replaced, set<long long> &names)
{
    vector<pair<long long, pair<int, int> > > ans;
    wstring word;
    int prevKind = 0;  // 1 - letter, 2 - digit
    wstring S = s + L' ';
    for(int j=0; j<(int)S.size(); j++)
    {
        wchar_t i = towupper(S[j]);
        if(isLetter(i) && prevKind != 2)
        {
            word.push_back(i);
            prevKind = 1;
コード例 #6
0
ファイル: debug.c プロジェクト: vi/forsnapshotfs
/*
 * fsfs-debug: assorted low-level inspection commands, selected by argv[1].
 *
 *   print-index        dump every entry of an index file
 *   comp-stats         histogram of entry kinds / compressed sizes in an index
 *   decompress-block   LZO-decompress a region of a data file to stdout
 *   decompress-block2  LZO-decompress stdin to stdout
 *   compress-block     lzo1x_1 compress stdin (at most 65536 bytes) to stdout
 *   compress-block2    lzo1x_999 compress stdin (at most 65536 bytes) to stdout
 *   calculate-hash     print phash() of stdin (at most 65536 bytes)
 *   get-length         print number_of_blocks * block_size for a stored file
 *   read-one-block     write one block of a stored file to stdout
 *
 * Index layout as read here: per block group an 8-byte big-endian base offset
 * followed by `blockgroup_size` 2-byte big-endian signed entries.
 *
 * Exit status: 0 on success, 1 for usage errors / unknown command,
 * 2 for I/O problems or a truncated index.
 */
int main(int argc, char* argv[]) {
    /* Largest input block handled by the (de)compress commands, and the LZO1X
     * worst-case output size for it (in + in/16 + 64 + 3). */
    enum { MAX_BLOCK = 65536, COMPRESSED_MAX = MAX_BLOCK + MAX_BLOCK/16 + 64 + 3 };

    if(argc<2 || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "Usage:\n"
            "    fsfs-debug print-index block_size blockgroup_size file.idx [bgstart [bgcount]]\n"
            "    fsfs-debug comp-stats blockgroup_size file.idx\n"
            "    fsfs-debug decompress-block file.dat offset compressed_size > output\n"
            "    fsfs-debug decompress-block2 < input > output\n"
            "    fsfs-debug compress-block < input > output\n"
            "    fsfs-debug compress-block2 < input > output\n"
            "    fsfs-debug calculate-hash < input\n"
            "    fsfs-debug get-length dir name\n"
            "    fsfs-debug read-one-block dir name blocknum\n"
        );
        return 1;   
    }
    if(!strcmp(argv[1], "print-index")) {
        int bgsize=1020;
        int block_size=4096;
        long long int bgstart = 0;
        long long int bgcount = -1;   /* -1: no limit, run until end of file */
        assert(argc>=5 && argc<=7);
        sscanf(argv[2], "%d", &block_size);
        sscanf(argv[3], "%d", &bgsize);
        const char* idxfile = argv[4];
        if(argc>=6) sscanf(argv[5], "%lld", &bgstart);
        if(argc>=7) sscanf(argv[6], "%lld", &bgcount);
        
        /* On-disk size of one block group: 8-byte offset + 2 bytes per entry. */
        int bglen = bgsize * 2 + 8;
        
        /* "-" as the index file name selects stdin. (This used to test
         * argv[2], the block size, so "-" never worked as a file name.) */
        FILE* idx = stdin;
        if(strcmp(idxfile, "-")) idx = fopen(idxfile, "rb");
        
        if(idx==NULL) { perror("fopen"); return 2; }
        
        signed short int q;
        long long int baseoffset;
        
        if(bgstart!=0) { 
            int ret = fseek(idx, bgstart*bglen, SEEK_SET); 
            if(ret) { perror("fseek"); return 2;}
        }
        
        while(bgcount!=0) {
            int ret = fread(&baseoffset, 1, 8, idx);
            if(ret==0)break;
            if(ret!=8) {
                printf("Trimmed index file\n");
                return 2;
            }
            baseoffset = be64toh(baseoffset);
            printf("Block group %lld, base offset: 0x%016llX\n", bgstart, baseoffset);
            
            int i;
            int accum = 0;   /* bytes of data consumed so far inside this group */
            for(i=0; i<bgsize; ++i) {
                int ret = fread(&q, 1, 2, idx);
                if(ret!=2) {
                    printf("Trimmed index file\n");
                    return 2;
                }
                q = be16toh(q);
                
                printf("block %lld: ", i + bgstart*bgsize);
                if(q==-0x8000) {
                    printf("zero\n");
                } else
                if(q==-0x7FFF) {
                    printf("uncompressed (%d bytes) at 0x%016llX\n", block_size, baseoffset+accum);
                    accum+=block_size;
                } else
                if(q==0) {
                    printf("unallocated\n");
                } else
                if(q>0 && q<0x4444) {
                    printf("compressed (%d bytes) at 0x%016llX\n", q, baseoffset+accum);
                    accum+=q;
                } else
                if (q<0 && q >= -64) {
                    printf("reference to %d's dependency\n", (-q)-1);
                } else {
                    /* Print the raw 16-bit pattern; without the cast a
                     * negative q is sign-extended to eight hex digits. */
                    printf("probably invalid (%04X)\n", (unsigned int)(unsigned short)q);
                }
            }
                
            fflush(stdout);
            --bgcount;   
            ++bgstart;
        }
        
        return 0;  
    } else
    if(!strcmp(argv[1], "comp-stats")) {
        int bgsize=1020;
        assert(argc==4);
        sscanf(argv[2], "%d", &bgsize);
        const char* idxfile = argv[3];
        
        /* "-" as the index file name selects stdin (previously tested argv[2]). */
        FILE* idx = stdin;
        if(strcmp(idxfile, "-")) idx = fopen(idxfile, "rb");
        
        if(idx==NULL) { perror("fopen"); return 2; }
        
        long long int baseoffset;
        signed short int q;
        
        /* stats[n]: number of blocks whose compressed size is n bytes. */
        unsigned long long int *stats = (unsigned long long int*) malloc(8*32768);
        if(stats==NULL) { perror("malloc"); return 2; }
        unsigned long long int zeroes = 0;
        unsigned long long int invals = 0;
        unsigned long long int refs[64];
        unsigned long long int total = 0;
        unsigned long long int uncompressibles = 0;
        memset(&refs, 0, sizeof(refs));
        memset(stats, 0, 8*32768);
        
        int i;
        int trailing_zero_counter=0;
        for(;;) {
            int ret = fread(&baseoffset, 1, 8, idx);
            if(ret!=8)break;
            baseoffset = be64toh(baseoffset);
            
            for(i=0; i<bgsize; ++i) {
                int ret = fread(&q, 1, 2, idx);
                if(ret!=2) return 2;
                q = be16toh(q);
                
                if(q>0) {
                    ++stats[q];
                }else
                if(q==-0x7FFF) {
                    ++uncompressibles;
                }else
                if(q==-0x8000) {
                    ++zeroes;
                }else
                if(q==0) {
                    ++trailing_zero_counter;
                }else
                if(q<0 && q>=-64) {
                    ++refs[(-q)-1];
                }else{
                    ++invals;
                }
                ++total;
            }
        }
        
        /* Unallocated (0) entries do not count towards the total. */
        total-=trailing_zero_counter;
        
        /* Cumulative count, used for the running percentage column. */
        long long int running = 0;
        
        printf("total: %llu (100%%) ; 0%% \n", total);
        
        running+=zeroes;
        if(zeroes>0)printf("zero: %llu (%g%%) ; %g%%\n",   zeroes, 
            100.0*zeroes/total, 100.0*running/total);
        
        for(i=0; i<64; ++i) {
            running+=refs[i];
            if(refs[i]>0)printf("refs[%d]: %llu (%g%%) ; %g%%\n",  i, refs[i], 
                    100.0*refs[i]/total, 100.0*running/total);
        }
        for(i=0; i<32768; ++i) {
            running+=stats[i];
            if(stats[i]>0)printf("compressed[%d]: %llu (%g%%) ; %g%%\n",  i, stats[i],
                    100.0*stats[i]/total, 100.0*running/total);
        }
        running+=uncompressibles;
        if(uncompressibles>0)printf("uncompressible: %llu (%g%%) ; %g%%\n",   uncompressibles, 
            100.0*uncompressibles/total, 100.0*running/total);
        
        running+=invals;
        if(invals>0)printf("invalid: %llu (%g%%) ; %g%%\n",   invals, 
                100.0*invals/total, 100.0*running/total);
        
        free(stats);
        return 0;
    } else
    if(!strcmp(argv[1], "decompress-block")) {
        assert(argc==5);
        
        const char* datfile = argv[2];
        long long int offset = 0;
        int size = 0;
        
        sscanf(argv[3], "%lld", &offset);
        sscanf(argv[4], "%d", &size);
        
        /* Validate before touching the buffers; a negative size would turn
         * into a huge unsigned count inside fread. */
        if(size<0 || size>=65536) {
            fprintf(stderr, "compressed_size must be in range 0..65535\n");
            return 1;
        }
        
        FILE* dat = fopen(datfile, "rb");
        if(dat==NULL) { perror("fopen"); return 2; }
        
        int ret = fseek(dat, offset, SEEK_SET);
        if(ret!=0) { perror("fseek"); fclose(dat); return 2; }
        
        unsigned char chunk[65536];
        unsigned char chunk2[65536+2048];
        
        ret = fread(&chunk, 1, size, dat);
        fclose(dat);
        if(ret!=size) {
            fprintf(stderr, "Short read from data file\n");
            return 2;
        }
        
        /* len is the output capacity on entry and the decompressed length
         * on return. The decompressor's status code is not inspected. */
        lzo_uint len = 65536+2048;
        lzo1x_decompress_safe(chunk, ret, chunk2, &len, NULL);
        
        fwrite(chunk2, 1, len, stdout);
        
        
        return 0;
    } else
    if(!strcmp(argv[1], "decompress-block2")) {
        unsigned char chunk[65536];
        unsigned char chunk2[65536+2048];
        
        int ret = fread(&chunk, 1, 65536, stdin);
        
        lzo_uint len = 65536+2048;
        lzo1x_decompress_safe(chunk, ret, chunk2, &len, NULL);
        
        fwrite(chunk2, 1, len, stdout);
        return 0;
    } else
    if(!strcmp(argv[1], "compress-block")) {
        unsigned char chunk[MAX_BLOCK];
        /* Sized for the LZO worst case: incompressible input can grow by
         * in/16 + 64 + 3 bytes, which exceeds the former 2048-byte slack
         * for a full 64 KiB block. */
        unsigned char chunk2[COMPRESSED_MAX];
        int ret = fread(&chunk, 1, MAX_BLOCK, stdin);
        
        char tmp[LZO1X_1_MEM_COMPRESS];
        lzo_uint len = sizeof(chunk2);
        lzo1x_1_compress(chunk, ret, chunk2, &len, &tmp);
        
        fwrite(chunk2, 1, len, stdout);
        
        return 0;
    } else
    if(!strcmp(argv[1], "compress-block2")) {
        unsigned char chunk[MAX_BLOCK];
        /* Worst-case sized output buffer, see compress-block above. */
        unsigned char chunk2[COMPRESSED_MAX];
        int ret = fread(&chunk, 1, MAX_BLOCK, stdin);
        
        char tmp[LZO1X_999_MEM_COMPRESS];
        lzo_uint len = sizeof(chunk2);
        lzo1x_999_compress(chunk, ret, chunk2, &len, &tmp);
        
        fwrite(chunk2, 1, len, stdout);
        
        return 0;
    } else
    if(!strcmp(argv[1], "calculate-hash")) {
        unsigned char chunk[65536];
        int ret = fread(&chunk, 1, 65536, stdin);
        unsigned char c = phash(chunk, ret);
        printf("%02x\n", c);
        return 0;
    } else
    if(!strcmp(argv[1], "get-length")) {
        assert(argc==4);
        const char* dirname = argv[2];
        const char* basename = argv[3];
        int block_len = storage__get_block_size2(dirname, basename);
        long long int number_of_blocks = storage__get_number_of_blocks2(dirname, basename);
        printf("%lld\n", number_of_blocks*block_len);
        return 0;
    } else
    if(!strcmp(argv[1], "read-one-block")) {
        assert(argc==5);
        const char* dirname = argv[2];
        const char* basename = argv[3];
        long long int blocknum = 0;
        if(sscanf(argv[4], "%lld", &blocknum)!=1) {
            fprintf(stderr, "Invalid block number %s\n", argv[4]);
            return 1;
        }
        struct storage__file* f = storage__open(dirname, basename);
        int block_len = storage__get_block_size(f);
        
        unsigned char buf[65536];
        /* block_len bytes of buf are written out below, so it must fit. */
        if(block_len<0 || block_len>(int)sizeof(buf)) {
            fprintf(stderr, "Unsupported block size %d\n", block_len);
            return 2;
        }
        storage__read_block(f, buf, blocknum);
        fwrite(buf, 1, block_len, stdout);
        return 0;
    } else {
        fprintf(stderr, "Unknown command %s\n", argv[1]);
        return 1;
    } 
}