示例#1
0
文件: same.cpp 项目: mazonka/w
// Recursively collects the files found below directory `p`.
//
// The result maps each file's path to the per-file value reported by
// os::FileSys::readDirEx (mymain in this file treats it as the file size).
// Paths are built as "<p>/<name>", except that entries of "." carry no prefix.
// Files and sub-directories whose name starts with '.' are skipped.
//
// Side effect: prints a running count of files seen so far, followed by '\r',
// as a progress indicator. The counter is a function-local static, so it
// accumulates over the recursion and over repeated top-level calls.
msu getListOfFiles(os::Path p)
{
    static ull sz = 0;

    const string pstr = p.str();
    if (pstr.empty()) never(1);

    // Entries of the starting directory "." are reported without a prefix.
    const string prefix = (pstr == ".") ? string() : pstr + '/';

    msu r;
    os::Dir d = os::FileSys::readDirEx(p, true, true);

    sz += d.files.size();
    cout << sz << '\r' << std::flush;

    // Bind by const reference: copying every entry (a string plus its value)
    // per iteration is pure overhead on large trees.
    for ( const auto & f : d.files )
    {
        if ( f.first[0] == '.' ) continue;
        r[prefix + f.first] = f.second;
    }
    for ( const auto & f : d.dirs )
    {
        if ( f[0] == '.' ) continue;
        auto n = getListOfFiles(prefix + f);
        r.insert(n.begin(), n.end());
    }

    return r;
}
示例#2
0
文件: samehf.cpp 项目: mazonka/w
// Recursively collects the files found below directory `p`.
//
// Each file becomes a key File{prefix, name, mtime, value-as-ull} mapped to the
// per-file value reported by os::FileSys::readDirEx (presumably the file
// size -- confirm against readDirEx). `prefix` is "<p>/", or empty when `p`
// is ".".
//
// dot: when false, files and sub-directories whose name starts with '.' are
//      skipped; when true they are included.
//
// Side effect: prints a running count of files seen so far, followed by '\r',
// as a progress indicator. The counter is a function-local static, so it
// accumulates over the recursion and over repeated top-level calls.
sam::mfu sam::getListOfFiles(os::Path p, bool dot)
{
    static ull sz = 0;

    const string pstr = p.str();
    if (pstr.empty()) never(1);

    // Entries of the starting directory "." are reported without a prefix.
    const string prefix = (pstr == ".") ? string() : pstr + '/';

    mfu r;
    os::Dir d = os::FileSys::readDirEx(p, true, true);

    sz += d.files.size();
    cout << sz << '\r' << std::flush;

    // Bind by const reference: copying every entry (a string plus its value)
    // per iteration is pure overhead on large trees.
    for ( const auto & f : d.files )
    {
        if ( !dot && f.first[0] == '.' ) continue;
        const string n = prefix + f.first;
        r[File {prefix, f.first, os::FileSys::mtime(n), ull(f.second)}] = f.second;
    }
    for ( const auto & f : d.dirs )
    {
        if ( !dot && f[0] == '.' ) continue;
        auto n = getListOfFiles(prefix + f, dot);
        r.insert(n.begin(), n.end());
    }

    return r;
}
示例#3
0
// Returns the files of every directory reported by getListOfDirs(directory),
// followed by the files of `directory` itself, as one flat list.
QStringList Helpers::getListOfFilesRecursively(const QString directory) const
{
    // Directories to scan: whatever getListOfDirs() yields, then the root last.
    QStringList pending = getListOfDirs(directory);
    pending << directory;

    QStringList collected;
    for (QStringList::const_iterator it = pending.constBegin();
         it != pending.constEnd(); ++it)
    {
        collected += getListOfFiles(*it);
    }

    return collected;
}
示例#4
0
文件: same.cpp 项目: mazonka/w
// Finds groups of identical files below the current directory and prints them.
//
// sz: upper limit for the comparison length SZ. Files that are still
//     candidates once SZ would exceed this limit are reported as unchecked.
//
// Algorithm:
//   1. list all files ('.'-files and '.'-directories are not read);
//   2. drop files with zero size or with a size no other file shares;
//   3. put the remaining files into buckets keyed by size (empty hash);
//   4. for SZ = 1, 10, 100, ... while SZ <= sz and candidates remain:
//        - re-bucket every candidate by (size, gethash(file, SZ));
//        - discard buckets holding a single file (unique);
//        - a bucket whose file size is <= SZ is declared SAME;
//        - any other bucket stays a candidate for the next, larger SZ.
void mymain(ull sz)
{
    msu allfiles = getListOfFiles(".");
    cout << "Read: " << allfiles.size() << " files\n";

    cout << "Removing unique size and 0-size files\n";
    {
        // How many files have each size.
        std::map<ull, ull> a;
        for ( const auto & x : allfiles ) a[x.second] += 1;

        msu b;
        for ( const auto & x : allfiles )
        {
            const ull s = x.second;
            if ( s == 0 ) continue;
            if ( a[s] == 1 ) continue;
            b.insert(x);
        }
        b.swap(allfiles);
    }
    cout << "Remained: " << allfiles.size() << " files\n";

    // Initial buckets: one per file size, hash part left empty.
    std::map<SH, vstr> buckets;
    for ( const auto & x : allfiles )
    {
        SH s {x.second, ""};
        buckets[s].push_back(x.first);
    }
    cout << "Made " << buckets.size() << " buckets\n";

    // release allfiles
    allfiles.clear();

    std::map<SH, vstr> same;
    for (ull SZ = 1; SZ <= sz && !buckets.empty(); SZ *= 10 )
    {
        // Number of candidate files in this round; summing the bucket sizes
        // avoids walking (and copying) every filename just to count it.
        ull fcnt = 0;
        for ( const auto & buck : buckets ) fcnt += buck.second.size();

        cout << "Checking " << fcnt << " files for size " << kb(SZ) << std::flush;

        std::map<SH, vstr> newb;
        ull cntr = 0;
        Timer timer;
        for ( const auto & buck : buckets )
        {
            for ( const auto & f : buck.second )
            {
                SH h { buck.first.size, gethash(f, SZ) };
                newb[h].push_back(f);

                ++cntr;

                // Throttled progress line.
                // NOTE(review): presumably Timer::get() returns milliseconds,
                // i.e. the line refreshes about every two seconds -- confirm.
                if ( timer.get() / 1000 > 1)
                {
                    timer.init();
                    cout << "\rChecking " << cntr << "/" << fcnt
                         << " files for size " << kb(SZ) << " " << std::flush;
                }
            }
        }

        cout << "\rChecked " << cntr << "/" << fcnt
             << " files for size " << kb(SZ) << std::flush;
        cout << "   " << std::flush;

        // Keep only buckets with at least two files. Those whose file size is
        // above SZ stay candidates for the next round; the rest are final.
        buckets.clear();
        for ( const auto & x : newb )
        {
            if ( x.second.size() <= 1 ) continue;

            if ( SZ < x.first.size )
                buckets.insert(x);
            else
                same.insert(x);
        }

        cout << "\n";
    } // next SZ

    cout << "\n";

    if ( !same.empty() )
    {
        cout << "Same files found, ";
        prn_buckets(same);
    }
    else
    {
        cout << "No same files found";
        if ( !buckets.empty() ) cout << ", BUT";
        cout << "\n";
    }

    // Candidates left over because the loop stopped at the sz limit.
    if ( !buckets.empty() )
    {
        cout << "Unchecked files exceeded "
             << kb(sz) << "\n";

        prn_buckets(buckets);
    }
}