Esempio n. 1
0
// parse with info from
// http://www.wotsit.org/getfile.asp?file=wword8&sc=230027800
bool
OleEndAnalyzer::tryFIB(AnalysisResult& ar, InputStream* in) {
    const char* d;
    int32_t size = 426;
    int32_t nread = in->read(d, size, size);
    in->reset(0);
    if (nread != size
            || (unsigned char)d[0] != 0xec || (unsigned char)d[1] != 0xa5) {
        return false;
    }
    bool complex = (d[10] & 4) == 4;
    if (complex) return false;
    int32_t fcMin = readLittleEndianInt32(d+24);
    int32_t fcMac = readLittleEndianInt32(d+28);

    // for some reason we sometimes need to add 512 here. No clue why.
    // if the first 512 bytes are 0 we do this
    size = fcMin+512;
    nread = in->read(d, size, size);
    in->reset(0);
    if (nread != size) {
        return false;
    }
    int i;
    for (i=0; i<512 && d[i+fcMin] == 0; i++) ;
    if (i == 512) {
        fcMin += 512;
        fcMac += 512;
    }

    size = fcMac;
    nread = in->read(d, size, size);
    in->reset(0);
    if (nread != size) {
        return false;
    }

    wordtext.reset();
    for (int32_t dp = fcMin; dp < fcMac; dp += size) {
        size = fcMac-dp;
        if (size > 512) size = 512;
        wordtext.addText(d+dp, size);
    }
    wordtext.cleanText();
    ar.addText(wordtext.text(), (int32_t)wordtext.length());
    wordtext.reset();
    return true;
}
signed char
IFilterEndAnalyzer::analyze(AnalysisResult& idx, InputStream *in) {
    const string& filename = idx.fileName();
    int p = filename.find_last_of('.');
    if (p < 0 ||  extensions.find(filename.substr(p)) == extensions.end()) {
        return -1;
    }

    string filepath;
    bool fileisondisk = checkForFile(idx.depth(), filename);
    if (fileisondisk) {
        filepath = filename;
    } else {
        int p = filename.find_last_of(".");
        if ( p > 0 ){
            string ext = filename.substr(p).c_str();
            strlwr((char*)ext.c_str());
            p = ext.find_first_not_of("._abcdefghijklmnopqrstuvwxyz0123456789");
            if ( p >= 0 )
                filepath = writeToTempFile(in, "");
            else
                filepath = writeToTempFile(in, ext.c_str());
        }else
            filepath = writeToTempFile(in, "");

    }

    if (filepath.length() > 0) {

        IFilter* filter = NULL;
        void* pvfilter=NULL;

        wchar_t tmp[MAX_PATH];
        _cpycharToWide(tmp,filepath.c_str(),MAX_PATH);
        HRESULT hr = LoadIFilter(tmp,NULL,&pvfilter);
        if (hr == S_OK) {
            filter = (IFilter*)pvfilter;

            ULONG __i=0;
            hr = filter->Init(IFILTER_INIT_APPLY_INDEX_ATTRIBUTES,0,NULL,&__i);
            if (FAILED( hr )) {
                if (!fileisondisk)
                    unlink(filepath.c_str());
                return -1;
            }

            const int sbBufferLen = 1024;
            wchar_t sbBuffer[sbBufferLen];

            STAT_CHUNK ps;
            hr = filter->GetChunk(&ps);
            while ( SUCCEEDED(hr) ) {
                if (ps.flags == CHUNK_TEXT) {
                    int resultText = 0;

                    while ( resultText >= 0 ) {
                        ULONG sizeBuffer=sbBufferLen;
                        resultText = filter->GetText(&sizeBuffer, sbBuffer);
                        if (sizeBuffer > 0 ) {
                            string str = wchartoutf8(sbBuffer,sbBuffer+sizeBuffer);
                            idx.addText(str.c_str(),str.length());
                        }
                    }
                } else if ( ps.flags == CHUNK_VALUE ) {
                    PROPVARIANT *pVar;
                    while ( SUCCEEDED( hr = filter->GetValue( &pVar ) ) ) {
                        //printf("propid: %d\nkind:%d\n",ps.attribute.psProperty.propid,ps.attribute.psProperty.ulKind);
                        if ( ps.attribute.psProperty.propid == 2 &&
                             ps.attribute.psProperty.ulKind == 1 &&
                             pVar->vt == VT_LPWSTR ) {

                            string str = wchartoutf8(pVar->pwszVal,pVar->pwszVal+wcslen(pVar->pwszVal));
                            idx.addValue("title", str );
                        }
                        PropVariantClear( pVar );
                        CoTaskMemFree( pVar );
                    }
                } else {
                    printf("other flag %d\n",ps.flags);
                }
                hr = filter->GetChunk(&ps);
            }
            filter->Release();
            if (!fileisondisk)
                unlink(filepath.c_str());
            return 0;
        }


        DWORD dw = GetLastError();
        if ( dw != 0 ) {
            LPVOID lpMsgBuf;
            FormatMessage(
                FORMAT_MESSAGE_ALLOCATE_BUFFER |
                FORMAT_MESSAGE_FROM_SYSTEM,
                NULL,
                dw,
                MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
                (LPTSTR) &lpMsgBuf,
                0, NULL );

            wprintf(L"%s\n", lpMsgBuf);
            LocalFree(lpMsgBuf);
        }
    }
    if (!fileisondisk && filepath.length()>0) {
        unlink(filepath.c_str());
    }
    return -1;
}