Ejemplo n.º 1
0
/**
 * Tell whether the analyzed item can be reached through its path on disk.
 *
 * Only results whose depth is not above 0 are looked up; for those the
 * answer is whether stat() succeeds on the result's path.
 *
 * @param idx the analysis result whose depth and path are inspected
 * @return true when the depth is not above 0 and stat() succeeds on the path
 **/
bool
HelperEndAnalyzer::checkForFile(const AnalysisResult& idx) const {
    if (idx.depth() <= 0) {
        // stat() returns 0 on success
        struct stat info;
        return stat(idx.path().c_str(), &info) == 0;
    }
    // results at a depth above 0 are never looked up on disk
    return false;
}
Ejemplo n.º 2
0
/**
 * Run a configured helper program on the stream and analyze its output with
 * a TextEndAnalyzer.
 *
 * Up to 1024 bytes are peeked from the start of @p in and handed to
 * helperconfig.findHelper(). When a helper record is returned, the helper is
 * started through a ProcessInputStream: either it is given @p in directly
 * (readfromstdin), or every "%s" argument is replaced by a file path. That
 * path is idx.path() when checkForFile() accepts it, otherwise the result of
 * writeToTempFile(), which is unlinked again after the analysis.
 *
 * @param idx the analysis result passed on to the TextEndAnalyzer
 * @param in  the stream to analyze; a null pointer yields -1
 * @return -1 when @p in is null or no helper was run, Error when @p in is in
 *         the Error state afterwards, otherwise the value returned by
 *         TextEndAnalyzer::analyze()
 **/
signed char
HelperEndAnalyzer::analyze(AnalysisResult& idx, InputStream* in){
    if (in == 0) {
        return -1;
    }

    signed char result = -1;

    // peek at the start of the stream and rewind it again
    const char* head;
    const int32_t headLength = in->read(head, 1024, 0);
    in->reset(0);

    // only look for a helper when the peek produced data
    HelperProgramConfig::HelperRecord* helper = 0;
    if (headLength > 0) {
        helper = helperconfig.findHelper(head, headLength);
    }

    if (helper) {
//        fprintf(stderr, "calling %s on %s\n", helper->arguments[0].c_str(),
//            idx.path().c_str());
#if !defined(_WIN32) && !defined(_WIN64)
#warning this does not work on windows because processinputstream does not compile!
        if (!helper->readfromstdin) {
            // the helper wants a file name: use the item's own path when
            // checkForFile() accepts it, else the path from writeToTempFile()
            const bool onDisk = checkForFile(idx);
            string target;
            if (onDisk) {
                target = idx.path();
            } else {
                target = writeToTempFile(in);
            }

            // substitute the file name for every "%s" argument
            vector<string> command = helper->arguments;
            for (vector<string>::iterator arg = command.begin();
                    arg != command.end(); ++arg) {
                if (*arg == "%s") {
                    *arg = target;
                }
            }

            ProcessInputStream child(command);
            TextEndAnalyzer text;
            result = text.analyze(idx, &child);

            // the file from writeToTempFile() is removed after the analysis
            if (!onDisk) {
                unlink(target.c_str());
            }
        } else {
            // the helper is handed the stream itself
            ProcessInputStream child(helper->arguments, in);
            TextEndAnalyzer text;
            result = text.analyze(idx, &child);
        }
#endif
    }

    // an error on the original stream overrides whatever the helper produced
    if (in->status() == Error) {
        m_error = in->error();
        result = Error;
    }
    return result;
}
Ejemplo n.º 3
0
/**
 * Run the through analyzers and end analyzers for one analysis result.
 *
 * The steps are:
 *  - make sure `through` and `end` have an entry for idx.depth(),
 *  - read a header of up to 1024 bytes from the raw stream,
 *  - hand idx to each through analyzer and let it wrap the stream,
 *  - reread the header from the wrapped stream,
 *  - offer the header to each end analyzer in turn and run the ones whose
 *    checkHeader() accepts it, until one returns 0,
 *  - skip through the rest of the stream until its size is known and all
 *    through analyzers report isReadyWithStream(),
 *  - add the stream size to idx.
 *
 * Once the through analyzers have been handed idx, every return path calls
 * removeIndexable(idx.depth()) so that no reference to idx is kept after
 * this function returns.
 *
 * @param idx   the analysis result that is being filled
 * @param input the stream to analyze; may be null, in which case no end
 *              analyzer is run
 * @return 0 when the analysis ran to its end or the configuration stopped
 *         the final read-out, -1 when an end analyzer returned a nonzero
 *         value and idx.config().indexMore() is false, -2 when the stream is
 *         in the Error state after the final read-out
 **/
signed char
StreamAnalyzerPrivate::analyze(AnalysisResult& idx, StreamBase<char>* input) {
    //cerr << "analyze " << idx.path().c_str() << endl;

    // retrieve or construct the through analyzers and end analyzers
    vector<vector<StreamThroughAnalyzer*> >::iterator tIter;
    vector<vector<StreamEndAnalyzer*> >::iterator eIter;
    while ((int)through.size() <= idx.depth()) {
        addThroughAnalyzers();
        addEndAnalyzers();
    }
    tIter = through.begin() + idx.depth();
    eIter = end.begin() + idx.depth();

    // read the headersize size before connecting the throughanalyzers
    // This ensures that the first read is at least this size, even if the
    // throughanalyzers read smaller chunks.
    bool finished = false;
    const char* header = 0;
    int32_t headersize = 1024;
    if (input) {
        headersize = input->read(header, headersize, headersize);
        input->reset(0);
        if (headersize < 0) finished = true;
    }

    // insert the through analyzers
    vector<StreamThroughAnalyzer*>::iterator ts;
    for (ts = tIter->begin(); (input == 0 || input->status() == Ok)
            && ts != tIter->end(); ++ts) {
        (*ts)->setIndexable(&idx);
        input = (*ts)->connectInputStream(input);
        if (input && input->position() != 0) {
            cerr << "Analyzer " << (*ts)->name() << " has left the stream in a bad state." << endl;
        }
    }

    // reread the header so we can use it for the endanalyzers
    if (input && headersize > 0) {
        headersize = input->read(header, headersize, headersize);
        if (headersize <= 0) {
            finished = true;
        } else if (input->reset(0) != 0) {
            cerr << "resetting is impossible!! pos: " << input->position()
                << " status: " << input->status() << endl;
        }
    } else {
        // indicate that we have no data in the stream
        headersize = -1;
        finished = true;
    }

    // try the end analyzers one by one until one of them returns 0
    size_t es = 0;
    size_t itersize = eIter->size();
    while (!finished && es != itersize) {
        StreamEndAnalyzer* sea = (*eIter)[es];
        if (sea->checkHeader(header, headersize)) {
            idx.setEndAnalyzer(sea);
            char ar = sea->analyze(idx, input);
            if (ar) {
// FIXME: find either a NIE-compliant way to report errors or use some API for this
//                idx.addValue(errorfield, sea->name() + string(": ")
//                    + sea->error());
                if (!idx.config().indexMore()) {
                    removeIndexable(idx.depth());
                    return -1;
                }
                // rewind so that the next end analyzer starts at position 0
                int64_t pos = input->reset(0);
                if (pos != 0) { // could not reset
                    cerr << "could not reset stream of " << idx.path().c_str()
                        << " from pos " << input->position()
                        << " to 0 after reading with " << sea->name()
                        << ": " << sea->error().c_str() << endl;
                    finished = true;
                } else {
                    // refresh the pointer to the start of the data
                    headersize = input->read(header, headersize, headersize);
                    if (input->reset(0) != 0) {
                        cerr << "resetting again is impossible!! pos: "
                             << input->position() << " status: "
                             << input->status() << endl;
                    }
                    if (headersize < 0) finished = true;
                }
            } else {
                finished = true;
            }
            // recompute the iterator after the end analyzer has run
            eIter = end.begin() + idx.depth();
        }
        if (!finished) {
            finished = !conf.indexMore();
        }
        es++;
    }
    idx.setEndAnalyzer(0);
    if (input) {
        // make sure the entire stream is read if the size is not known
        bool ready;
        tIter = through.begin() + idx.depth();
        // the skip size starts at 4096 and is multiplied by 4 per round
        // for as long as it is below 131072
        uint32_t skipsize = 4096;
        do {
            // ask the analyzerconfiguration if we should continue
            int64_t max = idx.config().maximalStreamReadLength(idx);
            if (!idx.config().indexMore()
                    || (max != -1 && input->position() >= max)) {
                // we are done; remove the references to the analysisresult
                // here as well, like every other exit of this function does
                removeIndexable(idx.depth());
                return 0;
            }
            ready = input->size() != -1;
            vector<StreamThroughAnalyzer*>::iterator ts;
            for (ts = tIter->begin(); ready && ts != tIter->end(); ++ts) {
                ready = (*ts)->isReadyWithStream();
            }
            if (!ready) {
                input->skip(skipsize);
                if (skipsize < 131072) {
                    skipsize *= 4;
                }
            }
        } while (!ready && input->status() == Ok);
        if (input->status() == Error) {
            fprintf(stderr, "Error: %s\n", input->error());
            removeIndexable(idx.depth());
            return -2;
        }
    }

    // store the size of the stream
    if (input && input->status() != Error && input->size() >= 0) {
        // TODO remove cast
        idx.addValue(sizefield, (uint32_t)input->size());
    }

    // remove references to the analysisresult before it goes out of scope
    removeIndexable(idx.depth());
    return 0;
}