/**
 * Tell whether the analyzed item can be handed to a helper program by path.
 *
 * @param idx the analysis result whose depth() and path() are inspected
 * @return true only when idx.depth() is not above 0 and stat() succeeds on
 *         idx.path(); false otherwise.
 *
 * NOTE(review): a depth above 0 presumably means the data is nested inside
 * another stream and therefore has no file of its own - confirm.
 */
bool
HelperEndAnalyzer::checkForFile(const AnalysisResult& idx) const {
    struct stat info;
    // short-circuit: stat() is never called for items with depth above 0
    return idx.depth() <= 0 && stat(idx.path().c_str(), &info) == 0;
}
/**
 * Analyze a stream by running a configured helper program on it and feeding
 * the program's output to a TextEndAnalyzer.
 *
 * The first bytes of the stream (up to 1024 are requested) are used to look
 * up a helper in helperconfig. Depending on the helper record, the data is
 * either piped to the helper, or the helper is started with every "%s"
 * argument replaced by a file path: the path of @p idx when checkForFile()
 * accepts it, otherwise a temporary file written from the stream, which is
 * unlinked again after the analysis.
 *
 * @param idx result object passed on to the TextEndAnalyzer
 * @param in  stream to analyze
 * @return -1 when @p in is null, when no data could be read, when no helper
 *         matches, or (on Windows builds) always, since the helper code is
 *         compiled out there; otherwise the value returned by
 *         TextEndAnalyzer::analyze(). If @p in ends up in Error status,
 *         m_error is set from the stream and Error is returned instead.
 */
signed char
HelperEndAnalyzer::analyze(AnalysisResult& idx, InputStream* in){
    if (!in) {
        return -1;
    }

    signed char result = -1;

    // peek at the start of the stream, then rewind so the helper sees it all
    const char* header;
    const int32_t headerLength = in->read(header, 1024, 0);
    in->reset(0);

    HelperProgramConfig::HelperRecord* helper = 0;
    if (headerLength > 0) {
        helper = helperconfig.findHelper(header, headerLength);
    }

    if (helper) {
#if !defined(_WIN32) && !defined(_WIN64)
#warning this does not work on windows because processinputstream does not compile!
        if (helper->readfromstdin) {
            // the helper receives the data through the process stream
            ProcessInputStream helperOutput(helper->arguments, in);
            TextEndAnalyzer textAnalyzer;
            result = textAnalyzer.analyze(idx, &helperOutput);
        } else {
            // the helper wants a file name: use the real file when there is
            // one, otherwise dump the stream into a temporary file
            const bool onDisk = checkForFile(idx);
            string target;
            if (!onDisk) {
                target = writeToTempFile(in);
            } else {
                target = idx.path();
            }

            // substitute the path for every "%s" placeholder argument
            vector<string> commandLine = helper->arguments;
            for (vector<string>::iterator arg = commandLine.begin();
                    arg != commandLine.end(); ++arg) {
                if (*arg == "%s") {
                    *arg = target;
                }
            }

            ProcessInputStream helperOutput(commandLine);
            TextEndAnalyzer textAnalyzer;
            result = textAnalyzer.analyze(idx, &helperOutput);

            // the temporary copy is ours to clean up
            if (!onDisk) {
                unlink(target.c_str());
            }
        }
#endif
    }

    // an error on the source stream overrides whatever the helper produced
    if (in->status() == Error) {
        m_error = in->error();
        result = Error;
    }
    return result;
}
signed char StreamAnalyzerPrivate::analyze(AnalysisResult& idx, StreamBase<char>* input) { //cerr << "analyze " << idx.path().c_str() << endl; // retrieve or construct the through analyzers and end analyzers vector<vector<StreamThroughAnalyzer*> >::iterator tIter; vector<vector<StreamEndAnalyzer*> >::iterator eIter; while ((int)through.size() <= idx.depth()) { addThroughAnalyzers(); addEndAnalyzers(); } tIter = through.begin() + idx.depth(); eIter = end.begin() + idx.depth(); // read the headersize size before connecting the throughanalyzers // This ensures that the first read is at least this size, even if the // throughanalyzers read smaller chunks. bool finished = false; const char* header = 0; int32_t headersize = 1024; if (input) { headersize = input->read(header, headersize, headersize); input->reset(0); if (headersize < 0) finished = true; } // insert the through analyzers vector<StreamThroughAnalyzer*>::iterator ts; for (ts = tIter->begin(); (input == 0 || input->status() == Ok) && ts != tIter->end(); ++ts) { (*ts)->setIndexable(&idx); input = (*ts)->connectInputStream(input); if (input && input->position() != 0) { cerr << "Analyzer " << (*ts)->name() << " has left the stream in a bad state." << endl; } } // reread the header so we can use it for the endanalyzers if (input && headersize > 0) { headersize = input->read(header, headersize, headersize); if (headersize <= 0) { finished = true; } else if (input->reset(0) != 0) { cerr << "resetting is impossible!! 
pos: " << input->position() << " status: " << input->status() << endl; } } else { // indicate that we have no data in the stream headersize = -1; finished = true; } size_t es = 0; size_t itersize = eIter->size(); while (!finished && es != itersize) { StreamEndAnalyzer* sea = (*eIter)[es]; if (sea->checkHeader(header, headersize)) { idx.setEndAnalyzer(sea); char ar = sea->analyze(idx, input); if (ar) { // FIXME: find either a NIE-compliant way to report errors or use some API for this // idx.addValue(errorfield, sea->name() + string(": ") // + sea->error()); if (!idx.config().indexMore()) { removeIndexable(idx.depth()); return -1; } int64_t pos = input->reset(0); if (pos != 0) { // could not reset cerr << "could not reset stream of " << idx.path().c_str() << " from pos " << input->position() << " to 0 after reading with " << sea->name() << ": " << sea->error().c_str() << endl; finished = true; } else { // refresh the pointer to the start of the data headersize = input->read(header, headersize, headersize); if (input->reset(0) != 0) { cerr << "resetting again is impossible!! 
pos: " << input->position() << " status: " << input->status() << endl; } if (headersize < 0) finished = true; } } else { finished = true; } eIter = end.begin() + idx.depth(); } if (!finished) { finished = !conf.indexMore(); } es++; } idx.setEndAnalyzer(0); if (input) { // make sure the entire stream is read if the size is not known bool ready; tIter = through.begin() + idx.depth(); uint32_t skipsize = 4096; do { // ask the analyzerconfiguration if we should continue int64_t max = idx.config().maximalStreamReadLength(idx); if (!idx.config().indexMore() || (max != -1 && input->position() >= max)) { // we are done return 0; } ready = input->size() != -1; vector<StreamThroughAnalyzer*>::iterator ts; for (ts = tIter->begin(); ready && ts != tIter->end(); ++ts) { ready = (*ts)->isReadyWithStream(); } if (!ready) { input->skip(skipsize); if (skipsize < 131072) { skipsize *= 4; } } } while (!ready && input->status() == Ok); if (input->status() == Error) { fprintf(stderr, "Error: %s\n", input->error()); removeIndexable(idx.depth()); return -2; } } // store the size of the stream if (input && input->status() != Error && input->size() >= 0) { // TODO remove cast idx.addValue(sizefield, (uint32_t)input->size()); } // remove references to the analysisresult before it goes out of scope removeIndexable(idx.depth()); return 0; }