/**
 * Tagging support.
 * Tags are a system of providing identifiers for regions of the input image.
 * Currently tags are just used for fragment type.
 */
void feature_recorder::write_tag(const pos0_t &pos0,size_t len,const string &tagName)
{
    if(flags & FLAG_DISABLED) return;           // disabled

    stringstream ss;
    string desc = pos0.alphaPart();

    // This allows you to set a breakpoint at a specific position
    // it could be a configurable variable, I guess...
#ifdef DEBUG_OFFSET
    if(len==DEBUG_OFFSET){
        std::cerr << "write_tag debug point pos0=" << pos0 << " len=" << len <<" name=" << tagName << "\n";
    }
#endif    

    /* offset is either the sbuf offset or the path offset */
    uint64_t offset = pos0.offset>0 ? pos0.offset : stoi64(pos0.path);

    /** Create what will got to the feature file */
    ss << offset << ":" << len << "\t";
    if(desc.size()>0) ss << desc << '/';
    ss << tagName;
    
    this->write(ss.str());
}
/**
 * Append the block position of every key in the LevelDB database to dst.
 *
 * Each key is converted to an integer with stoi64() and then to a block
 * position with getIntegerAsBlock().
 *
 * @param dst list that receives one entry per key found during the scan
 *
 * Whatever is thrown by the key conversion, by the list, or by
 * ENSURE_STATUS_OK is propagated to the caller; the iterator is released
 * first so that it does not leak.
 */
void Database_LevelDB::listAllLoadableBlocks(std::list<v3s16> &dst)
{
    leveldb::Iterator* it = m_database->NewIterator(leveldb::ReadOptions());
    try {
        for (it->SeekToFirst(); it->Valid(); it->Next()) {
            dst.push_back(getIntegerAsBlock(stoi64(it->key().ToString())));
        }
        ENSURE_STATUS_OK(it->status());  // Check for any errors found during the scan
    } catch (...) {
        delete it;  // the caller owns the iterator; release it on failure too
        throw;
    }
    delete it;
}
Beispiel #3
0
/**
 * Append the block position of every field name in the Redis hash to dst.
 *
 * Issues "HKEYS <hash>" and converts each returned key with stoi64() and
 * getIntegerAsBlock().
 *
 * @param dst list that receives one entry per key
 * @throws FileNotGoodException if the command fails or the reply is not an
 *         array
 */
void Database_Redis::listAllLoadableBlocks(std::list<v3s16> &dst)
{
	redisReply *reply;
	reply = (redisReply*) redisCommand(ctx, "HKEYS %s", hash.c_str());
	if(!reply)
		throw FileNotGoodException(std::string("redis command 'HKEYS %s' failed: ") + ctx->errstr);
	if(reply->type != REDIS_REPLY_ARRAY)
	{
		// Release the reply before leaving, otherwise it leaks on this path.
		freeReplyObject(reply);
		throw FileNotGoodException("Failed to get keys from database");
	}
	for(size_t i = 0; i < reply->elements; i++)
	{
		assert(reply->element[i]->type == REDIS_REPLY_STRING);
		dst.push_back(getIntegerAsBlock(stoi64(reply->element[i]->str)));
	}
	freeReplyObject(reply);
}
Beispiel #4
0
/**
 * Append the block position of every field name in the Redis hash to dst.
 *
 * Issues "HKEYS <hash>" and converts each returned key with stoi64() and
 * getIntegerAsBlock(). Reply types other than array and error are ignored.
 *
 * @param dst vector that receives one entry per key
 * @throws FileNotGoodException if the command fails or Redis answers with
 *         an error reply
 */
void Database_Redis::listAllLoadableBlocks(std::vector<v3s16> &dst)
{
	redisReply *reply = static_cast<redisReply *>(redisCommand(ctx, "HKEYS %s", hash.c_str()));
	if (!reply) {
		throw FileNotGoodException(std::string(
			"Redis command 'HKEYS %s' failed: ") + ctx->errstr);
	}
	switch (reply->type) {
	case REDIS_REPLY_ARRAY:
		for (size_t i = 0; i < reply->elements; i++) {
			assert(reply->element[i]->type == REDIS_REPLY_STRING);
			dst.push_back(getIntegerAsBlock(stoi64(reply->element[i]->str)));
		}
		// Without this break a successful listing fell through into the
		// error case and always threw.
		break;
	case REDIS_REPLY_ERROR: {
		// Copy the message first: it lives inside the reply being freed.
		const std::string message = std::string(
			"Failed to get keys from database: ") + reply->str;
		freeReplyObject(reply);
		throw FileNotGoodException(message);
	}
	}
	freeReplyObject(reply);
}
/**
 * Open a connection to the Redis server described in <mapdir>/world.mt.
 *
 * Reads redis_address and redis_hash (both required) and redis_port
 * (falls back to "6379") from the settings file, then connects.
 *
 * @param mapdir directory that contains world.mt
 * @throws std::runtime_error if a required setting is missing, the context
 *         cannot be allocated, or the connection reports an error
 */
DBRedis::DBRedis(const std::string &mapdir) :
	m_blocksReadCount(0),
	m_blocksQueriedCount(0)
{
	Settings world_mt(mapdir + "/world.mt");

	std::string address;
	// redis_hash is only looked up when redis_address was found.
	const bool configured = world_mt.check("redis_address", address)
			&& world_mt.check("redis_hash", hash);
	if (!configured) {
		throw std::runtime_error("Set redis_address and redis_hash in world.mt to use the redis backend");
	}

	int port = stoi64(world_mt.get("redis_port", "6379"));
	ctx = redisConnect(address.c_str(), port);

	if (!ctx) {
		throw std::runtime_error("Cannot allocate redis context");
	}
	if (ctx->err) {
		// Build the message before freeing the context that holds errstr.
		std::string err = std::string("Connection error: ") + ctx->errstr;
		redisFree(ctx);
		throw std::runtime_error(err);
	}
}
Beispiel #6
0
/**
 * Read a buffer from the image at the offset named by the first token of
 * a forensic path and hand it to the path printer.
 *
 * @param p                    image to read from
 * @param path                 forensic path; a trailing "/r" is stripped and
 *                             the first token is parsed as the read offset
 * @param po                   print options passed on to the path printer
 * @param process_path_bufsize number of bytes to allocate and read
 *
 * Errors (allocation failure, read failure) are reported on std::cerr and
 * the function returns without printing.
 */
static void process_open_path(const image_process &p,std::string path,scanner_params::PrintOptions &po,
                              const size_t process_path_bufsize)
{
    /* Check for "/r" in path which means print raw */
    if(path.size()>2 && path.substr(path.size()-2,2)=="/r"){
        path = path.substr(0,path.size()-2);
    }

    std::string  prefix = get_and_remove_token(path);
    int64_t offset = stoi64(prefix);

    /* Get the offset into the buffer process */
    u_char *buf = (u_char *)calloc(process_path_bufsize,1);
    if(!buf){
        std::cerr << "Cannot allocate " << process_path_bufsize << " buffer\n";
        return;
    }
    int count = p.pread(buf,process_path_bufsize,offset);
    if(count<0){
        std::cerr << p.image_fname() << ": " << strerror(errno) << " (Read Error)\n";
        free(buf);              // nothing took ownership of the buffer yet
        return;
    }

    /* make up a bogus feature recorder set and with a disabled feature recorder.
     * Then we call the path printer, which throws an exception after the printing
     * to prevent further printing.
     *
     * The printer is called when a PRINT token is found in the
     * forensic path, so that has to be added.
     */
    feature_recorder_set fs(feature_recorder_set::SET_DISABLED,feature_recorder_set::null_hasher,
                            feature_recorder_set::NO_INPUT,feature_recorder_set::NO_OUTDIR);

    pos0_t pos0(path+"-PRINT"); // insert the PRINT token
    sbuf_t sbuf(pos0,buf,count,count,true); // sbuf system will free
    scanner_params sp(scanner_params::PHASE_SCAN,sbuf,fs,po);
    try {
        process_path_printer(sp);
    }
    catch (path_printer_finished &e) {
        /* expected: the printer signals completion by throwing */
    }
}
Beispiel #7
0
/**
 * Fold one feature file (or tag file) into the global sector_typetags table.
 *
 * Files that do not end in ".txt" and histogram files are skipped. Every
 * remaining line is split on TAB; the first field gives the offset, which
 * is divided by opt_bulk_block_size to find the sector.
 *
 *  - For a "*_tags.txt" file the first field must be "<offset>:<len>" and
 *    the second field is the tag type. The more specific type becomes the
 *    sector's stype; the other is kept in scomment.
 *  - For any other feature file the feature text is appended to the
 *    sector's scomment, and the feature_indicators table may set stype.
 *
 * @param fn path of the feature file to process
 *
 * A malformed tag line terminates the program with exit(1). Exceptions
 * derived from std::exception are reported on cerr and end processing of
 * this file.
 */
static void bulk_process_feature_file(const std::string &fn)
{
    if(ends_with(fn,".txt")==false) return; // don't process binary files
    if(ends_with(fn,"_histogram.txt")==true) return; // ignore histogram files

    /* base name of the file without directory and without ".txt" */
    const string features(fn.substr(fn.rfind('/')+1,fn.size()-fn.rfind('/')-5));
    const string name(features+": ");
    const string SPACE(" ");
    const string UNKNOWN("UNKNOWN");
    bool tagfile = ends_with(fn,"_tags.txt");
    ifstream f(fn.c_str());
    if(!f.is_open()){
        cerr << "Cannot open tag input file: " << fn << "\n";
        return;
    }
    try {
        string line;
        while(getline(f,line)){
            /* skip blank lines and comments, including a comment behind a UTF-8 BOM */
            if(line.size()==0 || line[0]=='#' || line.substr(0,4)=="\357\273\277#") continue;
            vector<string> fields = split(line,'\t'); // fields of the feature file
            if(fields.size()<2) continue;             // improper formatting
            std::string &taglocation  = fields[0];
            std::string &tagtype = fields[1];
            uint64_t offset = stoi64(taglocation);
            uint64_t sector =  offset / opt_bulk_block_size;

            /* If the array hasn't been expanded to the point of this element, expand it with blanks */
            while(sector > sector_typetags.size()){
                sector_typetags.push_back(sector_typetag()); // expand to fill gap
            }

            if(tagfile){                // first pass
                /* Process a tag */
                vector<string> vals  = split(taglocation,':');

                if(vals.size()!=2){
                    std::cerr << "Invalid tag file line: " << line << " (size=" << vals.size() << ")\n";
                    exit(1);
                }

                uint32_t len = stoi(vals[1]);

                // If no data for this sector, simply append this type
                // and then continue
                if(sector_typetags.size()==sector){
                    sector_typetags.push_back(sector_typetag(len,tagtype,string("")));
                    continue;
                }

                // We have new data for the same element. Which is better?
                if(sector_typetags[sector].specificity() < sector_typetag::specificity(tagtype)){
                    // New is more specific than the old.
                    // Preserve the old one
                    sector_typetags[sector].scomment = sector_typetags[sector].stype + string("; ") + sector_typetags[sector].scomment;
                    sector_typetags[sector].stype = tagtype; // specify new tag type
                } else {
                    // New is less specific than the old, so just make new type a comment.
                    sector_typetags[sector].scomment = tagtype + string("; ") + sector_typetags[sector].scomment;
                }
                continue;
            }
            /* Process a feature, which will add specificity to the tag */
            if(sector_typetags.size()==sector){
                /* Hm... No tag (and all tags got processed first), so this is unknown */
                sector_typetags.push_back(sector_typetag(opt_bulk_block_size,UNKNOWN,SPACE));
            }
            /* append what we've learned regarding this feature */

            // If we got an MD5 as field1 and there is a second field, go with that

            sector_typetag &s = sector_typetags[sector];
            int field = 1;
            if(fields.size()>2 && fields[1].size()==32 && fields[2].size()>0 && fields[2][0]=='<'){
                field = 2;              // go with the second field
            }
            s.scomment += " " + name + fields[field];

            // append any XML if it is present
            if(field==1 && fields.size()>2 && fields[2].size()>0 && fields[2][0]=='<'){
                s.scomment += " " + name + " " + fields[2];
            }

            // Scan through the feature indicator table and if we find a match note the type
            for(int i=0;feature_indicators[i].feature_file_name;i++){
                if(features!=feature_indicators[i].feature_file_name) continue;
                // Only look at the third field when the line actually has one;
                // lines with exactly two fields are accepted above.
                if(feature_indicators[i].feature_content==0
                   || fields[1].find(feature_indicators[i].feature_content)!=string::npos
                   || (fields.size()>2
                       && fields[2].find(feature_indicators[i].feature_content)!=string::npos)){
                    s.stype = pos0_t(fields[0]).alphaPart();
                    if(s.stype.size()>1){
                        char lastchar = s.stype.at(s.stype.size()-1);
                        if(lastchar!='-' && lastchar!='/') s.stype += string("-");
                    }
                    s.stype += feature_indicators[i].dfrws_type;
                }
            }
        }
    }
    catch (const std::exception &e) {
        cerr << "ERROR: " << e.what() << " processing tagfile " << fn << "\n";
    }
}
Beispiel #8
0
/** Return the result of stoi64() applied to this string's data and sz. */
int64 VString::toi64() const
{
    return stoi64((char*)data, sz);
}
Beispiel #9
0
int main(int argc,char **argv)
{
#ifdef HAVE_MCHECK
    mtrace();
#endif
    /* setup */
    feature_recorder::set_main_threadid();
    const char *progname = argv[0];

    word_and_context_list alert_list;		/* shold be flagged */
    word_and_context_list stop_list;		/* should be ignored */

    scanner_info::scanner_config   s_config;    // the bulk extractor phase 1 config created from the command line
    BulkExtractor_Phase1::Config   cfg;
    cfg.num_threads = threadpool::numCPU();

    /* Options */
    const char *opt_path = 0;
    int         opt_recurse = 0;
    int         opt_zap = 0;
    int         opt_h = 0;
    int         opt_H = 0;
    std::string opt_sampling_params;
    std::string opt_outdir;
    bool        opt_write_feature_files = true;
    bool        opt_write_sqlite3     = false;
    bool        opt_enable_histograms=true;

    /* Startup */
    setvbuf(stdout,0,_IONBF,0);		// don't buffer stdout
    std::string command_line = dfxml_writer::make_command_line(argc,argv);
    std::vector<std::string> scanner_dirs; // where to look for scanners

    /* Add the default plugin_path */
    add_if_present(scanner_dirs,"/usr/local/lib/bulk_extractor");
    add_if_present(scanner_dirs,"/usr/lib/bulk_extractor");
    add_if_present(scanner_dirs,".");

    if (getenv("BE_PATH")) {
        std::vector<std::string> dirs = split(getenv("BE_PATH"),':');
        for(std::vector<std::string>::const_iterator it = dirs.begin(); it!=dirs.end(); it++){
            add_if_present(scanner_dirs,*it);
        }
    }

#ifdef WIN32
    setmode(1,O_BINARY);		// make stdout binary
    threadpool::win32_init();
#endif
    /* look for usage first */
    if(argc==1) opt_h=1;

    /* Process options */
    int ch;
    while ((ch = getopt(argc, argv, "A:B:b:C:d:E:e:F:f:G:g:Hhij:M:m:o:P:p:q:Rr:S:s:VW:w:x:Y:z:Z")) != -1) {
	switch (ch) {
	case 'A': feature_recorder::offset_add  = stoi64(optarg);break;
	case 'b': feature_recorder::banner_file = optarg; break;
	case 'C': feature_recorder::context_window_default = atoi(optarg);break;
	case 'd':
	{
            if(strcmp(optarg,"h")==0) debug_help();
	    int d = atoi(optarg);
	    switch(d){
	    case DEBUG_ALLOCATE_512MiB: 
		if(calloc(1024*1024*512,1)){
                    std::cerr << "-d1002 -- Allocating 512MB of RAM; may be repeated\n";
		} else {
                    std::cerr << "-d1002 -- CANNOT ALLOCATE MORE RAM\n";
		}
		break;
	    default:
		cfg.debug  = d;
		break;
	    }
            be13::plugin::set_scanner_debug(cfg.debug);
	}
	break;
	case 'E':
            be13::plugin::scanners_disable_all();
	    be13::plugin::scanners_enable(optarg);
	    break;
	case 'e':
	    be13::plugin::scanners_enable(optarg);
	    break;
	case 'F': FindOpts::get().Files.push_back(optarg); break;
	case 'f': FindOpts::get().Patterns.push_back(optarg); break;
	case 'G': cfg.opt_pagesize = scaled_stoi64(optarg); break;
	case 'g': cfg.opt_marginsize = scaled_stoi64(optarg); break;
        case 'i':
            std::cout << "info mode:\n";
            cfg.opt_info = true;
            break;
	case 'j': cfg.num_threads = atoi(optarg); break;
	case 'M': scanner_def::max_depth = atoi(optarg); break;
	case 'm': cfg.max_bad_alloc_errors = atoi(optarg); break;
	case 'o': opt_outdir = optarg;break;
	case 'P': scanner_dirs.push_back(optarg);break;
	case 'p': opt_path = optarg; break;
        case 'q':
	    if(atoi(optarg)==-1) cfg.opt_quiet = 1;// -q -1 turns off notifications
	    else cfg.opt_notify_rate = atoi(optarg);
	    break;
	case 'r':
	    if(alert_list.readfile(optarg)){
		err(1,"Cannot read alert list %s",optarg);
	    }
	    break;
	case 'R': opt_recurse = 1; break;
	case 'S':
	{
	    std::vector<std::string> params = split(optarg,'=');
	    if(params.size()!=2){
		std::cerr << "Invalid paramter: " << optarg << "\n";
		exit(1);
	    }
	    s_config.namevals[params[0]] = params[1];
	    continue;
	}
	case 's':
#if defined(HAVE_SRANDOM) && !defined(HAVE_SRANDOMDEV)
            srandom(time(0));
#endif
#if defined(HAVE_SRANDOMDEV)
            srandomdev();               // if we are sampling initialize
#endif
            opt_sampling_params = optarg;
            break;
	case 'V': std::cout << "bulk_extractor " << PACKAGE_VERSION << "\n"; exit (1);
	case 'W':
            fprintf(stderr,"-W has been deprecated. Specify with -S word_min=NN and -S word_max=NN\n");
            exit(1);
	    break;
	case 'w': if(stop_list.readfile(optarg)){
		err(1,"Cannot read stop list %s",optarg);
	    }
	    break;
	case 'x':
	    be13::plugin::scanners_disable(optarg);
	    break;
	case 'Y': {
	    std::string optargs = optarg;
	    size_t dash = optargs.find('-');
	    if(dash==std::string::npos){
		cfg.opt_offset_start = stoi64(optargs);
	    } else {
		cfg.opt_offset_start = scaled_stoi64(optargs.substr(0,dash));
		cfg.opt_offset_end   = scaled_stoi64(optargs.substr(dash+1));
	    }
	    break;
	}
	case 'z': cfg.opt_page_start = stoi64(optarg);break;
	case 'Z': opt_zap=true;break;
	case 'H': opt_H++;continue;
	case 'h': opt_h++;continue;
	}
    }

    cfg.validate();
    argc -= optind;
    argv += optind;

    if(cfg.debug & DEBUG_PRINT_STEPS) std::cerr << "DEBUG: DEBUG_PRINT_STEPS\n";
    if(cfg.debug & DEBUG_PEDANTIC) validateOrEscapeUTF8_validate = true;

    /* Create a configuration that will be used to initialize the scanners */
    scanner_info si;

    s_config.debug       = cfg.debug;
    si.config = &s_config;

    /* Make individual configuration options appear on the command line interface. */
    si.get_config("work_start_work_end",&worker::opt_work_start_work_end,
                  "Record work start and end of each scanner in report.xml file");
    si.get_config("enable_histograms",&opt_enable_histograms,
                  "Disable generation of histograms");
    si.get_config("debug_histogram_malloc_fail_frequency",&HistogramMaker::debug_histogram_malloc_fail_frequency,
                  "Set >0 to make histogram maker fail with memory allocations");
    si.get_config("hash_alg",&be_hash_name,"Specifies hash algorithm to be used for all hash calculations");
    si.get_config("dup_data_alerts",&be13::plugin::dup_data_alerts,"Notify when duplicate data is not processed");
    si.get_config("write_feature_files",&opt_write_feature_files,"Write features to flat files");
    si.get_config("write_feature_sqlite3",&opt_write_sqlite3,"Write feature files to report.sqlite3");

    /* Make sure that the user selected a valid hash */
    {
        uint8_t buf[1];
        be_hash_func(buf,0);
    }

    /* Load all the scanners and enable the ones we care about */

    be13::plugin::load_scanner_directories(scanner_dirs,s_config);
    be13::plugin::load_scanners(scanners_builtin,s_config); 
    be13::plugin::scanners_process_enable_disable_commands();

    /* Print usage if necessary */
    if(opt_H){ be13::plugin::info_scanners(true,true,scanners_builtin,'e','x'); exit(0);}
    if(opt_h){ usage(progname);be13::plugin::info_scanners(false,true,scanners_builtin,'e','x'); exit(0);}

    /* Give an error if a find list was specified
     * but no scanner that uses the find list is enabled.
     */

    if(!FindOpts::get().empty()) {
        /* Look through the enabled scanners and make sure that
	 * at least one of them is a FIND scanner
	 */
        if(!be13::plugin::find_scanner_enabled()){
            errx(1,"find words are specified with -F but no find scanner is enabled.\n");
        }
    }

    if(opt_path){
	if(argc!=1) errx(1,"-p requires a single argument.");
	process_path(argv[0],opt_path,cfg.opt_pagesize,cfg.opt_marginsize);
	exit(0);
    }
    if(opt_outdir.size()==0) errx(1,"error: -o outdir must be specified");

    /* The zap option wipes the contents of a directory, useful for debugging */
    if(opt_zap){
	DIR *dirp = opendir(opt_outdir.c_str());
	if(dirp){
	    struct dirent *dp;
	    while ((dp = readdir(dirp)) != NULL){
                std::string name = dp->d_name;
		if(name=="." || name=="..") continue;
                std::string fname = opt_outdir + std::string("/") + name;
		unlink(fname.c_str());
		std::cout << "erasing " << fname << "\n";
	    }
	}
	if(rmdir(opt_outdir.c_str())){
            std::cout << "rmdir " << opt_outdir << "\n";
        }
    }

    /* Start the clock */
    aftimer timer;
    timer.start();

    /* If output directory does not exist, we are not restarting! */
    std::string reportfilename = opt_outdir + "/report.xml";

    BulkExtractor_Phase1::seen_page_ids_t seen_page_ids; // pages that do not need re-processing
    image_process *p = 0;                                // the image process iterator

    /* Get image or directory */
    if (*argv == NULL) {
        if (opt_recurse) {
            fprintf(stderr,"filedir not provided\n");
        } else {
            fprintf(stderr,"imagefile not provided\n");
        }
        exit(1);
    }
    std::string image_fname = *argv;

    if(opt_outdir.size()==0){
        fprintf(stderr,"output directory not provided\n");
        exit(1);
    }

    if(directory_missing(opt_outdir) || directory_empty(opt_outdir)){
        /* First time running */
	/* Validate the args */
	if ( argc !=1 ) errx(1,"Disk image option not provided. Run with -h for help.");
	validate_fn(image_fname);
	if (directory_missing(opt_outdir)) be_mkdir(opt_outdir);
    } else {
	/* Restarting */
	std::cout << "Restarting from " << opt_outdir << "\n";
        bulk_extractor_restarter r(opt_outdir,reportfilename,image_fname,seen_page_ids);

        /* Rename the old report and create a new one */
        std::string old_reportfilename = reportfilename + "." + itos(time(0));
        if(rename(reportfilename.c_str(),old_reportfilename.c_str())){
            std::cerr << "Could not rename " << reportfilename << " to " << old_reportfilename << ": " << strerror(errno) << "\n";
            exit(1);
        }
    }

    /* Open the image file (or the device) now */
    p = image_process::open(image_fname,opt_recurse,cfg.opt_pagesize,cfg.opt_marginsize);
    if(!p) err(1,"Cannot open %s: ",image_fname.c_str());
    
    /***
     *** Create the feature recording set.
     *** Initialize the scanners.
     ****/

    /* Determine the feature files that will be used */
    feature_file_names_t feature_file_names;
    be13::plugin::get_scanner_feature_file_names(feature_file_names);
    uint32_t flags = 0;
    if (stop_list.size()>0)        flags |= feature_recorder_set::CREATE_STOP_LIST_RECORDERS;
    if (opt_write_sqlite3)         flags |= feature_recorder_set::ENABLE_SQLITE3_RECORDERS;
    if (!opt_write_feature_files)  flags |= feature_recorder_set::DISABLE_FILE_RECORDERS;

    {
        feature_recorder_set fs(flags,be_hash,image_fname,opt_outdir);
        fs.init(feature_file_names);
        if(opt_enable_histograms) be13::plugin::add_enabled_scanner_histograms_to_feature_recorder_set(fs);
        be13::plugin::scanners_init(fs);

        fs.set_stop_list(&stop_list);
        fs.set_alert_list(&alert_list);

        /* Look for commands that impact per-recorders */
        for(scanner_info::config_t::const_iterator it=s_config.namevals.begin();it!=s_config.namevals.end();it++){
            /* see if there is a <recorder>: */
            std::vector<std::string> params = split(it->first,':');
            if(params.size()>=3 && params.at(0)=="fr"){
                feature_recorder *fr = fs.get_name(params.at(1));
                const std::string &cmd = params.at(2);
                if(fr){
                    if(cmd=="window")        fr->set_context_window(stoi64(it->second));
                    if(cmd=="window_before") fr->set_context_window_before(stoi64(it->second));
                    if(cmd=="window_after")  fr->set_context_window_after(stoi64(it->second));
                }
            }
            /* See if there is a scanner? */
        }

        /* Store the configuration in the XML file */
        dfxml_writer  *xreport = new dfxml_writer(reportfilename,false);
        dfxml_create(*xreport,command_line,cfg);
        xreport->xmlout("provided_filename",image_fname); // save this information

        /* provide documentation to the user; the DFXML information comes from elsewhere */
        if(!cfg.opt_quiet){
            std::cout << "bulk_extractor version: " << PACKAGE_VERSION << "\n";
#ifdef HAVE_GETHOSTNAME
            char hostname[1024];
            gethostname(hostname,sizeof(hostname));
            std::cout << "Hostname: " << hostname << "\n";
#endif
            std::cout << "Input file: " << image_fname << "\n";
            std::cout << "Output directory: " << opt_outdir << "\n";
            std::cout << "Disk Size: " << p->image_size() << "\n";
            std::cout << "Threads: " << cfg.num_threads << "\n";
        }

        /****************************************************************
         *** THIS IS IT! PHASE 1!
         ****************************************************************/

        if ( fs.flag_set(feature_recorder_set::ENABLE_SQLITE3_RECORDERS )) {
            fs.db_transaction_begin();
        }
        BulkExtractor_Phase1 phase1(*xreport,timer,cfg);
        if(cfg.debug & DEBUG_PRINT_STEPS) std::cerr << "DEBUG: STARTING PHASE 1\n";

        if(opt_sampling_params.size()>0) BulkExtractor_Phase1::set_sampling_parameters(cfg,opt_sampling_params);
        xreport->add_timestamp("phase1 start");
        phase1.run(*p,fs,seen_page_ids);

        if(cfg.debug & DEBUG_PRINT_STEPS) std::cerr << "DEBUG: WAITING FOR WORKERS\n";
        std::string md5_string;
        phase1.wait_for_workers(*p,&md5_string);
        delete p;				// not strictly needed, but why not?
        p = 0;

        if ( fs.flag_set(feature_recorder_set::ENABLE_SQLITE3_RECORDERS )) {
            fs.db_transaction_commit();
        }
        xreport->add_timestamp("phase1 end");
        if(md5_string.size()>0){
            std::cout << "MD5 of Disk Image: " << md5_string << "\n";
        }

        /*** PHASE 2 --- Shutdown ***/
        if(cfg.opt_quiet==0) std::cout << "Phase 2. Shutting down scanners\n";
        xreport->add_timestamp("phase2 start");
        be13::plugin::phase_shutdown(fs);
        xreport->add_timestamp("phase2 end");

        /*** PHASE 3 --- Create Histograms ***/
        if(cfg.opt_quiet==0) std::cout << "Phase 3. Creating Histograms\n";
        xreport->add_timestamp("phase3 start");
        if(opt_enable_histograms) fs.dump_histograms(0,histogram_dump_callback,0);        // TK - add an xml error notifier!
        xreport->add_timestamp("phase3 end");

        /*** PHASE 4 ---  report and then print final usage information ***/
        xreport->push("report");
        xreport->xmlout("total_bytes",phase1.total_bytes);
        xreport->xmlout("elapsed_seconds",timer.elapsed_seconds());
        xreport->xmlout("max_depth_seen",be13::plugin::get_max_depth_seen());
        xreport->xmlout("dup_data_encountered",be13::plugin::dup_data_encountered);
        xreport->pop();			// report
        xreport->flush();

        xreport->push("scanner_times");
        fs.get_stats(xreport,stat_callback);
        xreport->pop();
        xreport->add_rusage();
        xreport->pop();			// bulk_extractor
        xreport->close();
        if(cfg.opt_quiet==0){
            float mb_per_sec = (phase1.total_bytes / 1000000.0) / timer.elapsed_seconds();

            std::cout.precision(4);
            printf("Elapsed time: %g sec.\n",timer.elapsed_seconds());
            printf("Total MB processed: %d\n",int(phase1.total_bytes / 100000));
        
            printf("Overall performance: %g MBytes/sec (%g MBytes/sec/thread)\n",
                   mb_per_sec,mb_per_sec/cfg.num_threads);
            if (fs.has_name("email")) {
                feature_recorder *fr = fs.get_name("email");
                if(fr){
                    std::cout << "Total " << fr->name << " features found: " << fr->count() << "\n";
                }
            }
        }
    }
#ifdef HAVE_MCHECK
    muntrace();
#endif
    exit(0);
}
Beispiel #10
0
/**
 * Walk one step along a forensic path and either print the buffer or
 * recurse into the next path element.
 *
 * 1. Get next token
 * 2. if prefix part is a number, skip forward that much in sbuf and repeat.
 *    if the prefix is PRINT (or empty), print the buffer
 *    if next part is a string, look up the scanner of that name and run it
 *    with this function as the recursion callback.
 * 3. After printing, throw printing_done to prevent continued analysis of
 *    the buffer.
 *
 * Print options consulted: "Content-Length" (number of bytes to print,
 * default 4096) and "Range" in the form "bytes=<first>-<last>". A Range
 * value that is too short, has no '-', or whose last byte lies before its
 * first byte is ignored.
 *
 * @param sp scanner parameters carrying the buffer, its path and the
 *           print options
 */
void process_path_printer(const scanner_params &sp)
{
    std::string new_path = sp.sbuf.pos0.path;
    std::string prefix = get_and_remove_token(new_path);

    /* Time to print ?*/
    if(prefix.size()==0 || prefix=="PRINT"){

        uint64_t print_start = 0;
        uint64_t print_len = 4096;

        /* Check for options */
        scanner_params::PrintOptions::iterator it;

        it = sp.print_options.find("Content-Length");
        if(it!=sp.print_options.end()){
            print_len = stoi64(it->second);
        }

        it = sp.print_options.find("Range");
        if(it!=sp.print_options.end()){
            const std::string &range = it->second;
            /* Only index position 5 when the string is long enough */
            if(range.size()>6 && range[5]=='='){
                size_t dash = range.find('-');
                if(dash!=std::string::npos && dash>=6){
                    std::string v1 = range.substr(6,dash-6);
                    std::string v2 = range.substr(dash+1);
                    uint64_t first = stoi64(v1);
                    uint64_t last  = stoi64(v2);
                    /* An inverted range would wrap the unsigned length */
                    if(last>=first){
                        print_start = first;
                        print_len   = last-first+1;
                    }
                }
            }
        }

        /* Clamp to the buffer without computing print_start+print_len,
         * which could wrap around for very large lengths.
         */
        if(print_start>sp.sbuf.bufsize){
            print_len = 0;                      // can't print anything
        } else if(print_len>sp.sbuf.bufsize-print_start){
            print_len = sp.sbuf.bufsize-print_start;
        }

        switch(scanner_params::getPrintMode(sp.print_options)){
        case scanner_params::MODE_HTTP:
            std::cout << "Content-Length: "             << print_len  << HTTP_EOL;
            std::cout << "Content-Range: bytes "        << print_start << "-" << print_start+print_len-1 << HTTP_EOL;
            std::cout << "X-Range-Available: bytes " << 0 << "-" << sp.sbuf.bufsize-1 << HTTP_EOL;
            std::cout << HTTP_EOL;
            sp.sbuf.raw_dump(std::cout,print_start,print_len); // send to stdout as binary
            break;
        case scanner_params::MODE_RAW:
            std::cout << print_len << HTTP_EOL;
            std::cout.flush();
            sp.sbuf.raw_dump(std::cout,print_start,print_len); // send to stdout as binary
            break;
        case scanner_params::MODE_HEX:
            sp.sbuf.hex_dump(std::cout,print_start,print_len);
            break;
        case scanner_params::MODE_NONE:
            break;
        }
        throw printing_done;
        //return;                       // our job is done
    }
    /* If we are in an offset block, process recursively with the offset.
     * prefix is non-empty here; the cast keeps isdigit() defined for
     * characters outside the ASCII range.
     */
    if(isdigit(static_cast<unsigned char>(prefix[0]))){
        uint64_t offset = stoi64(prefix);
        if(offset>sp.sbuf.bufsize){
            printf("Error: %s only has %u bytes; can't offset to %u\n",
                   new_path.c_str(),(unsigned int)sp.sbuf.bufsize,(unsigned int)offset);
            return;
        }
        process_path_printer(scanner_params(scanner_params::PHASE_SCAN,
                                            sbuf_t(new_path,sp.sbuf+offset),
                                            sp.fs,sp.print_options));
        return;
    }
    /* Find the scanner and use it */
    scanner_t *s = be13::plugin::find_scanner(lowerstr(prefix));
    if(s){
        (*s)(scanner_params(scanner_params::PHASE_SCAN,
                            sbuf_t(new_path,sp.sbuf),
                            sp.fs,sp.print_options),
             recursion_control_block(process_path_printer,prefix));
        return;
    }
    std::cerr << "Unknown name in path: " << prefix << "\n";
}