Exemplo n.º 1
0
// Parse one NCSA-format access.log line into the components of a LogEntry.
//
// line  - the raw log line to parse.
// entry - receives vhost, hostname, timestamp, path, response code and
//         response size, plus referrer, user agent and pid when the line
//         carries them.
//
// Returns false as soon as the line start, the date or the request section
// fails to match its pattern, or when the month cannot be determined.
// Otherwise returns the result of entry.validate().
bool NCSALog::parseLine(std::string& line, LogEntry& entry) {

    std::vector<std::string> matches;
    ls_ncsa_entry_start.match(line, &matches);

    if(matches.size() != 5) {
        return false;
    }

    //get details (matches[2] is captured but not used here)
    entry.vhost    = matches[0];
    entry.hostname = matches[1];

    //copy the remaining sections out before 'matches' is reused below
    std::string request_str = matches[4];
    std::string datestr     = matches[3];

    //parse timestamp
    matches.clear();
    ls_ncsa_entry_date.match(datestr, &matches);

    if(matches.size() != 8) {
        return false;
    }

    int day    = atoi(matches[0].c_str());
    int month  = atoi(matches[1].c_str());
    int year   = atoi(matches[2].c_str());
    int hour   = atoi(matches[3].c_str());
    int minute = atoi(matches[4].c_str());
    int second = atoi(matches[5].c_str());

    if(month) {
        //numeric month: convert 1-12 to the 0-11 range used by mktime
        month--;
    } else {
        //atoi gave 0, so try the month as a name. Start from -1 so that an
        //unrecognised name is rejected below rather than read as January.
        month = -1;

        for(int i=0;i<12;i++) {
            if(strcmp(matches[1].c_str(), ls_ncsa_months[i])==0) {
                month=i;
                break;
            }
        }
    }

    //could not parse month (range 0-11 as used by mktime)
    if(month<0 || month>11) return false;

    //convert zone to utc offset: first two characters are hours, the next
    //two are minutes. substr(2,2) throws std::out_of_range when the string is
    //shorter than 2 characters, so only take the minutes when they can exist.
    const std::string& zone = matches[7];

    int tz_hour = atoi(zone.substr(0,2).c_str());
    int tz_min  = (zone.size() > 2) ? atoi(zone.substr(2,2).c_str()) : 0;

    int tz_offset = tz_hour * 3600 + tz_min * 60;

    if(matches[6] == "-") {
        tz_offset = -tz_offset;
    }

    //zero the whole struct so fields not assigned below are not left
    //indeterminate when passed to mktime
    struct tm time_str = {};

    time_str.tm_year  = year - 1900;
    time_str.tm_mon   = month;
    time_str.tm_mday  = day;
    time_str.tm_hour  = hour;
    time_str.tm_min   = minute;
    time_str.tm_sec   = second;
    time_str.tm_isdst = -1;

    // NOTE(review): mktime interprets time_str as local time, so subtracting
    // the log's own offset below only produces UTC when the process time zone
    // is UTC - confirm whether this is intended.
    entry.timestamp = mktime(&time_str);

    //apply utc offset
    entry.timestamp -= tz_offset;

    //parse request section: [1] path, [3] response code, [4] response size,
    //[5] optional trailing section; [0] and [2] are not used
    matches.clear();
    ls_ncsa_entry_request.match(request_str, &matches);

    if(matches.size() < 5) {
        return false;
    }

    entry.path          = matches[1];
    entry.response_code = matches[3];
    entry.response_size = atol(matches[4].c_str());

    if(matches.size() > 5) {
        std::string agentstr = matches[5];
        matches.clear();
        ls_ncsa_entry_agent.match(agentstr, &matches);

        if(matches.size()==3) {
            entry.referrer   = matches[0];
            entry.user_agent = matches[1];

            std::string extra = matches[2];

            // NOTE: could store extra fields and allow --paddle-mode to address them via their offset
            if(!extra.empty()) {

                std::vector<std::string> extra_fields;
                if(ls_ncsa_extra_field.matchAll(extra, &extra_fields)) {

                    //only the first extra field is used, as the pid
                    if(!extra_fields.empty() && !extra_fields[0].empty()) {
                        entry.pid = extra_fields[0];

                        //strip one pair of surrounding double quotes
                        if(entry.pid.size()>=2 && entry.pid[0] == '"' && entry.pid[entry.pid.size()-1] == '"') {
                            entry.pid = entry.pid.substr(1, entry.pid.size()-2);
                        }
                    }
                }
            }
        }
    }

    //success state and colour are set by LogEntry itself (the original note
    //here read: "successful if response code less than 400")
    entry.setSuccess();
    entry.setResponseColour();

    return entry.validate();
}