/* Tell whether a parsed log entry passes the crawler filter.
 *
 * Returns 0 when crawler filtering is enabled (conf.ignore_crawlers) and
 * is_crawler() flags the entry's user agent; returns 1 in every other
 * case. is_crawler() is only consulted when filtering is enabled. */
static int
exclude_crawler (GLogItem * glog)
{
  /* filtering disabled: every entry passes */
  if (!conf.ignore_crawlers)
    return 1;

  if (is_crawler (glog->agent))
    return 0;

  return 1;
}
/* Process a single line from the log and store its data accordingly.
 *
 * logger - running counters for this parse (process, invalid, exclude_ip,
 *          resp_size); updated in place.
 * line   - raw log line. NULL or empty lines are counted as invalid; lines
 *          starting with '#' or '\n' are skipped without being counted.
 * test   - when non-zero the line is only parsed and validated; nothing is
 *          stored.
 *
 * The parsed item is released through free_logger() on every path that
 * allocated it. Always returns 0. */
static int
process_log (GLog * logger, char *line, int test)
{
  GLogItem *glog;
  char buf[DATE_LEN];
  char *qmark = NULL, *req_key = NULL;
  pthread_mutex_t *spinner_lock = NULL;
  int not_found = 0;            /* 404s */

  /* start from an empty date key so a failed conversion is detectable */
  memset (buf, 0, sizeof (buf));

  if ((line == NULL) || (*line == '\0')) {
    logger->invalid++;
    return 0;
  }

  /* ignore comments */
  if (*line == '#' || *line == '\n')
    return 0;

  /* Decide once whether the spinner mutex guards the counter update and
   * remember which mutex was taken, so that lock and unlock always pair up
   * even if the spinner state changes while the counters are updated. */
  if (parsing_spinner != NULL && parsing_spinner->state == SPN_RUN)
    spinner_lock = &parsing_spinner->mutex;

  if (spinner_lock != NULL)
    pthread_mutex_lock (spinner_lock);

  logger->process++;
#ifdef TCB_BTREE
  process_generic_data (ht_general_stats, "total_requests");
#endif

  if (spinner_lock != NULL)
    pthread_mutex_unlock (spinner_lock);

  glog = init_log_item (logger);

  if (parse_format (glog, conf.log_format, conf.date_format, line) == 1) {
    logger->invalid++;
#ifdef TCB_BTREE
    process_generic_data (ht_general_stats, "failed_requests");
#endif
    goto cleanup;
  }

  /* must have the following fields */
  if (glog->host == NULL || glog->date == NULL || glog->status == NULL ||
      glog->req == NULL) {
    logger->invalid++;
    goto cleanup;
  }

  /* in test mode the line only needs to parse; store nothing */
  if (test)
    goto cleanup;

  convert_date (buf, glog->date, conf.date_format, "%Y%m%d", DATE_LEN);
  /* buf is an array and can never be NULL; a buffer that is still empty
   * after the call means no date key was produced */
  if (buf[0] == '\0')
    goto cleanup;

  /* ignore host */
  if (conf.ignore_ip_idx && ip_in_range (glog->host)) {
    logger->exclude_ip++;
#ifdef TCB_BTREE
    process_generic_data (ht_general_stats, "exclude_ip");
#endif
    goto cleanup;
  }

  /* ignore crawlers */
  /* NOTE(review): glog->agent may still be NULL here (the fallback below
   * runs afterwards) -- confirm is_crawler() tolerates a NULL agent. */
  if (conf.ignore_crawlers && is_crawler (glog->agent))
    goto cleanup;

  /* agent will be null in cases where %u is not specified */
  if (glog->agent == NULL)
    glog->agent = alloc_string ("-");

  /* process visitors, browsers, and OS */
  process_unique_data (glog->host, buf, glog->agent);

  /* process agents that are part of a host */
  if (conf.list_agents)
    process_host_agents (glog->host, glog->agent);

  /* strncmp() stops at the terminator, so a status shorter than three
   * characters is never read past its end */

  /* is this a 404? */
  if (strncmp (glog->status, "404", 3) == 0) {
    not_found = 1;
  }
  /* treat 444 as 404? */
  else if (strncmp (glog->status, "444", 3) == 0 && conf.code444_as_404) {
    not_found = 1;
  }
  /* check if we need to remove the request's query string; this is only
   * reached for requests that were not classified as not-found above */
  else if (conf.ignore_qstr) {
    if ((qmark = strchr (glog->req, '?')) != NULL) {
      /* keep a request that consists of nothing but a query string */
      if ((qmark - glog->req) > 0)
        *qmark = '\0';
    }
  }

  req_key = xstrdup (glog->req);

  /* include HTTP method/protocol to request */
  if (conf.append_method && glog->method) {
    str_to_upper (glog->method);
    append_method_to_request (&req_key, glog->method);
  }

  if (conf.append_protocol && glog->protocol) {
    str_to_upper (glog->protocol);
    append_protocol_to_request (&req_key, glog->protocol);
  }

  if ((conf.append_method) || (conf.append_protocol))
    req_key = deblank (req_key);

  /* process 404s */
  if (not_found)
    process_request (ht_not_found_requests, req_key, glog);
  /* process static files */
  else if (verify_static_content (glog->req))
    process_request (ht_requests_static, req_key, glog);
  /* process regular files */
  else
    process_request (ht_requests, req_key, glog);

  /* process referrers */
  process_referrers (glog->ref);

  /* process status codes */
  process_generic_data (ht_status_code, glog->status);

  /* process hosts */
  process_generic_data (ht_hosts, glog->host);

  /* process bandwidth */
  process_request_meta (ht_date_bw, buf, glog->resp_size);
  process_request_meta (ht_file_bw, req_key, glog->resp_size);
  process_request_meta (ht_host_bw, glog->host, glog->resp_size);

  /* process time taken to serve the request, in microseconds */
  process_request_meta (ht_file_serve_usecs, req_key, glog->serve_time);
  process_request_meta (ht_host_serve_usecs, glog->host, glog->serve_time);

  logger->resp_size += glog->resp_size;
#ifdef TCB_BTREE
  process_request_meta (ht_general_stats, "bandwidth", glog->resp_size);
#endif

cleanup:
  free_logger (glog);
  /* req_key is NULL on the early-exit paths; free(NULL) is a no-op */
  free (req_key);

  return 0;
}