/* Allocate a new buffer and put it at the end of the chain of buffers * scheduled for output. Return 1 if we have more bytes in buffers * than allowed afterwards. */ static INLINE int append_buffer(struct pike_string *s) /* 1=buffer full */ { struct buffer *b; debug_malloc_touch(s); if(THIS->fd!= -1) { fd_lseek(THIS->fd, THIS->pos, SEEK_SET); fd_write(THIS->fd, s->str, s->len); THIS->pos+=s->len; return 0; } else { nbuffers++; b=ALLOC_STRUCT(buffer); b->next=NULL; b->s=s; sbuffers += s->len; add_ref(s); if (THIS->lastbuffer) THIS->lastbuffer->next=b; else THIS->firstbuffer=b; THIS->lastbuffer=b; THIS->bytes_in_buffer+=s->len; } return THIS->bytes_in_buffer > MAX_BYTES_IN_BUFFER; }
/* Parse a log file according to a log format string and report
 * per-day statistics through callbacks.
 *
 * Arguments on the Pike stack (in order):
 *   1. log format            (string)
 *   2. statfun               (function) - called once at the end with
 *                            (lines, megabytes read, 1)
 *   3. daily                 (function) - called with 23 arguments for every
 *                            completed day (3 date ints, 14 mappings and
 *                            6 arrays of 24 hourly values)
 *   4. file                  (object with an open fd, or 8-bit file name)
 *   5. page extensions       (multiset)
 *   6. optional string, stored in notref
 *   7. optional integer start offset in the file
 *
 * The arguments are popped and the file offset reached is pushed as the
 * result.
 *
 * NOTE: several locals below look unused in this function body; they are
 * kept because macros used here (BUFSET, BUFPOINT, SETPOINT, DO_REFERRER,
 * DO_ERREF) are defined elsewhere and may refer to them.
 *
 * NOTE(review): the mappings allocated in the declarations are not released
 * on the Pike_error() paths during argument checking.
 */
static void f_ultraparse( INT32 args )
{
  FD f = -1;
  int lines=0, cls, c=0, my_fd=1, tzs=0, state=0, next;
  unsigned char *char_pointer=0;
  /* array with offsets for fields in the string buffer */
  int buf_points[16];
  INT32 v=0, offs0=0, len=0, bytes=0, gotdate=0;
  INT32 last_hour=0, last_date=0, last_year=0, last_month=0,
    this_date=0, broken_lines=0, tmpinteger=0, field_position=0;
  time_t start;
  unsigned char *read_buf;
  struct svalue *statfun, *daily, *pagexts=0, *file, *refsval, *log_format;
  unsigned char *buf;
  char *field_buf;
#ifdef BROKEN_LINE_DEBUG
  INT32 broken_line_pos=0;
  unsigned char *broken_line;
#endif
  INT32 *state_list, *save_field_num, *field_endings, num_states;
  /* Number of entries allocated for each of the three state arrays. */
  INT32 state_slots = 0;
  char *notref = 0;
  INT32 state_pos=0, bufpos=0, i, fieldnum=0;
  struct pike_string *url_str = 0, *ref_str = 0, *rfc_str = 0,
    *hst_str = 0, *tmpagent = 0;
  struct svalue *url_sval;
  ONERROR unwind_protect;
  unsigned INT32 hits_per_hour[24];
  unsigned INT32 hosts_per_hour[24];
  unsigned INT32 pages_per_hour[24];
  unsigned INT32 sessions_per_hour[24];
  double kb_per_hour[24];
  unsigned INT32 session_length[24];
  struct mapping *hits_per_error = allocate_mapping(10);
  struct mapping *error_urls = allocate_mapping(10);
  struct mapping *error_refs = allocate_mapping(10);
  struct mapping *user_agents = allocate_mapping(10);
  struct mapping *directories = allocate_mapping(20);
  struct mapping *referrers = allocate_mapping(1);
  struct mapping *refsites = allocate_mapping(1);
  struct mapping *referredto = allocate_mapping(1);
  struct mapping *pages = allocate_mapping(1);
  struct mapping *hosts = allocate_mapping(1);
  struct mapping *hits = allocate_mapping(1);
  struct mapping *session_start = allocate_mapping(1);
  struct mapping *session_end = allocate_mapping(1);
  struct mapping *hits20x = allocate_mapping(300);
  struct mapping *hits302 = allocate_mapping(2);
  struct mapping *sites = allocate_mapping(1);
  struct mapping *domains = allocate_mapping(1);
  struct mapping *topdomains = allocate_mapping(1);
  struct mapping *tmpdest = NULL;

  /* Optional trailing arguments: start offset, then the notref string. */
  if(args>6 && sp[-1].type == T_INT) {
    offs0 = sp[-1].u.integer;
    pop_n_elems(1);
    --args;
  }
  if(args>5 && sp[-1].type == T_STRING) {
    notref = sp[-1].u.string->str;
    pop_n_elems(1);
    --args;
  }

  lmu = 0;
  get_all_args("UltraLog.ultraparse", args, "%*%*%*%*%*", &log_format,
	       &statfun, &daily, &file, &pagexts);
  if(log_format->type != T_STRING)
    Pike_error("Bad argument 1 to Ultraparse.ultraparse, expected string.\n");
  if(statfun->type != T_FUNCTION)
    Pike_error("Bad argument 2 to Ultraparse.ultraparse, expected function.\n");
  if(daily->type != T_FUNCTION)
    Pike_error("Bad argument 3 to Ultraparse.ultraparse, expected function.\n");
  if(pagexts->type != T_MULTISET)
    Pike_error("Bad argument 5 to Ultraparse.ultraparse, expected multiset.\n");

  if(file->type == T_OBJECT)
  {
    f = fd_from_object(file->u.object);

    if(f == -1)
      Pike_error("UltraLog.ultraparse: File is not open.\n");
    my_fd = 0;
  } else if(file->type == T_STRING &&
	    file->u.string->size_shift == 0) {
    do {
      f=fd_open(file->u.string->str, fd_RDONLY, 0);
    } while(f < 0 && errno == EINTR);

    /* The descriptor, not errno, tells us whether the open failed. */
    if(f < 0)
      Pike_error("UltraLog.ultraparse(): Failed to open file for reading (errno=%d).\n",
		 errno);
  } else
    Pike_error("Bad argument 4 to UltraLog.ultraparse, expected string or object .\n");

  state_slots = log_format->u.string->len + 3;
  state_list = malloc(state_slots * sizeof(INT32));
  save_field_num = malloc(state_slots * sizeof(INT32));
  field_endings = malloc(state_slots * sizeof(INT32));

  num_states = parse_log_format(log_format->u.string, state_list,
				field_endings, save_field_num);
  if(num_states < 1)
  {
    free(state_list);
    free(save_field_num);
    free(field_endings);
    /* Do not leak a descriptor that we opened ourselves. */
    if(my_fd)
      fd_close(f);
    Pike_error("UltraLog.ultraparse(): Failed to parse log format.\n");
  }

  fd_lseek(f, offs0, SEEK_SET);
  read_buf = malloc(READ_BLOCK_SIZE+1);
  buf = malloc(MAX_LINE_LEN+2);
#ifdef BROKEN_LINE_DEBUG
  broken_line = malloc(MAX_LINE_LEN*10);
#endif
  MEMSET(hits_per_hour, 0, sizeof(hits_per_hour));
  MEMSET(hosts_per_hour, 0, sizeof(hosts_per_hour));
  MEMSET(session_length, 0, sizeof(session_length));
  MEMSET(pages_per_hour, 0, sizeof(pages_per_hour));
  MEMSET(sessions_per_hour, 0, sizeof(sessions_per_hour));
  MEMSET(kb_per_hour, 0, sizeof(kb_per_hour));

  BUFSET(0);
  field_position = bufpos;
  buf_points[0] = buf_points[1] = buf_points[2] = buf_points[3] =
    buf_points[4] = buf_points[5] = buf_points[6] = buf_points[7] =
    buf_points[8] = buf_points[9] = buf_points[10] = buf_points[11] =
    buf_points[12] = buf_points[13] = buf_points[14] = buf_points[15] = 0;

  while(1) {
    do {
      len = fd_read(f, read_buf, READ_BLOCK_SIZE);
    } while(len < 0 && errno == EINTR);
    if(len <= 0) break;  /* nothing more to read or error. */
    offs0 += len;
    /* char_pointer[-len] walks the block front to back as len counts down. */
    char_pointer = read_buf+len - 1;
    while(len--) {
      c = char_pointer[-len];
      cls = char_class[c];
#if 0
      fprintf(stdout, "DFA(%d:%d): '%c' (%d) ", state, state_pos, c, (int)c);
      switch(cls) {
       case CLS_WSPACE: fprintf(stdout, "CLS_WSPACE\n"); break;
       case CLS_CRLF: fprintf(stdout, "CLS_CRLF\n"); break;
       case CLS_TOKEN: fprintf(stdout, "CLS_TOKEN\n"); break;
       case CLS_DIGIT: fprintf(stdout, "CLS_DIGIT\n"); break;
       case CLS_QUOTE: fprintf(stdout, "CLS_QUOTE\n"); break;
       case CLS_LBRACK: fprintf(stdout, "CLS_LBRACK\n"); break;
       case CLS_RBRACK: fprintf(stdout, "CLS_RBRACK\n"); break;
       case CLS_SLASH: fprintf(stdout, "CLS_SLASH\n"); break;
       case CLS_COLON: fprintf(stdout, "CLS_COLON\n"); break;
       case CLS_HYPHEN: fprintf(stdout, "CLS_HYPHEN/CLS_MINUS\n"); break;
       case CLS_PLUS: fprintf(stdout, "CLS_PLUS\n"); break;
       default: fprintf(stdout, "??? %d ???\n", cls);
      }
#endif
#ifdef BROKEN_LINE_DEBUG
      broken_line[broken_line_pos++] = c;
#endif
      if(cls == field_endings[state_pos]) {
	/* Field is done. Nullify. */
      process_field:
	switch(save_field_num[state_pos]) {
	 case DATE:
	 case HOUR:
	 case MINUTE:
	 case UP_SEC:
	 case CODE:
	  /* Plain decimal field. */
	  tmpinteger = 0;
	  for(v = field_position; v < bufpos; v++) {
	    if(char_class[buf[v]] == CLS_DIGIT)
	      tmpinteger = tmpinteger*10 + (buf[v]&0xf);
	    else {
	      goto skip;
	    }
	  }
	  BUFPOINT = tmpinteger;
	  break;

	 case YEAR:
	  tmpinteger = 0;
	  for(v = field_position; v < bufpos; v++) {
	    if(char_class[buf[v]] == CLS_DIGIT)
	      tmpinteger = tmpinteger*10 + (buf[v]&0xf);
	    else {
	      goto skip;
	    }
	  }
	  /* Two-digit years: 00-59 -> 20xx, 60-99 -> 19xx. */
	  if(tmpinteger < 100) {
	    if(tmpinteger < 60)
	      tmpinteger += 2000;
	    else
	      tmpinteger += 1900;
	  }
	  BUFPOINT = tmpinteger;
	  break;

	 case BYTES:
	  v = field_position;
	  switch(char_class[buf[v++]]) {
	   case CLS_QUESTION:
	   case CLS_HYPHEN:
	    /* A lone '?' or '-' means "no byte count". */
	    if(v == bufpos)
	      tmpinteger = 0;
	    else {
	      goto skip;
	    }
	    break;
	   case CLS_DIGIT:
	    tmpinteger = (buf[field_position]&0xf);
	    for(; v < bufpos; v++) {
	      if(char_class[buf[v]] == CLS_DIGIT)
		tmpinteger = tmpinteger*10 + (buf[v]&0xf);
	      else {
		goto skip;
	      }
	    }
	    break;
	   default:
	    goto skip;
	  }
	  BUFPOINT = tmpinteger;
	  break;

	 case MONTH:
	  /* Month: either two digits or a three letter name. */
	  switch(bufpos - field_position) {
	   case 2:
	    tmpinteger = 0;
	    for(v = field_position; v < bufpos; v++) {
	      if(char_class[buf[v]] == CLS_DIGIT)
		tmpinteger = tmpinteger*10 + (buf[v]&0xf);
	      else {
		goto skip;
	      }
	    }
	    break;

	   case 3:
	    /* Case-insensitive compare by OR:ing 0x20 into each byte. */
	    switch(((buf[field_position]|0x20)<<16)|((buf[field_position+1]|0x20)<<8)|
		   (buf[field_position+2]|0x20))
	    {
	     case ('j'<<16)|('a'<<8)|'n': tmpinteger = 1; break;
	     case ('f'<<16)|('e'<<8)|'b': tmpinteger = 2; break;
	     case ('m'<<16)|('a'<<8)|'r': tmpinteger = 3; break;
	     case ('a'<<16)|('p'<<8)|'r': tmpinteger = 4; break;
	     case ('m'<<16)|('a'<<8)|'y': tmpinteger = 5; break;
	     case ('j'<<16)|('u'<<8)|'n': tmpinteger = 6; break;
	     case ('j'<<16)|('u'<<8)|'l': tmpinteger = 7; break;
	     case ('a'<<16)|('u'<<8)|'g': tmpinteger = 8; break;
	     case ('s'<<16)|('e'<<8)|'p': tmpinteger = 9; break;
	     case ('o'<<16)|('c'<<8)|'t': tmpinteger = 10; break;
	     case ('n'<<16)|('o'<<8)|'v': tmpinteger = 11; break;
	     case ('d'<<16)|('e'<<8)|'c': tmpinteger = 12; break;
	     default:
	      /* Unknown month name. Without this, tmpinteger would still
	       * hold the value of a previously parsed field. */
	      goto skip;
	    }
	    break;

	   default:
	    goto skip;
	  }
	  if(tmpinteger < 1 || tmpinteger > 12)
	    goto skip; /* Broken Month */
	  BUFPOINT = tmpinteger;
	  break;

	 case ADDR:
	 case REFER:
	 case AGENT:
	 case TZ:
	 case METHOD:
	 case URL:
	 case RFC:
	 case PROTO:
	  /* String fields: terminate in buf and record the start offset. */
	  BUFSET(0);
	  SETPOINT();
	  break;
	}
	state_pos++;
	field_position = bufpos;
	if(cls != CLS_CRLF) {
	  /* Never index the state arrays beyond what was allocated, no
	   * matter how many fields a malformed line contains. */
	  if(state_pos >= state_slots)
	    goto skip;
	  continue;
	}
      } else if(cls != CLS_CRLF) {
	BUFSET(c);
	continue;
      } else {
	goto process_field; /* End of line - process what we got */
      }

#if 0
      if(!((lines+broken_lines)%100000)) {
	push_int(lines+broken_lines);
	push_int((int)((float)offs0/1024.0/1024.0));
	apply_svalue(statfun, 2);
	pop_stack();
	/*printf("%5dk lines, %5d MB\n", lines/1000, (int)((float)offs0/1024.0/1024.0));*/
      }
#endif
      if(state_pos < num_states)
      {
#ifdef BROKEN_LINE_DEBUG
	broken_line[broken_line_pos] = 0;
	printf("too few states (pos=%d): %s\n", state_pos, broken_line);
#endif
	broken_lines++;
	goto ok;
      }

      /* From here on these short names refer to the parsed field values.
       * Note that v and bytes shadow the locals of the same name. */
#define yy    buf_points[YEAR]
#define mm    buf_points[MONTH]
#define dd    buf_points[DATE]
#define h     buf_points[HOUR]
#define m     buf_points[MINUTE]
#define s     buf_points[UP_SEC]
#define v     buf_points[CODE]
#define bytes buf_points[BYTES]

      this_date = (yy*10000) + (mm*100) + dd;
      if(!this_date) {
	broken_lines++;
	goto ok;
      }

      /* The hour comes straight from the log and is used as an index
       * into the 24-element per-hour arrays below. */
      if(h < 0 || h > 23) {
	broken_lines++;
	goto ok;
      }

#if 1
      if(!last_date) { /* First loop w/o a value.*/
	last_date = this_date;
	last_hour = h;
      } else {
	if(last_hour != h ||
	   last_date != this_date)
	{
	  /* A new hour (or day) has started: fold the hourly data. */
	  pages_per_hour[last_hour] +=
	    hourly_page_hits(hits20x, pages, hits, pagexts->u.multiset, 200);

	  if(m_sizeof(session_start)) {
	    summarize_sessions(last_hour, sessions_per_hour,
			       session_length, session_start, session_end);
	    free_mapping(session_start);
	    free_mapping(session_end);
	    session_start = allocate_mapping(1);
	    session_end   = allocate_mapping(1);
	  }

	  hosts_per_hour[last_hour] += m_sizeof(sites);
	  do_map_addition(hosts, sites);
	  free_mapping(sites);
	  sites = allocate_mapping(100);
	  last_hour = h;
	  free_mapping(hits20x); /* Reset this one */
	  hits20x = allocate_mapping(2);
	}
#if 1
	if(last_date != this_date) {
	  /* A new day has started: summarize and report the previous one. */
	  tmpdest = allocate_mapping(1);
	  summarize_refsites(refsites, referrers, tmpdest);
	  free_mapping(referrers);
	  referrers = tmpdest;

	  tmpdest = allocate_mapping(1);
	  clean_refto(referredto, tmpdest, pagexts->u.multiset);
	  free_mapping(referredto);
	  referredto = tmpdest;

	  summarize_directories(directories, pages);
	  summarize_directories(directories, hits);

	  tmpdest = allocate_mapping(1);
	  http_decode_mapping(user_agents, tmpdest);
	  free_mapping(user_agents);
	  user_agents = tmpdest;

	  tmpdest = allocate_mapping(1);
	  summarize_hosts(hosts, domains, topdomains, tmpdest);
	  free_mapping(hosts);
	  hosts = tmpdest;
#if 1
	  push_int(last_date / 10000);
	  push_int((last_date % 10000)/100);
	  push_int((last_date % 10000)%100);
	  push_mapping(pages);
	  push_mapping(hits);
	  push_mapping(hits302);
	  push_mapping(hits_per_error);
	  push_mapping(error_urls);
	  push_mapping(error_refs);
	  push_mapping(referredto);
	  push_mapping(refsites);
	  push_mapping(referrers);
	  push_mapping(directories);
	  push_mapping(user_agents);
	  push_mapping(hosts);
	  push_mapping(domains);
	  push_mapping(topdomains);
	  for(i = 0; i < 24; i++) {
	    push_int(sessions_per_hour[i]);
	  }
	  f_aggregate(24);
	  for(i = 0; i < 24; i++) {
	    push_int(hits_per_hour[i]);
	    hits_per_hour[i] = 0;
	  }
	  f_aggregate(24);
	  for(i = 0; i < 24; i++) {
	    push_int(pages_per_hour[i]);
	    pages_per_hour[i] = 0;
	  }
	  f_aggregate(24);
	  for(i = 0; i < 24; i++) {
	    /* KB per hour.*/
	    push_float(kb_per_hour[i]);
	    kb_per_hour[i] = 0.0;
	  }
	  f_aggregate(24);
	  for(i = 0; i < 24; i++) {
	    /* Average session length in minutes. */
	    push_float(sessions_per_hour[i] ?
		       ((float)session_length[i] /
			(float)sessions_per_hour[i]) / 60.0 : 0.0);
	    sessions_per_hour[i] = 0;
	    session_length[i] = 0;
	  }
	  f_aggregate(24);
	  for(i = 0; i < 24; i++) {
	    push_int(hosts_per_hour[i]);
	    hosts_per_hour[i] = 0;
	  }
	  f_aggregate(24);
	  apply_svalue(daily, 23);
	  pop_stack();
#else
	  free_mapping(error_refs);
	  free_mapping(referredto);
	  free_mapping(refsites);
	  free_mapping(directories);
	  free_mapping(error_urls);
	  free_mapping(hits);
	  free_mapping(hits_per_error);
	  free_mapping(pages);
	  free_mapping(hosts);
	  free_mapping(domains);
	  free_mapping(topdomains);
	  free_mapping(referrers);
	  free_mapping(hits302);
#endif
	  /* Start over with fresh mappings for the new day. */
	  user_agents = allocate_mapping(10);
	  hits302 = allocate_mapping(1);
	  hits_per_error = allocate_mapping(10);
	  error_urls = allocate_mapping(10);
	  error_refs = allocate_mapping(10);
	  directories = allocate_mapping(20);
	  referrers = allocate_mapping(1);
	  referredto = allocate_mapping(1);
	  refsites = allocate_mapping(1);
	  pages = allocate_mapping(1);
	  hits = allocate_mapping(1);
	  /* sites was already freed and replaced by an empty mapping in
	   * the hour rollover just above (a date change always triggers
	   * it); allocating another one here would leak that mapping. */
	  hosts = allocate_mapping(1);
	  domains = allocate_mapping(1);
	  topdomains = allocate_mapping(1);
	  last_date = this_date;
	}
#endif
      }
#endif

#if 1
      process_session(buf+buf_points[ADDR], h*3600+m*60+s, h,
		      sessions_per_hour, session_length, session_start,
		      session_end, sites);
      url_str = make_shared_binary_string((char *)(buf + buf_points[URL]),
					  strlen((char *)(buf + buf_points[URL])));
#if 1
      switch(v) {
	/* Do error-code specific logging. Error urls
	   that are specially treated do not include
	   auth required, service unavailable etc. They are
	   only included in the return code summary.
	*/
       case 200: case 201: case 202: case 203:
       case 204: case 205: case 206: case 207:
       case 304:
	mapaddstr(hits20x, url_str);
	DO_REFERRER();
	break;

       case 300: case 301: case 302:
       case 303: case 305:
	mapaddstr(hits302, url_str);
	DO_REFERRER();
	break;

       case 400: case 404: case 405: case 406: case 408:
       case 409: case 410: case 411: case 412: case 413:
       case 414: case 415: case 416: case 500: case 501:
	DO_ERREF();
	map2addint(error_urls, v, url_str);
	break;
      }
#endif
      free_string(url_str);
      mapaddint(hits_per_error, v);
      kb_per_hour[h] += (float)bytes / 1024.0;
      hits_per_hour[h]++;

      if(strlen((char *)(buf + buf_points[AGENT]))>1) {
	/* Got User Agent */
	tmpagent = make_shared_string((char *)(buf + buf_points[AGENT]));
	mapaddstr(user_agents, tmpagent);
	free_string(tmpagent);
      }
#endif
      lines++;
#if 0
      printf("%s %s %s\n%s %s %s\n%04d-%02d-%02d %02d:%02d:%02d \n%d %d\n",
	     buf + buf_points[ADDR], buf + buf_points[REFER], buf + buf_points[ RFC ],
	     buf + buf_points[METHOD], buf + buf_points[ URL ], buf + buf_points[PROTO],
	     yy, mm, dd, h, m, s, v, bytes);
      /* if(lines > 10) exit(0);*/
#endif
    ok:
      /* Reset the per-line state. buf_points[8..10] are deliberately left
       * untouched here, as in the original code. */
      gotdate =
	buf_points[0] = buf_points[1] = buf_points[2] = buf_points[3] =
	buf_points[4] = buf_points[5] = buf_points[6] = buf_points[7] =
	buf_points[11] = buf_points[12] = buf_points[13] = buf_points[14] =
	buf_points[15] =
	bufpos = state_pos = 0;
      field_position = 1;
#ifdef BROKEN_LINE_DEBUG
      broken_line_pos = 0;
#endif
      BUFSET(0);
    }
  }

 cleanup:
  free(save_field_num);
  free(state_list);
  free(field_endings);
  free(buf);
  push_int(lines);
  push_int((int)((float)offs0 / 1024.0/1024.0));
  push_int(1);
  apply_svalue(statfun, 3);
  pop_stack();
  free(read_buf);
#ifdef BROKEN_LINE_DEBUG
  free(broken_line);
#endif
  if(my_fd)
    /* If my_fd == 0, the file argument was an object and thus we don't
     * want to close its descriptor. */
    fd_close(f);

  if(yy && mm && dd) {
    /* Final summary for the last (possibly partial) day. */
    pages_per_hour[last_hour] +=
      hourly_page_hits(hits20x, pages, hits, pagexts->u.multiset, 200);
    if(m_sizeof(session_start)) {
      summarize_sessions(last_hour, sessions_per_hour,
			 session_length, session_start, session_end);
    }
    hosts_per_hour[last_hour] += m_sizeof(sites);
    do_map_addition(hosts, sites);
    free_mapping(sites);

    tmpdest = allocate_mapping(1);
    summarize_refsites(refsites, referrers, tmpdest);
    free_mapping(referrers);
    referrers = tmpdest;

    summarize_directories(directories, pages);
    summarize_directories(directories, hits);

    tmpdest = allocate_mapping(1);
    clean_refto(referredto, tmpdest, pagexts->u.multiset);
    free_mapping(referredto);
    referredto = tmpdest;

    tmpdest = allocate_mapping(1);
    http_decode_mapping(user_agents, tmpdest);
    free_mapping(user_agents);
    user_agents = tmpdest;

    tmpdest = allocate_mapping(1);
    summarize_hosts(hosts, domains, topdomains, tmpdest);
    free_mapping(hosts);
    hosts = tmpdest;

    push_int(yy);
    push_int(mm);
    push_int(dd);
    push_mapping(pages);
    push_mapping(hits);
    push_mapping(hits302);
    push_mapping(hits_per_error);
    push_mapping(error_urls);
    push_mapping(error_refs);
    push_mapping(referredto);
    push_mapping(refsites);
    push_mapping(referrers);
    push_mapping(directories);
    push_mapping(user_agents);
    push_mapping(hosts);
    push_mapping(domains);
    push_mapping(topdomains);
    for(i = 0; i < 24; i++) {
      push_int(sessions_per_hour[i]);
    }
    f_aggregate(24);
    for(i = 0; i < 24; i++) {
      push_int(hits_per_hour[i]);
    }
    f_aggregate(24);
    for(i = 0; i < 24; i++) {
      push_int(pages_per_hour[i]);
    }
    f_aggregate(24);
    for(i = 0; i < 24; i++) {
      push_float(kb_per_hour[i]);
    }
    f_aggregate(24);
    for(i = 0; i < 24; i++) {
      push_float(sessions_per_hour[i] ?
		 ((float)session_length[i] /
		  (float)sessions_per_hour[i]) / 60.0 : 0.0);
    }
    f_aggregate(24);
    for(i = 0; i < 24; i++) {
      push_int(hosts_per_hour[i]);
      hosts_per_hour[i] = 0;
    }
    f_aggregate(24);
    apply_svalue(daily, 23);
    pop_stack();
  } else {
    /* Nothing to report: just release the mappings. */
    free_mapping(error_refs);
    free_mapping(referredto);
    free_mapping(refsites);
    free_mapping(directories);
    free_mapping(error_urls);
    free_mapping(hits);
    free_mapping(hits_per_error);
    free_mapping(pages);
    free_mapping(referrers);
    free_mapping(hits302);
    free_mapping(user_agents);
    free_mapping(hosts);
    free_mapping(domains);
    free_mapping(topdomains);
    /* sites is released in the branch above but was missed here. */
    free_mapping(sites);
  }
  free_mapping(hits20x);
  free_mapping(session_start);
  free_mapping(session_end);
  printf("\nTotal lines: %d, broken lines: %d, mapping lookups: %d\n\n", lines,
	 broken_lines, lmu);
  fflush(stdout);
  pop_n_elems(args);
  push_int(offs0);
  return;

 skip:
  /* A field failed to parse: count the line as broken and discard input
   * up to the next line break, then resume the main loop at ok. */
  broken_lines++;
  while(1) {
    while(len--) {
#ifdef BROKEN_LINE_DEBUG
      broken_line[broken_line_pos] = char_pointer[-len];
#endif
      if(char_class[char_pointer[-len]] == CLS_CRLF) {
#ifdef BROKEN_LINE_DEBUG
	broken_line[broken_line_pos] = 0;
	printf("Broken Line (pos=%d): %s\n", state_pos, broken_line);
#endif
	goto ok;
      }
    }
    do {
      len = fd_read(f, read_buf, READ_BLOCK_SIZE);
    } while(len < 0 && errno == EINTR);
    if(len <= 0)
      break;  /* nothing more to read. */
    offs0 += len;
    char_pointer = read_buf+len - 1;
  }
  goto cleanup;
}
static void f_read( INT32 args ) { char *read_buf; struct svalue *logfun, *file; FD f = -1; int cls, c, my_fd=1, state=0, tzs=0; char *char_pointer; INT32 v=0, yy=0, mm=0, dd=0, h=0, m=0, s=0, tz=0; ptrdiff_t offs0=0, len=0; struct svalue *old_sp; /* #define DYNAMIC_BUF */ #ifdef DYNAMIC_BUF dynamic_buffer buf; #else #define BUFSET(X) do { if(bufpos == bufsize) { bufsize *= 2; buf = realloc(buf, bufsize+1); } buf[bufpos++] = c; } while(0) #define PUSHBUF() do { push_string( make_shared_binary_string( buf,bufpos ) ); bufpos=0; } while(0) char *buf; int bufsize=CLF_BLOCK_SIZE, bufpos=0; #endif if(args>2 && sp[-1].type == T_INT) { offs0 = sp[-1].u.integer; pop_n_elems(1); --args; } old_sp = sp; get_all_args("CommonLog.read", args, "%*%*", &logfun, &file); if(logfun->type != T_FUNCTION) SIMPLE_BAD_ARG_ERROR("CommonLog.read", 1, "function"); if(file->type == T_OBJECT) { f = fd_from_object(file->u.object); if(f == -1) Pike_error("CommonLog.read: File is not open.\n"); my_fd = 0; } else if(file->type == T_STRING && file->u.string->size_shift == 0) { #ifdef PIKE_SECURITY if(!CHECK_SECURITY(SECURITY_BIT_SECURITY)) { if(!CHECK_SECURITY(SECURITY_BIT_CONDITIONAL_IO)) Pike_error("Permission denied.\n"); push_text("read"); push_int(0); ref_push_string(file->u.string); push_text("r"); push_int(00666); safe_apply(OBJ2CREDS(CURRENT_CREDS)->user,"valid_open",5); switch(Pike_sp[-1].type) { case PIKE_T_INT: switch(Pike_sp[-1].u.integer) { case 0: /* return 0 */ errno=EPERM; Pike_error("CommonLog.read(): Failed to open file for reading (errno=%d).\n", errno); case 2: /* ok */ pop_stack(); break; case 3: /* permission denied */ Pike_error("CommonLog.read: permission denied.\n"); default: Pike_error("Error in user->valid_open, wrong return value.\n"); } break; default: Pike_error("Error in user->valid_open, wrong return type.\n"); case PIKE_T_STRING: /* if(Pike_sp[-1].u.string->shift_size) */ /* file=Pike_sp[-1]; */ pop_stack(); } } #endif do { THREADS_ALLOW(); f=fd_open((char 
*)STR0(file->u.string), fd_RDONLY, 0); THREADS_DISALLOW(); if (f >= 0 || errno != EINTR) break; check_threads_etc(); } while (1); if(f < 0) Pike_error("CommonLog.read(): Failed to open file for reading (errno=%d).\n", errno); } else SIMPLE_BAD_ARG_ERROR("CommonLog.read", 2, "string|Stdio.File"); #ifdef HAVE_LSEEK64 lseek64(f, offs0, SEEK_SET); #else fd_lseek(f, offs0, SEEK_SET); #endif read_buf = malloc(CLF_BLOCK_SIZE+1); #ifndef DYNAMIC_BUF buf = malloc(bufsize); #endif while(1) { do { THREADS_ALLOW(); len = fd_read(f, read_buf, CLF_BLOCK_SIZE); THREADS_DISALLOW(); if (len >= 0 || errno != EINTR) break; check_threads_etc(); } while (1); if(len == 0) break; /* nothing more to read. */ if(len < 0) break; char_pointer = read_buf; while(len--) { offs0++; c = char_pointer[0] & 0xff; char_pointer ++; cls = char_class[c]; #ifdef TRACE_DFA fprintf(stderr, "DFA(%d): '%c' ", state, (c<32? '.':c)); switch(cls) { case CLS_WSPACE: fprintf(stderr, "CLS_WSPACE"); break; case CLS_CRLF: fprintf(stderr, "CLS_CRLF"); break; case CLS_TOKEN: fprintf(stderr, "CLS_TOKEN"); break; case CLS_DIGIT: fprintf(stderr, "CLS_DIGIT"); break; case CLS_QUOTE: fprintf(stderr, "CLS_QUOTE"); break; case CLS_LBRACK: fprintf(stderr, "CLS_LBRACK"); break; case CLS_RBRACK: fprintf(stderr, "CLS_RBRACK"); break; case CLS_SLASH: fprintf(stderr, "CLS_SLASH"); break; case CLS_COLON: fprintf(stderr, "CLS_COLON"); break; case CLS_HYPHEN: fprintf(stderr, "CLS_HYPHEN"); break; case CLS_PLUS: fprintf(stderr, "CLS_PLUS"); break; default: fprintf(stderr, "???"); } fprintf(stderr, " %d items on stack\n", sp-old_sp); #endif switch(state) { case 0: if(sp != old_sp) { if(sp == old_sp+15) { f_aggregate(15); push_int64(offs0); apply_svalue(logfun, 2); pop_stack(); } else pop_n_elems(sp-old_sp); } if(cls > CLS_CRLF) { if(cls == CLS_HYPHEN) { push_int(0); state = 2; break; } #ifdef DYNAMIC_BUF buf.s.str = NULL; initialize_buf( &buf ); low_my_putchar( c, &buf ); #else bufpos = 0; BUFSET(c); #endif state=1; } break; case 1: 
if(cls > CLS_CRLF) { #ifdef DYNAMIC_BUF low_my_putchar( c, &buf ); #else BUFSET(c); #endif break; } #ifdef DYNAMIC_BUF push_string( low_free_buf( &buf ) ); /* remotehost */ #else PUSHBUF(); #endif state = (cls == CLS_WSPACE? 2:0); break; case 2: if(cls > CLS_CRLF) { if(cls == CLS_HYPHEN) { push_int(0); state = 4; break; } #ifdef DYNAMIC_BUF buf.s.str = NULL; initialize_buf( &buf ); low_my_putchar( c, &buf ); #else bufpos = 0; BUFSET(c); #endif state=3; } else if(cls == CLS_CRLF) state=0; break; case 3: if(cls > CLS_CRLF) { #ifdef DYNAMIC_BUF low_my_putchar( c, &buf ); #else BUFSET(c); #endif break; } #ifdef DYNAMIC_BUF push_string( low_free_buf( &buf ) ); /* rfc931 */ #else PUSHBUF(); /* rfc931 */ #endif state = (cls == CLS_WSPACE? 4:0); break; case 4: if(cls > CLS_CRLF) { if(cls == CLS_HYPHEN) { push_int(0); state = 6; break; } #ifdef DYNAMIC_BUF buf.s.str = NULL; initialize_buf( &buf ); low_my_putchar( c, &buf ); #else bufpos = 0; BUFSET(c); #endif state=5; } else if(cls == CLS_CRLF) state=0; break; case 5: if(cls > CLS_CRLF) { #ifdef DYNAMIC_BUF low_my_putchar( c, &buf ); #else BUFSET(c); #endif break; } #ifdef DYNAMIC_BUF push_string( low_free_buf( &buf ) ); /* authuser */ #else PUSHBUF(); /* authuser */ #endif state = (cls == CLS_WSPACE? 
6:0); break; case 6: if(cls == CLS_LBRACK) state = 15; else if(cls == CLS_CRLF) state = 0; else if(cls == CLS_HYPHEN) { push_int(0); push_int(0); push_int(0); state = 7; } break; case 7: if(cls == CLS_QUOTE) { #ifdef DYNAMIC_BUF buf.s.str = NULL; initialize_buf( &buf ); #else bufpos = 0; #endif state = 31; } else if(cls == CLS_CRLF) state = 0; else if(cls == CLS_HYPHEN) { push_int(0); push_int(0); push_int(0); state = 10; } break; case 8: if(cls == CLS_QUOTE) state = 9; else if(cls == CLS_CRLF) { #ifdef DYNAMIC_BUF push_string( low_free_buf( &buf ) ); #else PUSHBUF(); #endif state = 0; } else #ifdef DYNAMIC_BUF low_my_putchar( c, &buf ); #else BUFSET(c); #endif break; case 9: if(cls > CLS_CRLF) { #ifdef DYNAMIC_BUF low_my_putchar( '"', &buf); low_my_putchar( c, &buf); #else BUFSET('"'); BUFSET(c); #endif state = 8; break; } #ifdef DYNAMIC_BUF push_string( low_free_buf( &buf ) ); /* protocol */ #else PUSHBUF(); /* protoocl */ #endif state = (cls == CLS_CRLF? 0 : 10); break; case 10: if(cls == CLS_DIGIT) { v = c&0xf; state = 11; } else if(cls == CLS_CRLF) state = 0; else if(cls == CLS_HYPHEN) { push_int(0); state = 12; } break; case 11: if(cls == CLS_DIGIT) v = v*10+(c&0xf); else if(cls == CLS_WSPACE) { push_int(v); /* status */ state = 12; } else state = 0; break; case 12: if(cls == CLS_DIGIT) { v = c&0xf; state = 13; } else if(cls == CLS_CRLF) state = 0; else if(cls == CLS_HYPHEN) { push_int(0); state = 14; } break; case 13: if(cls == CLS_DIGIT) v = v*10+(c&0xf); else { push_int(v); /* bytes */ state = (cls == CLS_CRLF? 0:14); } break; case 14: if(cls == CLS_CRLF) state = 0; break; case 15: if(cls == CLS_DIGIT) { dd = c&0xf; state = 16; } else state = (cls == CLS_CRLF? 0:14); break; case 16: /* getting day */ if(cls == CLS_DIGIT) dd = dd*10+(c&0xf); else if(cls == CLS_SLASH) state = 17; else state = (cls == CLS_CRLF? 
0:14); break; case 17: if(cls == CLS_DIGIT) { mm = c&0xf; state = 18; } else if(cls == CLS_TOKEN) { mm = c|0x20; state = 21; } else state = (cls == CLS_CRLF? 0:14); break; case 18: /* getting numeric month */ if(cls == CLS_DIGIT) mm = mm*10+(c&0xf); else if(cls == CLS_SLASH) state = 19; else state = (cls == CLS_CRLF? 0:14); break; case 19: if(cls == CLS_DIGIT) { yy = c&0xf; state = 20; } else state = (cls == CLS_CRLF? 0:14); break; case 20: /* getting year */ if(cls == CLS_DIGIT) yy = yy*10+(c&0xf); else if(cls == CLS_COLON) state = 22; else state = (cls == CLS_CRLF? 0:14); break; case 21: /* getting textual month */ if(cls == CLS_TOKEN) mm = (mm<<8)|c|0x20; else if(cls == CLS_SLASH) { state = 19; switch(mm) { case ('j'<<16)|('a'<<8)|'n': mm=1; break; case ('f'<<16)|('e'<<8)|'b': mm=2; break; case ('m'<<16)|('a'<<8)|'r': mm=3; break; case ('a'<<16)|('p'<<8)|'r': mm=4; break; case ('m'<<16)|('a'<<8)|'y': mm=5; break; case ('j'<<16)|('u'<<8)|'n': mm=6; break; case ('j'<<16)|('u'<<8)|'l': mm=7; break; case ('a'<<16)|('u'<<8)|'g': mm=8; break; case ('s'<<16)|('e'<<8)|'p': mm=9; break; case ('o'<<16)|('c'<<8)|'t': mm=10; break; case ('n'<<16)|('o'<<8)|'v': mm=11; break; case ('d'<<16)|('e'<<8)|'c': mm=12; break; default: state = 14; } } break; case 22: if(cls == CLS_DIGIT) { h = c&0xf; state = 23; } else state = (cls == CLS_CRLF? 0:14); break; case 23: /* getting hour */ if(cls == CLS_DIGIT) h = h*10+(c&0xf); else if(cls == CLS_COLON) state = 24; else state = (cls == CLS_CRLF? 0:14); break; case 24: if(cls == CLS_DIGIT) { m = c&0xf; state = 25; } else state = (cls == CLS_CRLF? 0:14); break; case 25: /* getting minute */ if(cls == CLS_DIGIT) m = m*10+(c&0xf); else if(cls == CLS_COLON) state = 26; else state = (cls == CLS_CRLF? 0:14); break; case 26: if(cls == CLS_DIGIT) { s = c&0xf; state = 27; } else state = (cls == CLS_CRLF? 
0:14); break; case 27: /* getting second */ if(cls == CLS_DIGIT) s = s*10+(c&0xf); else if(cls == CLS_WSPACE) state = 28; else state = (cls == CLS_CRLF? 0:14); break; case 28: if(cls>=CLS_HYPHEN) { state = 29; tzs = cls!=CLS_PLUS; tz = 0; } else if(cls == CLS_DIGIT) { state = 29; tzs = 0; tz = c&0xf; } else if(cls==CLS_CRLF) state = 0; break; case 29: /* getting timezone */ if(cls == CLS_DIGIT) tz = tz*10+(c&0xf); else { if(tzs) tz = -tz; push_int(yy); push_int(mm); push_int(dd); push_int(h); push_int(m); push_int(s); push_int(tz); if(cls == CLS_RBRACK) state = 7; else state = (cls == CLS_CRLF? 0 : 30); } break; case 30: if(cls == CLS_RBRACK) state = 7; else if(cls == CLS_CRLF) state = 0; break; case 31: if(cls == CLS_QUOTE) { #ifdef DYNAMIC_BUF push_string( low_free_buf( &buf ) ); #else PUSHBUF(); #endif push_int(0); push_int(0); state = 10; } else if(cls >= CLS_TOKEN) #ifdef DYNAMIC_BUF low_my_putchar( c, &buf ); #else BUFSET(c); #endif else { #ifdef DYNAMIC_BUF push_string( low_free_buf( &buf ) ); /* method */ #else PUSHBUF(); /* method */ #endif state = (cls == CLS_CRLF? 
0 : 32); } break; case 32: if(cls == CLS_QUOTE) { push_int(0); push_int(0); state = 10; } else if(cls >= CLS_TOKEN) { #ifdef DYNAMIC_BUF buf.s.str = NULL; initialize_buf( &buf ); low_my_putchar( c, &buf ); #else bufpos = 0; BUFSET(c); #endif state = 33; } else if(cls == CLS_CRLF) state = 0; break; case 33: if(cls == CLS_QUOTE) state = 34; else if(cls == CLS_CRLF) { #ifdef DYNAMIC_BUF push_string( low_free_buf( &buf ) ); #else PUSHBUF(); #endif state = 0; } else if(cls == CLS_WSPACE) { #ifdef DYNAMIC_BUF push_string( low_free_buf( &buf ) ); /* path */ #else PUSHBUF(); /* path */ #endif state = 35; } else #ifdef DYNAMIC_BUF low_my_putchar( c, &buf ); #else BUFSET(c); #endif break; case 34: if(cls >= CLS_TOKEN) { #ifdef DYNAMIC_BUF low_my_putchar( '"', &buf ); low_my_putchar( c, &buf ); #else BUFSET('"'); BUFSET(c); #endif state = 33; } else if(cls == CLS_CRLF) { #ifdef DYNAMIC_BUF push_string( low_free_buf( &buf ) ); #else PUSHBUF(); #endif state = 0; } else { #ifdef DYNAMIC_BUF push_string( low_free_buf( &buf ) ); #else PUSHBUF(); #endif push_text("HTTP/0.9"); state = 10; } break; case 35: if(cls == CLS_QUOTE) { push_text("HTTP/0.9"); state = 10; } else if(cls >= CLS_TOKEN) { #ifdef DYNAMIC_BUF buf.s.str = NULL; initialize_buf( &buf ); low_my_putchar( c, &buf ); #else bufpos = 0; BUFSET(c); #endif state = 8; } else if(cls == CLS_CRLF) state = 0; break; } } }
/*! @decl void output(object obj, int|void start_pos)
 *!
 *! Add an output file object.
 */
/* Implementation notes:
 *
 * If the pipe has no buffer file yet (THIS->fd == -1) and obj->query_fd()
 * returns a descriptor for a regular file, that descriptor is dup()ed and
 * used as the pipe's buffer file; any strings queued in memory are written
 * to it and released, obj->set_id(0) is called and the function returns.
 *
 * Otherwise a new struct output is cloned for obj, linked first in
 * THIS->firstoutput, and obj is put in nonblocking mode with the offset
 * write/close callbacks. obj must provide write, set_nonblocking and
 * set_blocking.
 *
 * NOTE(review): in the buffer-file branch every queued string is written
 * at the same THIS->pos (the position is not advanced between writes) and
 * the results of fd_write() are not checked - confirm whether that is
 * intended before changing it.
 *
 * NOTE(review): on the "illegal file object" error path the new output
 * stays linked in THIS->firstoutput after o->obj has been released.
 */
static void pipe_output(INT32 args)
{
  struct object *obj;
  struct output *o;
  int fd;
  struct stat s;
  struct buffer *b;

  if (args<1 ||
      sp[-args].type != T_OBJECT ||
      !sp[-args].u.object ||
      !sp[-args].u.object->prog)
    Pike_error("Bad/missing argument 1 to pipe->output().\n");

  /* start_pos is read below whenever at least two arguments are given,
   * so it has to be validated in exactly those cases (not only when
   * args == 2). */
  if (args>=2 &&
      sp[1-args].type != T_INT)
    Pike_error("Bad argument 2 to pipe->output().\n");

  if (THIS->fd==-1)		/* no buffer */
  {
    /* test if usable as buffer */
    apply(sp[-args].u.object,"query_fd",0);

    if ((sp[-1].type==T_INT)
	&& (fd=sp[-1].u.integer)>=0
	&& (fstat(fd,&s)==0)
	&& S_ISREG(s.st_mode)
	&& (THIS->fd=fd_dup(fd))!=-1 )
    {
      /* keep the file pointer of the duped fd */
      THIS->pos=fd_lseek(fd, 0L, SEEK_CUR);

      THIS->living_outputs++;

      /* Flush everything queued in memory to the new buffer file. */
      while (THIS->firstbuffer)
      {
	b=THIS->firstbuffer;
	THIS->firstbuffer=b->next;
	fd_lseek(THIS->fd, THIS->pos, SEEK_SET);
	fd_write(THIS->fd,b->s->str,b->s->len);
	sbuffers-=b->s->len;
	nbuffers--;
	free_string(b->s);
	free((char *)b);
      }
      THIS->lastbuffer=NULL;

      /* The stack now holds the arguments, the query_fd() result and the
       * 0 pushed here, hence the -args-2 index for the object. */
      push_int(0);
      apply(sp[-args-2].u.object,"set_id", 1);
      pop_n_elems(args+2);	/* ... and from apply x 2  */
      return;
    }
    pop_stack();		/* from apply */
  }

  THIS->living_outputs++;

  /* Allocate a new struct output */
  obj=clone_object(output_program,0);
  o=(struct output *)(obj->storage);
  o->next=THIS->firstoutput;
  THIS->firstoutput=obj;
  noutputs++;
  o->obj=NULL;

  add_ref(o->obj=sp[-args].u.object);

  o->write_offset=find_identifier("write",o->obj->prog);
  o->set_nonblocking_offset=find_identifier("set_nonblocking",o->obj->prog);
  o->set_blocking_offset=find_identifier("set_blocking",o->obj->prog);

  if (o->write_offset<0 || o->set_nonblocking_offset<0 ||
      o->set_blocking_offset<0)
  {
    free_object(o->obj);
    Pike_error("illegal file object%s%s%s\n",
	       ((o->write_offset<0)?"; no write":""),
	       ((o->set_nonblocking_offset<0)?"; no set_nonblocking":""),
	       ((o->set_blocking_offset<0)?"; no set_blocking":""));
  }

  o->mode=O_RUN;

  /* An explicit start position may be given as the second argument for
   * additional outputs; otherwise start at the pipe's current position. */
  if(args>=2)
    o->pos=sp[1-args].u.integer;
  else
    o->pos=THIS->pos;

  /* The output object itself is passed as the id of the file object. */
  push_object(obj);
  apply(o->obj,"set_id",1);
  pop_stack();

  push_int(0);
  push_callback(offset_output_write_callback);
  push_callback(offset_output_close_callback);
  apply_low(o->obj,o->set_nonblocking_offset,3);
  pop_stack();

  pop_n_elems(args-1);
}
/*! @decl void input(object obj)
 *!
 *! Add an input file to this pipe.
 */
/*
 * Implementation notes:
 *
 * - When mmap is available and the pipe has no file cache, a regular file
 *   (as reported by obj->query_fd()) is mapped from its current file
 *   offset to its end and registered as an I_MMAP input.
 *
 * - Otherwise the object is registered as an I_OBJ input and put in
 *   nonblocking mode.  Objects lacking set_nonblocking/set_blocking but
 *   having "read" are handled as I_BLOCKING_OBJ; objects without even
 *   "read" are rejected with an error.
 *
 * Every non-error path leaves the integer 0 on the stack in place of the
 * arguments.
 */
static void pipe_input(INT32 args)
{
  struct input *i;
  int fd=-1;			/* Per, one less warning to worry about... */
  struct object *obj;

  if (args<1 || sp[-args].type != T_OBJECT)
    Pike_error("Bad/missing argument 1 to pipe->input().\n");

  obj=sp[-args].u.object;
  if(!obj || !obj->prog)
    Pike_error("pipe->input() on destructed object.\n");

  /* Reset the id of the file object. */
  push_int(0);
  apply(sp[-args-1].u.object,"set_id", 1);
  pop_stack();

  i=new_input();

#if defined(HAVE_MMAP) && defined(HAVE_MUNMAP)

  /* We do not handle mmaps if we have a buffer */
  if(THIS->fd == -1)
  {
    char *m;
    struct stat s;

    apply(obj, "query_fd", 0);
    if(sp[-1].type == T_INT) fd=sp[-1].u.integer;
    pop_stack();

    if (fd >= 0 && fstat(fd,&s)==0 && S_ISREG(s.st_mode)) /* regular file */
    {
      off_t filep=fd_lseek(fd, 0L, SEEK_CUR); /* keep the file pointer */

      /* Only map when there is something left to map.  A failed seek (-1)
       * or an offset at/after end of file would otherwise make the
       * unsigned length below wrap around to a huge value.
       */
      if (filep >= 0 && filep < s.st_size)
      {
        size_t len = (size_t)(s.st_size - filep);

        m=(char *)mmap(0, len, PROT_READ, MAP_FILE|MAP_SHARED, fd, filep);
        if (m != (char *)-1)	/* i.e. not MAP_FAILED */
        {
          mmapped += len;

          i->type=I_MMAP;
          i->len = len;
          i->u.mmap=m;
#if defined(HAVE_MADVISE) && defined(MADV_SEQUENTIAL)
          /* Mark the pages as sequential read only access... */
          madvise(m, len, MADV_SEQUENTIAL);
#endif
          pop_n_elems(args);
          push_int(0);
          return;
        }
      }
    }
  }
#endif

  /* Fall back to reading through the object's own functions. */
  i->u.obj=obj;
  nobjects++;
  i->type=I_OBJ;
  add_ref(i->u.obj);
  i->set_nonblocking_offset=find_identifier("set_nonblocking",i->u.obj->prog);
  i->set_blocking_offset=find_identifier("set_blocking",i->u.obj->prog);

  if (i->set_nonblocking_offset<0 ||
      i->set_blocking_offset<0)
  {
    if (find_identifier("read", i->u.obj->prog) < 0) {
      /* Not even a read function */
      free_object(i->u.obj);
      i->u.obj=NULL;
      i->type=I_NONE;

      nobjects--;
      Pike_error("illegal file object%s%s\n",
                 ((i->set_nonblocking_offset<0)?"; no set_nonblocking":""),
                 ((i->set_blocking_offset<0)?"; no set_blocking":""));
    } else {
      /* Try blocking mode */
      i->type = I_BLOCKING_OBJ;
      if (i==THIS->firstinput) {
        /*
         * FIXME: What if read_some_data() returns 0?
         */
        read_some_data();
      }
      /* Leave the stack in the same state as the other return paths. */
      pop_n_elems(args);
      push_int(0);
      return;
    }
  }

  if (i==THIS->firstinput)
  {
    /* First input in line: start reading from it right away. */
    push_callback(offset_input_read_callback);
    push_int(0);
    push_callback(offset_input_close_callback);
    apply_low(i->u.obj,i->set_nonblocking_offset,3);
    pop_stack();
  }
  else
  {
    /* Queued behind another input: only the close callback is set. */
    /* DOESN'T WORK!!! */
    push_int(0);
    push_int(0);
    push_callback(offset_input_close_callback);
    apply_low(i->u.obj,i->set_nonblocking_offset,3);
    pop_stack();
  }

  pop_n_elems(args);
  push_int(0);
}
/* This function reads some data from the file cache.. * Called when we want some data to send. */ static INLINE struct pike_string* gimme_some_data(size_t pos) { struct buffer *b; ptrdiff_t len; struct pipe *this = THIS; /* We have a file cache, read from it */ if (this->fd!=-1) { char buffer[READ_BUFFER_SIZE]; if (this->pos<=pos) return NULL; /* no data */ len=this->pos-pos; if (len>READ_BUFFER_SIZE) len=READ_BUFFER_SIZE; THREADS_ALLOW(); fd_lseek(this->fd, pos, SEEK_SET); THREADS_DISALLOW(); do { THREADS_ALLOW(); len = fd_read(this->fd, buffer, len); THREADS_DISALLOW(); if (len < 0) { if (errno != EINTR) { return(NULL); } check_threads_etc(); } } while(len < 0); /* * FIXME: What if len is 0? */ return make_shared_binary_string(buffer,len); } if (pos<this->pos) return make_shared_string("buffer underflow"); /* shit */ /* We want something in the next buffer */ while (this->firstbuffer && pos>=this->pos+this->firstbuffer->s->len) { /* Free the first buffer, and update THIS->pos */ b=this->firstbuffer; this->pos+=b->s->len; this->bytes_in_buffer-=b->s->len; this->firstbuffer=b->next; if (!b->next) this->lastbuffer=NULL; sbuffers-=b->s->len; nbuffers--; free_string(b->s); free((char *)b); /* Wake up first input if it was sleeping and we * have room for more in the buffer. 
*/ if (this->sleeping && this->firstinput && this->bytes_in_buffer<MAX_BYTES_IN_BUFFER) { if (this->firstinput->type == I_BLOCKING_OBJ) { if (!read_some_data()) { this->sleeping = 0; input_finish(); } } else { this->sleeping=0; push_callback(offset_input_read_callback); push_int(0); push_callback(offset_input_close_callback); apply(this->firstinput->u.obj, "set_nonblocking", 3); pop_stack(); } } } while (!this->firstbuffer) { if (this->firstinput) { #if defined(HAVE_MMAP) && defined(HAVE_MUNMAP) if (this->firstinput->type==I_MMAP) { char *src; struct pike_string *tmp; if (pos >= this->firstinput->len + this->pos) /* end of mmap */ { this->pos += this->firstinput->len; input_finish(); continue; } len = this->firstinput->len + this->pos - pos; if (len > READ_BUFFER_SIZE) len=READ_BUFFER_SIZE; tmp = begin_shared_string( len ); src = this->firstinput->u.mmap + pos - this->pos; /* This thread_allow/deny is at the cost of one extra memory copy */ THREADS_ALLOW(); MEMCPY(tmp->str, src, len); THREADS_DISALLOW(); return end_shared_string(tmp); } else #endif if (this->firstinput->type!=I_OBJ) { /* FIXME: What about I_BLOCKING_OBJ? */ input_finish(); /* shouldn't be anything else ... maybe a finished object */ } } return NULL; /* no data */ } if (pos==this->pos) { add_ref(this->firstbuffer->s); return this->firstbuffer->s; } return make_shared_binary_string(this->firstbuffer->s->str+ pos-this->pos, this->firstbuffer->s->len- pos+this->pos); }