Пример #1
0
/* Append the string s to the data scheduled for output.
 *
 * If a backing file is in use (THIS->fd != -1), the bytes of s are
 * written to that file starting at offset THIS->pos, THIS->pos is
 * advanced by s->len, and 0 is returned.
 *
 * Otherwise a new buffer node holding a reference to s is linked to
 * the end of the chain of buffers, and the return value is 1 if the
 * chain now holds more than MAX_BYTES_IN_BUFFER bytes, 0 otherwise.
 */
static INLINE int append_buffer(struct pike_string *s)
   /* 1=buffer full */
{
   struct buffer *b;

   debug_malloc_touch(s);

   if(THIS->fd!= -1)
   {
     char *data = s->str;
     char *end = data + s->len;

     fd_lseek(THIS->fd, THIS->pos, SEEK_SET);

     /* A single write call may accept fewer bytes than requested, so
      * keep writing until the whole string is out.  A failing call
      * ends the loop; as before, the failure is not reported to the
      * caller.
      */
     while(data < end)
     {
       long written = (long)fd_write(THIS->fd, data, end - data);
       if(written <= 0)
         break;
       data += written;
     }

     /* The position always advances by the full string length, which
      * keeps the file layout identical to what callers expect. */
     THIS->pos+=s->len;
     return 0;
   }
   else
   {
     nbuffers++;
     b=ALLOC_STRUCT(buffer);
     b->next=NULL;
     b->s=s;
     sbuffers += s->len;
     add_ref(s);   /* the buffer node keeps its own reference to s */

     if (THIS->lastbuffer)
       THIS->lastbuffer->next=b;
     else
       THIS->firstbuffer=b;

     THIS->lastbuffer=b;

     THIS->bytes_in_buffer+=s->len;
   }
   return THIS->bytes_in_buffer > MAX_BYTES_IN_BUFFER;
}
Пример #2
0
/*
 * UltraLog.ultraparse(log_format, statfun, daily, file, pagexts
 *                     [, notref [, offset]])
 *
 * Arguments (as checked below):
 *   1  log_format  string, handed to parse_log_format() to build the
 *                  per-field tables (state_list, field_endings,
 *                  save_field_num).
 *   2  statfun     function, called once at the end with
 *                  (lines, megabytes read, 1).
 *   3  daily       function, called with 23 arguments (year, month,
 *                  day, 14 mappings, 6 arrays of 24 per-hour values)
 *                  every time the date of the log lines changes, and
 *                  once more at the end if yy, mm and dd are non-zero.
 *   4  file        either an object (its fd is obtained through
 *                  fd_from_object() and is NOT closed here) or an
 *                  8-bit string naming a file that is opened and
 *                  closed by this function.
 *   5  pagexts     multiset, passed on to hourly_page_hits() and
 *                  clean_refto().
 *   6  notref      optional string (only picked up if it is the last
 *                  argument once an optional offset has been removed).
 *   7  offset      optional integer, start offset in the file.
 *
 * The file is read in blocks of READ_BLOCK_SIZE bytes.  Every byte is
 * classified through char_class[] and compared with the expected field
 * terminator for the current field; completed fields are converted and
 * stored in buf_points[].  Lines that cannot be parsed are counted in
 * broken_lines and skipped up to the next end-of-line.
 *
 * The value left on the Pike stack is the file offset reached (offs0).
 *
 * NOTE(review): the argument checks and the log-format check report
 * failures through Pike_error() after the statistics mappings have been
 * allocated in the declarations; presumably Pike_error() does not
 * return, in which case those mappings are not released on these
 * paths - confirm.
 */
static void f_ultraparse( INT32 args )
{
  FD f = -1;
  int lines=0, cls, c=0, my_fd=1, tzs=0, state=0, next;
  unsigned char *char_pointer=0;
  /* array with offsets for fields in the string buffer */
  int buf_points[16];
  INT32 v=0, offs0=0, len=0, bytes=0, gotdate=0;
  INT32 last_hour=0, last_date=0, last_year=0, last_month=0,
    this_date=0, broken_lines=0, tmpinteger=0, field_position=0;
  time_t start;
  unsigned char *read_buf;
  struct svalue *statfun, *daily, *pagexts=0, *file,  *refsval, *log_format;
  unsigned char *buf;
  char *field_buf;
#ifdef BROKEN_LINE_DEBUG
  INT32 broken_line_pos=0;
  unsigned char *broken_line;
#endif
  INT32 *state_list, *save_field_num, *field_endings, num_states;

  char *notref = 0;
  INT32 state_pos=0, bufpos=0, i, fieldnum=0;
  struct pike_string *url_str = 0,  *ref_str = 0, *rfc_str = 0, *hst_str = 0, *tmpagent = 0;
  struct svalue *url_sval;
  ONERROR unwind_protect;
  unsigned INT32 hits_per_hour[24];
  unsigned INT32 hosts_per_hour[24];
  unsigned INT32 pages_per_hour[24];
  unsigned INT32 sessions_per_hour[24];
  double kb_per_hour[24];
  unsigned INT32 session_length[24];
  /*  struct mapping *unique_per_hour  = allocate_mapping(1);*/
  struct mapping *hits_per_error  = allocate_mapping(10);
  struct mapping *error_urls      = allocate_mapping(10);
  struct mapping *error_refs      = allocate_mapping(10);
  struct mapping *user_agents     = allocate_mapping(10);
  struct mapping *directories     = allocate_mapping(20);
  struct mapping *referrers       = allocate_mapping(1);
  struct mapping *refsites        = allocate_mapping(1);
  struct mapping *referredto      = allocate_mapping(1);
  struct mapping *pages           = allocate_mapping(1);
  struct mapping *hosts           = allocate_mapping(1);
  struct mapping *hits            = allocate_mapping(1);
  struct mapping *session_start   = allocate_mapping(1);
  struct mapping *session_end     = allocate_mapping(1);
  struct mapping *hits20x    	  = allocate_mapping(300);
  struct mapping *hits302    	  = allocate_mapping(2);
  struct mapping *sites    	  = allocate_mapping(1);
  struct mapping *domains    	  = allocate_mapping(1);
  struct mapping *topdomains   	  = allocate_mapping(1);
  struct mapping *tmpdest = NULL;
  /*  struct mapping *hits30x     = allocate_mapping(2);*/
  
  /* Peel the optional trailing arguments off the stack, last one first:
   * an integer start offset (7th argument), then a string (6th). */
  if(args>6 && sp[-1].type == T_INT) {
    offs0 = sp[-1].u.integer;
    pop_n_elems(1);
    --args;
  }
  if(args>5 && sp[-1].type == T_STRING) {
    notref = sp[-1].u.string->str;
    pop_n_elems(1);
    --args;
  }
  lmu = 0;
  get_all_args("UltraLog.ultraparse", args, "%*%*%*%*%*", &log_format, &statfun, &daily, &file,
	       &pagexts);
  if(log_format->type != T_STRING) 
    Pike_error("Bad argument 1 to Ultraparse.ultraparse, expected string.\n");
  if(statfun->type != T_FUNCTION)  
    Pike_error("Bad argument 2 to Ultraparse.ultraparse, expected function.\n");
  if(daily->type != T_FUNCTION)    
    Pike_error("Bad argument 3 to Ultraparse.ultraparse, expected function.\n");
  if(pagexts->type != T_MULTISET)  
    Pike_error("Bad argument 5 to Ultraparse.ultraparse, expected multiset.\n");
  
  if(file->type == T_OBJECT)
  {
    f = fd_from_object(file->u.object);
    
    if(f == -1)
      Pike_error("UltraLog.ultraparse: File is not open.\n");
    my_fd = 0;
  } else if(file->type == T_STRING &&
	    file->u.string->size_shift == 0) {
    do {
      f=fd_open(file->u.string->str, fd_RDONLY, 0);
    } while(f < 0 && errno == EINTR);
    
    /* The descriptor, not errno, tells whether the open failed;
     * errno is never negative. */
    if(f < 0)
      Pike_error("UltraLog.ultraparse(): Failed to open file for reading (errno=%d).\n",
	    errno);
  } else 
    Pike_error("Bad argument 4 to UltraLog.ultraparse, expected string or object .\n");

  state_list = malloc((log_format->u.string->len +3) * sizeof(INT32));
  save_field_num = malloc((log_format->u.string->len +3) * sizeof(INT32));
  field_endings = malloc((log_format->u.string->len +3) * sizeof(INT32));

  num_states = parse_log_format(log_format->u.string, state_list, field_endings, save_field_num);
  if(num_states < 1)
  {
    free(state_list);
    free(save_field_num);
    free(field_endings);
    /* Do not leave a descriptor that was opened above dangling. */
    if(my_fd)
      fd_close(f);
    Pike_error("UltraLog.ultraparse(): Failed to parse log format.\n");
  }
  
  fd_lseek(f, offs0, SEEK_SET);
  read_buf = malloc(READ_BLOCK_SIZE+1);
  buf = malloc(MAX_LINE_LEN+2);
#ifdef BROKEN_LINE_DEBUG
  broken_line = malloc(MAX_LINE_LEN*10);
#endif
  MEMSET(hits_per_hour, 0, sizeof(hits_per_hour));
  MEMSET(hosts_per_hour, 0, sizeof(hosts_per_hour));
  MEMSET(session_length, 0, sizeof(session_length));
  MEMSET(pages_per_hour, 0, sizeof(pages_per_hour));
  MEMSET(sessions_per_hour, 0, sizeof(sessions_per_hour));
  MEMSET(kb_per_hour, 0, sizeof(kb_per_hour));

  /*url_sval.u.type = TYPE_STRING;*/
  BUFSET(0);
  field_position = bufpos;
  buf_points[0] = buf_points[1] = buf_points[2] = buf_points[3] = 
    buf_points[4] = buf_points[5] = buf_points[6] = buf_points[7] = 
    buf_points[8] = buf_points[9] = buf_points[10] = buf_points[11] = 
    buf_points[12] = buf_points[13] = buf_points[14] = buf_points[15] = 0;
  while(1) {
    /*    THREADS_ALLOW();*/
    do {
      len = fd_read(f, read_buf, READ_BLOCK_SIZE);
    } while(len < 0 && errno == EINTR);
    /*    THREADS_DISALLOW();*/
    if(len <= 0)  break; /* nothing more to read or error. */
    offs0 += len;
    /* char_pointer addresses the last byte read; as len counts down,
     * char_pointer[-len] walks forward through the block. */
    char_pointer = read_buf+len - 1;
    while(len--) {
      c = char_pointer[-len]; 
      cls = char_class[c];
#if 0
      fprintf(stdout, "DFA(%d:%d): '%c' (%d) ", state, state_pos, c, (int)c);
      switch(cls) {
       case CLS_WSPACE: fprintf(stdout, "CLS_WSPACE\n"); break;
       case CLS_CRLF: fprintf(stdout, "CLS_CRLF\n"); break;
       case CLS_TOKEN: fprintf(stdout, "CLS_TOKEN\n"); break;
       case CLS_DIGIT: fprintf(stdout, "CLS_DIGIT\n"); break;
       case CLS_QUOTE: fprintf(stdout, "CLS_QUOTE\n"); break;
       case CLS_LBRACK: fprintf(stdout, "CLS_LBRACK\n"); break;
       case CLS_RBRACK: fprintf(stdout, "CLS_RBRACK\n"); break;
       case CLS_SLASH: fprintf(stdout, "CLS_SLASH\n"); break;
       case CLS_COLON: fprintf(stdout, "CLS_COLON\n"); break;
       case CLS_HYPHEN: fprintf(stdout, "CLS_HYPHEN/CLS_MINUS\n"); break;
       case CLS_PLUS: fprintf(stdout, "CLS_PLUS\n"); break;
       default: fprintf(stdout, "??? %d ???\n", cls);
      }
#endif
#ifdef BROKEN_LINE_DEBUG
      broken_line[broken_line_pos++] = c;
#endif
      if(cls == field_endings[state_pos]) {
	/* Field is done. Nullify. */
      process_field:
	/*	printf("Processing field %d of %d\n", state_pos, num_states);*/
	switch(save_field_num[state_pos]) {
	 case DATE:
	 case HOUR:
	 case MINUTE:
	 case UP_SEC:
	 case CODE:
	   /*	  BUFSET(0);*/
	  /* Plain decimal field; any non-digit makes the line broken. */
	  tmpinteger = 0;
	  for(v = field_position; v < bufpos; v++) {
	    if(char_class[buf[v]] == CLS_DIGIT)
	      tmpinteger = tmpinteger*10 + (buf[v]&0xf);
	    else {
	      goto skip;
	      
	    }
	  }
	  BUFPOINT = tmpinteger;
	  break;
	  
	 case YEAR:
	  tmpinteger = 0;
	  for(v = field_position; v < bufpos; v++) {
	    if(char_class[buf[v]] == CLS_DIGIT)
	      tmpinteger = tmpinteger*10 + (buf[v]&0xf);
	    else {
	      goto skip;
	    }
	  }
	  /* Values below 100 are expanded: 0-59 -> 20xx, 60-99 -> 19xx. */
	  if(tmpinteger < 100) {
	    if(tmpinteger < 60)
	      tmpinteger += 2000;
	    else
	      tmpinteger += 1900;
	  }
	  BUFPOINT = tmpinteger;	  
	  break;

	 case BYTES:
	  /* Either a lone '?' / '-' (counted as 0 bytes) or a decimal
	   * number. */
	  v = field_position;
	  switch(char_class[buf[v++]]) {
	   case CLS_QUESTION:
	   case CLS_HYPHEN:
	    if(v == bufpos)
	      tmpinteger = 0;
	    else {
	      goto skip;
	    }
	    break;
	   case CLS_DIGIT:
	    tmpinteger = (buf[field_position]&0xf);
	    for(; v < bufpos; v++) {
	      if(char_class[buf[v]] == CLS_DIGIT)
		tmpinteger = tmpinteger*10 + (buf[v]&0xf);
	      else {
		goto skip;
	      }		
	    }
	    /*	    printf("Digit: %d\n", tmpinteger);*/
	    break;
	   default:
	    goto skip;
	  }
	  BUFPOINT = tmpinteger;
	  /*	  bufpos++;*/
	  break;	  
	 case MONTH:
	  /* Month */
	  /*	  BUFSET(0);*/
	  /*	  field_buf = buf + field_positions[state_pos];*/
	  /* Two characters: numeric month.  Three characters: month name,
	   * compared case-insensitively by OR-ing 0x20 into each byte. */
	  switch(bufpos - field_position)
	  {
	   case 2:
	    tmpinteger = 0;
	    for(v = field_position; v < bufpos; v++) {
	      if(char_class[buf[v]] == CLS_DIGIT)
		tmpinteger = tmpinteger*10 + (buf[v]&0xf);
	      else {
		goto skip;
	      }
	    }
	    break;

	   case 3:
	    switch(((buf[field_position]|0x20)<<16)|((buf[field_position+1]|0x20)<<8)|
		   (buf[field_position+2]|0x20))
	    {
	     case ('j'<<16)|('a'<<8)|'n': tmpinteger = 1;   break;
	     case ('f'<<16)|('e'<<8)|'b': tmpinteger = 2;   break;
	     case ('m'<<16)|('a'<<8)|'r': tmpinteger = 3;   break;
	     case ('a'<<16)|('p'<<8)|'r': tmpinteger = 4;   break;
	     case ('m'<<16)|('a'<<8)|'y': tmpinteger = 5;   break;
	     case ('j'<<16)|('u'<<8)|'n': tmpinteger = 6;   break;
	     case ('j'<<16)|('u'<<8)|'l': tmpinteger = 7;   break;
	     case ('a'<<16)|('u'<<8)|'g': tmpinteger = 8;   break;
	     case ('s'<<16)|('e'<<8)|'p': tmpinteger = 9;   break;
	     case ('o'<<16)|('c'<<8)|'t': tmpinteger = 10;  break;
	     case ('n'<<16)|('o'<<8)|'v': tmpinteger = 11;  break;
	     case ('d'<<16)|('e'<<8)|'c': tmpinteger = 12;  break;
	    }
	    break;

	   default:
	    goto skip;
	  }
	  /*printf("Month: %0d\n", mm);*/

	  if(tmpinteger < 1 || tmpinteger > 12)
	    goto skip; /* Broken Month */
	  BUFPOINT = tmpinteger;
	  /*	  bufpos++;*/
	  break;
	  
	 case ADDR:
	 case REFER:
	 case AGENT:
	 case TZ:
	 case METHOD:
	 case URL:
	 case RFC:
	 case PROTO:
	  /* Text field: terminate it in buf and record it. */
	  BUFSET(0);
	  SETPOINT();
	  /*	  printf("Field %d, pos %d, %s\n", save_field_num[state_pos],BUFPOINT,*/
	  /*		 buf + BUFPOINT);	 */
	  break;
	  
	}	  
	state_pos++;
	field_position = bufpos;
	if(cls != CLS_CRLF)		  
	  continue;
      } else if(cls != CLS_CRLF) {
	BUFSET(c);
	continue;
      } else {
	/*	printf("Processing last field (%d).\n", state_pos);*/
	goto process_field; /* End of line - process what we got */
      }
      /* Only reached when the character just handled was an end-of-line:
       * a complete line is now available in buf / buf_points. */
      /*	printf("%d %d\n", state_pos, num_states);*/
      /*      buf_points[8] = buf_points[9] = buf_points[10] = buf_points[11] = buf;*/
      /*      buf_points[12] = buf_points[13] = buf_points[14] = buf_points[15] = buf;*/
#if 0
      if(!((lines+broken_lines)%100000)) {
	push_int(lines+broken_lines);
	push_int((int)((float)offs0/1024.0/1024.0));
	apply_svalue(statfun, 2);
	pop_stack();
	/*printf("%5dk lines, %5d MB\n", lines/1000, (int)((float)offs0/1024.0/1024.0));*/
      }
#endif
      if(state_pos < num_states)
      {
#ifdef BROKEN_LINE_DEBUG
	broken_line[broken_line_pos] = 0;
	printf("too few states (pos=%d): %s\n", state_pos, broken_line);
#endif
	broken_lines++;
	goto ok;
      }
      
      /* From here on these short names are macros for the parsed numeric
       * fields; in particular v and bytes no longer refer to the local
       * variables declared at the top of the function. */
#define yy 	buf_points[YEAR] 
#define mm 	buf_points[MONTH] 
#define dd 	buf_points[DATE] 
#define h  	buf_points[HOUR] 
#define m  	buf_points[MINUTE] 
#define s  	buf_points[UP_SEC] 
#define v  	buf_points[CODE] 
#define bytes	buf_points[BYTES] 

      this_date = (yy*10000) + (mm*100) + dd;
      if(!this_date) {
	broken_lines++;
	goto ok;
      }
#if 1
      if(!last_date) { /* First loop w/o a value.*/
	last_date = this_date;
	last_hour = h;
      } else {
	if(last_hour != h ||
	   last_date != this_date)
	{
	  /* Hour (or date) changed: fold the per-hour mappings into the
	   * per-day ones and start over with empty per-hour mappings. */
	  pages_per_hour[last_hour] +=
	    hourly_page_hits(hits20x, pages, hits, pagexts->u.multiset, 200);
	  /*	    pages_per_hour[last_hour] +=*/
	  /*	      hourly_page_hits(hits304, pages, hits, pagexts->u.multiset, 300);*/
	  
	  /*	    printf("%5d %5d for %d %02d:00\n",*/
	  /*		   pages_per_hour[last_hour], hits_per_hour[last_hour],*/
	  /*last_date, last_hour);*/
	  if(m_sizeof(session_start)) {
	    summarize_sessions(last_hour, sessions_per_hour,
			       session_length, session_start, session_end);
	    free_mapping(session_start); 
	    free_mapping(session_end); 
	    session_start = allocate_mapping(1);
	    session_end   = allocate_mapping(1);
	  }
	  hosts_per_hour[last_hour] += m_sizeof(sites);
	  do_map_addition(hosts, sites);
	  free_mapping(sites);
	  sites = allocate_mapping(100);
	  last_hour = h;
	  free_mapping(hits20x); /* Reset this one */
	  /*	    free_mapping(hits304);  Reset this one */
	  /*	    hits304   = allocate_mapping(2);*/
	  hits20x   = allocate_mapping(2);
	}
#if 1
	if(last_date != this_date) {
	  /* Date changed: summarize the day, hand it to the daily
	   * callback and reset all per-day state. */
	  /*	  printf("%d   %d\n", last_date, this_date);*/
	  tmpdest = allocate_mapping(1);
	  summarize_refsites(refsites, referrers, tmpdest);
	  free_mapping(referrers);
	  referrers = tmpdest;

	  tmpdest = allocate_mapping(1);
	  clean_refto(referredto, tmpdest, pagexts->u.multiset);
	  free_mapping(referredto);
	  referredto = tmpdest;
	  
	  summarize_directories(directories, pages);
	  summarize_directories(directories, hits);

	  tmpdest = allocate_mapping(1);
	  http_decode_mapping(user_agents, tmpdest);
	  free_mapping(user_agents);
	  user_agents = tmpdest;

	  tmpdest = allocate_mapping(1);
	  summarize_hosts(hosts, domains, topdomains, tmpdest);
	  free_mapping(hosts);
	  hosts = tmpdest;
#if 1
	  push_int(last_date / 10000);
	  push_int((last_date % 10000)/100);
	  push_int((last_date % 10000)%100);
	  push_mapping(pages);
	  push_mapping(hits);
	  push_mapping(hits302);
	  push_mapping(hits_per_error);
	  push_mapping(error_urls);
	  push_mapping(error_refs);
	  push_mapping(referredto);
	  push_mapping(refsites); 
	  push_mapping(referrers); 
	  push_mapping(directories); 
	  push_mapping(user_agents); 
	  push_mapping(hosts); 
	  push_mapping(domains); 
	  push_mapping(topdomains); 
	  for(i = 0; i < 24; i++) {
	    push_int(sessions_per_hour[i]);
	  }
	  f_aggregate(24);
	  for(i = 0; i < 24; i++) {
	    push_int(hits_per_hour[i]);
	    hits_per_hour[i] = 0;
	  }
	  f_aggregate(24);
	  for(i = 0; i < 24; i++) {
	    push_int(pages_per_hour[i]);
	    pages_per_hour[i] = 0;
	  }
	  f_aggregate(24);
	  for(i = 0; i < 24; i++) {
	    /* KB per hour.*/
	    push_float(kb_per_hour[i]);
	    kb_per_hour[i] = 0.0;
	  }
	  f_aggregate(24);
	  for(i = 0; i < 24; i++) {
	    push_float(sessions_per_hour[i] ?
		       ((float)session_length[i] /
			(float)sessions_per_hour[i]) / 60.0 : 0.0);
	    sessions_per_hour[i] = 0;
	    session_length[i] = 0;
	  }
	  f_aggregate(24);
	  for(i = 0; i < 24; i++) {
	    push_int(hosts_per_hour[i]);
	    hosts_per_hour[i] = 0;
	  }
	  f_aggregate(24);
	  apply_svalue(daily, 23);
	  pop_stack();
#else
	  free_mapping(error_refs);
	  free_mapping(referredto); 
	  free_mapping(refsites); 
	  free_mapping(directories); 
	  free_mapping(error_urls);
	  free_mapping(hits);
	  free_mapping(hits_per_error);
	  free_mapping(pages);
	  free_mapping(hosts);
	  free_mapping(domains);
	  free_mapping(topdomains);
	  free_mapping(referrers); 
	  free_mapping(hits302);
#endif
	  user_agents 	 = allocate_mapping(10);
	  hits302 	 = allocate_mapping(1);
	  hits_per_error = allocate_mapping(10);
	  error_urls     = allocate_mapping(10);
	  error_refs     = allocate_mapping(10);
	  directories    = allocate_mapping(20);
	  referrers      = allocate_mapping(1);
	  referredto     = allocate_mapping(1);
	  refsites       = allocate_mapping(1);
	  pages  	 = allocate_mapping(1);
	  hits 	         = allocate_mapping(1);
	  /* sites is not handed to the callback above, so the mapping it
	   * currently refers to must be released before it is replaced. */
	  free_mapping(sites);
	  sites	         = allocate_mapping(1);
	  hosts	         = allocate_mapping(1);
	  domains	 = allocate_mapping(1);
	  topdomains     = allocate_mapping(1);
	  last_date = this_date;
	}
#endif
      }
#endif
#if 1
      process_session(buf+buf_points[ADDR], h*3600+m*60+s, h, 
		      sessions_per_hour, session_length, session_start,
		      session_end, sites);
      url_str = make_shared_binary_string((char *)(buf + buf_points[URL]),
					  strlen((char *)(buf + buf_points[URL])));
#if 1
      switch(v) {
      /* Do error-code specific logging. Error urls that are
	   specially treated do not include auth required, service
	   unavailable etc. They are only included in the return
	   code summary.
	*/
       case 200: case 201: case 202: case 203: 
       case 204: case 205: case 206: case 207:
       case 304:
	mapaddstr(hits20x, url_str);
	DO_REFERRER();
	break;

       case 300: case 301: case 302:
       case 303: case 305:
	mapaddstr(hits302, url_str);
	DO_REFERRER();
	break;

       case 400: case 404: case 405: case 406: case 408:
       case 409: case 410: case 411: case 412: case 413:
       case 414: case 415: case 416: case 500: case 501:
	DO_ERREF();
	map2addint(error_urls, v, url_str);
	break;
      }
      /*rfc_str = http_decode_string(buf + buf_points[RFC]);*/
      /*hst_str = make_shared_binary_string(buf, strlen(buf));*/
#endif	
      free_string(url_str);
      mapaddint(hits_per_error, v);
      kb_per_hour[h] += (float)bytes / 1024.0;
      hits_per_hour[h]++;
      /*#endif*/
      if(strlen((char *)(buf + buf_points[AGENT]))>1) {
	/* Got User Agent */
	tmpagent = make_shared_string((char *)(buf + buf_points[AGENT]));
	mapaddstr(user_agents, tmpagent);
	free_string(tmpagent);
      }
#endif
      lines++;
#if 0
      printf("%s  %s  %s\n%s  %s  %s\n%04d-%02d-%02d  %02d:%02d:%02d  \n%d   %d\n",
	     buf + buf_points[ADDR], buf + buf_points[REFER], buf + buf_points[ RFC ],
	     buf + buf_points[METHOD], buf + buf_points[ URL ], buf + buf_points[PROTO],
	     yy, mm, dd, h, m, s, v, bytes);
      /*      if(lines > 10)
	      exit(0);*/
#endif
    ok:
      /* Reset the per-line state before the next line.  Entries 8-10 of
       * buf_points are deliberately left untouched (see the
       * commented-out line below). */
      gotdate = /* v = bytes =h = m = s = tz = tzs = dd = mm = yy =  */
	buf_points[0] = buf_points[1] = buf_points[2] = buf_points[3] = 
	buf_points[4] = buf_points[5] = buf_points[6] = buf_points[7] = 
	/*buf_points[8] = buf_points[9] = buf_points[10] =*/
	buf_points[11] = 
	buf_points[12] = buf_points[13] = buf_points[14] = buf_points[15] = 
	bufpos = state_pos = 0;
      field_position = 1;
#ifdef BROKEN_LINE_DEBUG
      broken_line_pos = 0;
#endif
      BUFSET(0);
      
    }    
  }  
 cleanup:
  free(save_field_num);
  free(state_list);
  free(field_endings);
  free(buf);
  push_int(lines);
  push_int((int)((float)offs0 / 1024.0/1024.0));
  push_int(1);
  apply_svalue(statfun, 3);
  pop_stack();
  free(read_buf);
#ifdef BROKEN_LINE_DEBUG
  free(broken_line);
#endif
  if(my_fd)
    /* If my_fd == 0, the second argument was an object and thus we don't
     * want to free it.
     */
    fd_close(f);
  /*  push_int(offs0);  */
  /*  printf("Done: %d %d %d ", yy, mm, dd);*/
  if(yy && mm && dd) { 
    /* Final summary for the data collected since the last daily call. */
    /*    printf("\nLast Summary for %d-%02d-%02d %02d:%02d\n", yy, mm, dd, h, m);*/
    pages_per_hour[last_hour] += 
      hourly_page_hits(hits20x, pages, hits, pagexts->u.multiset, 200);
    if(m_sizeof(session_start)) {
      summarize_sessions(last_hour, sessions_per_hour,
			 session_length, session_start, session_end);
    }
    hosts_per_hour[last_hour] += m_sizeof(sites);
    do_map_addition(hosts, sites);
    free_mapping(sites);
	  
    tmpdest = allocate_mapping(1);
    summarize_refsites(refsites, referrers, tmpdest);
    free_mapping(referrers);
    referrers = tmpdest;
    summarize_directories(directories, pages);
    summarize_directories(directories, hits);
    tmpdest = allocate_mapping(1);
    clean_refto(referredto, tmpdest, pagexts->u.multiset);
    free_mapping(referredto);
    referredto = tmpdest;

    tmpdest = allocate_mapping(1);
    http_decode_mapping(user_agents, tmpdest);
    free_mapping(user_agents);
    user_agents = tmpdest;

    tmpdest = allocate_mapping(1);
    summarize_hosts(hosts, domains, topdomains, tmpdest);
    free_mapping(hosts);
    hosts = tmpdest;

    push_int(yy);
    push_int(mm);
    push_int(dd);
    push_mapping(pages);
    push_mapping(hits);
    push_mapping(hits302);
    push_mapping(hits_per_error);
    push_mapping(error_urls);
    push_mapping(error_refs);
    push_mapping(referredto); 
    push_mapping(refsites); 
    push_mapping(referrers); 
    push_mapping(directories); 
    push_mapping(user_agents); 
    push_mapping(hosts); 
    push_mapping(domains); 
    push_mapping(topdomains); 

    for(i = 0; i < 24; i++) {  push_int(sessions_per_hour[i]);  }
    f_aggregate(24);

    for(i = 0; i < 24; i++) {  push_int(hits_per_hour[i]);      }
    f_aggregate(24);

    for(i = 0; i < 24; i++) {  push_int(pages_per_hour[i]);     }
    f_aggregate(24);
    
    for(i = 0; i < 24; i++) {  push_float(kb_per_hour[i]);      }
    f_aggregate(24);

    for(i = 0; i < 24; i++) {
      push_float(sessions_per_hour[i] ?
		 ((float)session_length[i] /
		  (float)sessions_per_hour[i]) / 60.0 : 0.0);
    }
    f_aggregate(24);

    for(i = 0; i < 24; i++) {
      push_int(hosts_per_hour[i]);
      hosts_per_hour[i] = 0;
    }
    f_aggregate(24);

    apply_svalue(daily, 23);
    pop_stack();
  } else {
    /* No final report: release everything that would otherwise have
     * been pushed to the callback or freed in the branch above. */
    free_mapping(error_refs);
    free_mapping(referredto); 
    free_mapping(refsites); 
    free_mapping(directories); 
    free_mapping(error_urls);
    free_mapping(hits);
    free_mapping(hits_per_error);
    free_mapping(pages);
    free_mapping(referrers); 
    free_mapping(hits302); 
    free_mapping(user_agents); 
    free_mapping(hosts);
    free_mapping(sites);
    free_mapping(domains);
    free_mapping(topdomains);
  }
  free_mapping(hits20x); 
  free_mapping(session_start); 
  free_mapping(session_end); 
  /*  free_mapping(hits30x); */
  printf("\nTotal lines: %d, broken lines: %d, mapping lookups: %d\n\n", lines,
	 broken_lines, lmu);
  fflush(stdout);
  pop_n_elems(args);  
  push_int(offs0);
  return; 
      
 skip:
  /* A field could not be parsed: count the line as broken and discard
   * input up to and including the next end-of-line character, reading
   * further blocks if needed.  Parsing then resumes at "ok". */
  broken_lines++;
  while(1) 
  {
    while(len--) {
#ifdef BROKEN_LINE_DEBUG
      broken_line[broken_line_pos] = char_pointer[-len];
#endif
      if(char_class[char_pointer[-len]] == CLS_CRLF) {
#ifdef BROKEN_LINE_DEBUG
	broken_line[broken_line_pos] = 0;
	printf("Broken Line (pos=%d): %s\n", state_pos, broken_line);
#endif
	goto ok;
      }
    }
    do {
      len = fd_read(f, read_buf, READ_BLOCK_SIZE);
    } while(len < 0 && errno == EINTR);
    if(len <= 0)
      break; /* nothing more to read. */
    offs0 += len;
    char_pointer = read_buf+len - 1;
  }
  goto cleanup;
}
Пример #3
0
static void f_read( INT32 args )
{
  char *read_buf;
  struct svalue *logfun, *file;
  FD f = -1;
  int cls, c, my_fd=1, state=0, tzs=0;
  char *char_pointer;
  INT32 v=0, yy=0, mm=0, dd=0, h=0, m=0, s=0, tz=0;
  ptrdiff_t offs0=0, len=0;
  struct svalue *old_sp;
  /* #define DYNAMIC_BUF */
#ifdef DYNAMIC_BUF
  dynamic_buffer buf;
#else
#define BUFSET(X) do { if(bufpos == bufsize) { bufsize *= 2; buf = realloc(buf, bufsize+1); } buf[bufpos++] = c; } while(0)
#define PUSHBUF() do { push_string( make_shared_binary_string( buf,bufpos ) ); bufpos=0; } while(0)
  char *buf;
  int bufsize=CLF_BLOCK_SIZE, bufpos=0;
#endif

  if(args>2 && sp[-1].type == T_INT) {
    offs0 = sp[-1].u.integer;
    pop_n_elems(1);
    --args;
  }
  old_sp = sp;

  get_all_args("CommonLog.read", args, "%*%*", &logfun, &file);
  if(logfun->type != T_FUNCTION)
    SIMPLE_BAD_ARG_ERROR("CommonLog.read", 1, "function");

  if(file->type == T_OBJECT)
  {
    f = fd_from_object(file->u.object);
    
    if(f == -1)
      Pike_error("CommonLog.read: File is not open.\n");
    my_fd = 0;
  } else if(file->type == T_STRING &&
	    file->u.string->size_shift == 0) {
#ifdef PIKE_SECURITY
      if(!CHECK_SECURITY(SECURITY_BIT_SECURITY))
      {
	if(!CHECK_SECURITY(SECURITY_BIT_CONDITIONAL_IO))
	  Pike_error("Permission denied.\n");
	push_text("read");
	push_int(0);
	ref_push_string(file->u.string);
	push_text("r");
	push_int(00666);

	safe_apply(OBJ2CREDS(CURRENT_CREDS)->user,"valid_open",5);
	switch(Pike_sp[-1].type)
	{
	case PIKE_T_INT:
	  switch(Pike_sp[-1].u.integer)
	  {
	  case 0: /* return 0 */
	    errno=EPERM;
	    Pike_error("CommonLog.read(): Failed to open file for reading (errno=%d).\n",
		       errno);

	  case 2: /* ok */
	    pop_stack();
	    break;

	  case 3: /* permission denied */
	    Pike_error("CommonLog.read: permission denied.\n");

	  default:
	    Pike_error("Error in user->valid_open, wrong return value.\n");
	  }
	  break;

	default:
	  Pike_error("Error in user->valid_open, wrong return type.\n");

	case PIKE_T_STRING:
	  /*	  if(Pike_sp[-1].u.string->shift_size) */
	  /*	    file=Pike_sp[-1]; */
	  pop_stack();
	}

      }
#endif
    do {
      THREADS_ALLOW();
      f=fd_open((char *)STR0(file->u.string), fd_RDONLY, 0);
      THREADS_DISALLOW();
      if (f >= 0 || errno != EINTR) break;
      check_threads_etc();
    } while (1);

    if(f < 0)
      Pike_error("CommonLog.read(): Failed to open file for reading (errno=%d).\n",
	    errno);
  } else
    SIMPLE_BAD_ARG_ERROR("CommonLog.read", 2, "string|Stdio.File");

#ifdef HAVE_LSEEK64
  lseek64(f, offs0, SEEK_SET);
#else
  fd_lseek(f, offs0, SEEK_SET);
#endif
  read_buf = malloc(CLF_BLOCK_SIZE+1);
#ifndef DYNAMIC_BUF
  buf = malloc(bufsize);
#endif
  while(1) {
    do {
      THREADS_ALLOW();
      len = fd_read(f, read_buf, CLF_BLOCK_SIZE);
      THREADS_DISALLOW();
      if (len >= 0 || errno != EINTR) break;
      check_threads_etc();
    } while (1);
    if(len == 0)
      break; /* nothing more to read. */
    if(len < 0)
      break;
    char_pointer = read_buf;
    while(len--) {
      offs0++;
      c = char_pointer[0] & 0xff;
      char_pointer ++;
      cls = char_class[c];
#ifdef TRACE_DFA
      fprintf(stderr, "DFA(%d): '%c' ", state, (c<32? '.':c));
      switch(cls) {
      case CLS_WSPACE: fprintf(stderr, "CLS_WSPACE"); break;
      case CLS_CRLF: fprintf(stderr, "CLS_CRLF"); break;
      case CLS_TOKEN: fprintf(stderr, "CLS_TOKEN"); break;
      case CLS_DIGIT: fprintf(stderr, "CLS_DIGIT"); break;
      case CLS_QUOTE: fprintf(stderr, "CLS_QUOTE"); break;
      case CLS_LBRACK: fprintf(stderr, "CLS_LBRACK"); break;
      case CLS_RBRACK: fprintf(stderr, "CLS_RBRACK"); break;
      case CLS_SLASH: fprintf(stderr, "CLS_SLASH"); break;
      case CLS_COLON: fprintf(stderr, "CLS_COLON"); break;
      case CLS_HYPHEN: fprintf(stderr, "CLS_HYPHEN"); break;
      case CLS_PLUS: fprintf(stderr, "CLS_PLUS"); break;
      default: fprintf(stderr, "???");
      }
      fprintf(stderr, " %d items on stack\n", sp-old_sp);
#endif
      switch(state) {
      case 0:
	if(sp != old_sp) {
	  if(sp == old_sp+15) {
	    f_aggregate(15);
	    push_int64(offs0);
	    apply_svalue(logfun, 2);
	    pop_stack();
	  } else
	    pop_n_elems(sp-old_sp);
	}
	if(cls > CLS_CRLF) {
	  if(cls == CLS_HYPHEN) {
	    push_int(0);
	    state = 2;
	    break;
	  }
#ifdef DYNAMIC_BUF
	  buf.s.str = NULL;
	  initialize_buf( &buf );
	  low_my_putchar( c, &buf );
#else
	  bufpos = 0;
	  BUFSET(c);
#endif
	  state=1;
	}
	break;
      case 1:
	if(cls > CLS_CRLF) {
#ifdef DYNAMIC_BUF
	  low_my_putchar( c, &buf );
#else
	  BUFSET(c);
#endif
	  break;
	}
#ifdef DYNAMIC_BUF
	push_string( low_free_buf( &buf ) ); /* remotehost */
#else
	PUSHBUF();
#endif
	state = (cls == CLS_WSPACE? 2:0);
	break;
      case 2:
	if(cls > CLS_CRLF) {
	  if(cls == CLS_HYPHEN) {
	    push_int(0);
	    state = 4;
	    break;
	  }
#ifdef DYNAMIC_BUF
	  buf.s.str = NULL;
	  initialize_buf( &buf );
	  low_my_putchar( c, &buf );
#else
	  bufpos = 0;
	  BUFSET(c);
#endif

	  state=3;
	} else if(cls == CLS_CRLF)
	  state=0;
	break;
      case 3:
	if(cls > CLS_CRLF) {
#ifdef DYNAMIC_BUF
	  low_my_putchar( c, &buf );
#else
	  BUFSET(c);
#endif

	  break;
	}
#ifdef DYNAMIC_BUF
	push_string( low_free_buf( &buf ) ); /* rfc931 */
#else
	PUSHBUF(); /* rfc931 */
#endif
	state = (cls == CLS_WSPACE? 4:0);
	break;
      case 4:
	if(cls > CLS_CRLF) {
	  if(cls == CLS_HYPHEN) {
	    push_int(0);
	    state = 6;
	    break;
	  }
#ifdef DYNAMIC_BUF
	  buf.s.str = NULL;
	  initialize_buf( &buf );
	  low_my_putchar( c, &buf );
#else
	  bufpos = 0;
	  BUFSET(c);
#endif

	  state=5;
	} else if(cls == CLS_CRLF)
	  state=0;
	break;
      case 5:
	if(cls > CLS_CRLF) {
#ifdef DYNAMIC_BUF
	  low_my_putchar( c, &buf );
#else
	  BUFSET(c);
#endif

	  break;
	}
#ifdef DYNAMIC_BUF
	push_string( low_free_buf( &buf ) ); /* authuser */
#else
	PUSHBUF(); /* authuser */
#endif
	state = (cls == CLS_WSPACE? 6:0);
	break;
      case 6:
	if(cls == CLS_LBRACK)
	  state = 15;
	else if(cls == CLS_CRLF)
	  state = 0;
	else if(cls == CLS_HYPHEN) {
	  push_int(0);
	  push_int(0);
	  push_int(0);
	  state = 7;
	}
	break;
      case 7:
	if(cls == CLS_QUOTE) {
#ifdef DYNAMIC_BUF
	  buf.s.str = NULL;
	  initialize_buf( &buf );
#else
	  bufpos = 0;
#endif
	  state = 31;
	} else if(cls == CLS_CRLF)
	  state = 0;
	else if(cls == CLS_HYPHEN) {
	  push_int(0);
	  push_int(0);
	  push_int(0);
	  state = 10;
	}
	break;
      case 8:
	if(cls == CLS_QUOTE)
	  state = 9;
	else if(cls == CLS_CRLF) {
#ifdef DYNAMIC_BUF
	  push_string( low_free_buf( &buf ) );
#else
	  PUSHBUF();
#endif
	  state = 0;
	} else
#ifdef DYNAMIC_BUF
	  low_my_putchar( c, &buf );
#else
	  BUFSET(c);
#endif

	break;
      case 9:
	if(cls > CLS_CRLF) {
#ifdef DYNAMIC_BUF
	  low_my_putchar( '"', &buf);
	  low_my_putchar( c, &buf);
#else
	  BUFSET('"');
	  BUFSET(c);
#endif
	  state = 8;
	  break;
	}
#ifdef DYNAMIC_BUF
	push_string( low_free_buf( &buf ) ); /* protocol */
#else
	PUSHBUF(); /* protoocl */
#endif
	state = (cls == CLS_CRLF? 0 : 10);
	break;
      case 10:
	if(cls == CLS_DIGIT) {
	  v = c&0xf;
	  state = 11;
	} else if(cls == CLS_CRLF)
	  state = 0;
	else if(cls == CLS_HYPHEN) {
	  push_int(0);
	  state = 12;
	}
	break;
      case 11:
	if(cls == CLS_DIGIT)
	  v = v*10+(c&0xf);
	else if(cls == CLS_WSPACE) {
	  push_int(v); /* status */
	  state = 12;
	} else state = 0;
	break;
      case 12:
	if(cls == CLS_DIGIT) {
	  v = c&0xf;
	  state = 13;
	} else if(cls == CLS_CRLF)
	  state = 0;
	else if(cls == CLS_HYPHEN) {
	  push_int(0);
	  state = 14;
	}
	break;
      case 13:
	if(cls == CLS_DIGIT)
	  v = v*10+(c&0xf);
	else {
	  push_int(v); /* bytes */
	  state = (cls == CLS_CRLF? 0:14);
	}
	break;
      case 14:
	if(cls == CLS_CRLF)
	  state = 0;
	break;

      case 15:
	if(cls == CLS_DIGIT) {
	  dd = c&0xf;
	  state = 16;
	} else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 16:
	/* getting day */
	if(cls == CLS_DIGIT)
	  dd = dd*10+(c&0xf);
	else if(cls == CLS_SLASH)
	  state = 17;
	else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 17:
	if(cls == CLS_DIGIT) {
	  mm = c&0xf;
	  state = 18;
	} else if(cls == CLS_TOKEN) {
	  mm = c|0x20;
	  state = 21;
	} else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 18:
	/* getting numeric month */
	if(cls == CLS_DIGIT)
	  mm = mm*10+(c&0xf);
	else if(cls == CLS_SLASH)
	  state = 19;
	else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 19:
	if(cls == CLS_DIGIT) {
	  yy = c&0xf;
	  state = 20;
	} else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 20:
	/* getting year */
	if(cls == CLS_DIGIT)
	  yy = yy*10+(c&0xf);
	else if(cls == CLS_COLON)
	  state = 22;
	else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 21:
	/* getting textual month */
	if(cls == CLS_TOKEN)
	  mm = (mm<<8)|c|0x20;
	else if(cls == CLS_SLASH) {
	  state = 19;
	  switch(mm) {
	  case ('j'<<16)|('a'<<8)|'n': mm=1; break;
	  case ('f'<<16)|('e'<<8)|'b': mm=2; break;
	  case ('m'<<16)|('a'<<8)|'r': mm=3; break;
	  case ('a'<<16)|('p'<<8)|'r': mm=4; break;
	  case ('m'<<16)|('a'<<8)|'y': mm=5; break;
	  case ('j'<<16)|('u'<<8)|'n': mm=6; break;
	  case ('j'<<16)|('u'<<8)|'l': mm=7; break;
	  case ('a'<<16)|('u'<<8)|'g': mm=8; break;
	  case ('s'<<16)|('e'<<8)|'p': mm=9; break;
	  case ('o'<<16)|('c'<<8)|'t': mm=10; break;
	  case ('n'<<16)|('o'<<8)|'v': mm=11; break;
	  case ('d'<<16)|('e'<<8)|'c': mm=12; break;
	  default:
	    state = 14;
	  }
	}
	break;
      case 22:
	if(cls == CLS_DIGIT) {
	  h = c&0xf;
	  state = 23;
	} else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 23:
	/* getting hour */
	if(cls == CLS_DIGIT)
	  h = h*10+(c&0xf);
	else if(cls == CLS_COLON)
	  state = 24;
	else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 24:
	if(cls == CLS_DIGIT) {
	  m = c&0xf;
	  state = 25;
	} else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 25:
	/* getting minute */
	if(cls == CLS_DIGIT)
	  m = m*10+(c&0xf);
	else if(cls == CLS_COLON)
	  state = 26;
	else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 26:
	if(cls == CLS_DIGIT) {
	  s = c&0xf;
	  state = 27;
	} else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 27:
	/* getting second */
	if(cls == CLS_DIGIT)
	  s = s*10+(c&0xf);
	else if(cls == CLS_WSPACE)
	  state = 28;
	else
	  state = (cls == CLS_CRLF? 0:14);
	break;
      case 28:
	if(cls>=CLS_HYPHEN) {
	  state = 29;
	  tzs = cls!=CLS_PLUS;
	  tz = 0;
	} else if(cls == CLS_DIGIT) {
	  state = 29;
	  tzs = 0;
	  tz = c&0xf;
	} else if(cls==CLS_CRLF)
	  state = 0;
	break;
      case 29:
	/* getting timezone */
	if(cls == CLS_DIGIT)
	  tz = tz*10+(c&0xf);
	else {
	  if(tzs)
	    tz = -tz;
	  push_int(yy);
	  push_int(mm);
	  push_int(dd);
	  push_int(h);
	  push_int(m);
	  push_int(s);
	  push_int(tz);
	  if(cls == CLS_RBRACK)
	    state = 7;
	  else
	    state = (cls == CLS_CRLF? 0 : 30);
	}
	break;
      case 30:
	if(cls == CLS_RBRACK)
	  state = 7;
	else if(cls == CLS_CRLF)
	  state = 0;
	break;
      case 31:
	if(cls == CLS_QUOTE) {
#ifdef DYNAMIC_BUF
	  push_string( low_free_buf( &buf ) );
#else
	  PUSHBUF();
#endif
	  push_int(0);
	  push_int(0);
	  state = 10;
	} else if(cls >= CLS_TOKEN)
#ifdef DYNAMIC_BUF
	  low_my_putchar( c, &buf );
#else
	  BUFSET(c);
#endif

	else {
#ifdef DYNAMIC_BUF
	  push_string( low_free_buf( &buf ) ); /* method */
#else
	  PUSHBUF(); /* method */
#endif
	  state = (cls == CLS_CRLF? 0 : 32);
	}
	break;
      case 32:
	if(cls == CLS_QUOTE) {
	  push_int(0);
	  push_int(0);
	  state = 10;
	} else if(cls >= CLS_TOKEN) {
#ifdef DYNAMIC_BUF
	  buf.s.str = NULL;
	  initialize_buf( &buf );
	  low_my_putchar( c, &buf );
#else
	  bufpos = 0;
	  BUFSET(c);
#endif

	  state = 33;
	} else
	  if(cls == CLS_CRLF)
	    state = 0;
	break;
      case 33:
	if(cls == CLS_QUOTE)
	  state = 34;
	else if(cls == CLS_CRLF) {
#ifdef DYNAMIC_BUF
	  push_string( low_free_buf( &buf ) ); 
#else
	  PUSHBUF(); 
#endif
	  state = 0;
	} else if(cls == CLS_WSPACE) {
#ifdef DYNAMIC_BUF
	  push_string( low_free_buf( &buf ) );  /* path */
#else
	  PUSHBUF();  /* path */
#endif
	  state = 35;
	} else	
#ifdef DYNAMIC_BUF
	  low_my_putchar( c, &buf );
#else
	  BUFSET(c);
#endif

	break;
      case 34:
	if(cls >= CLS_TOKEN) {
#ifdef DYNAMIC_BUF
	  low_my_putchar( '"', &buf );
	  low_my_putchar( c, &buf );
#else
	  BUFSET('"');
	  BUFSET(c);
#endif

	  state = 33;
	} else if(cls == CLS_CRLF) {
#ifdef DYNAMIC_BUF
	  push_string( low_free_buf( &buf ) ); 
#else
	  PUSHBUF(); 
#endif
	  state = 0;
	} else {
#ifdef DYNAMIC_BUF
	  push_string( low_free_buf( &buf ) ); 
#else
	  PUSHBUF(); 
#endif
	  push_text("HTTP/0.9");
	  state = 10;
	}
	break;
      case 35:
	if(cls == CLS_QUOTE) {
	  push_text("HTTP/0.9");
	  state = 10;
	} else if(cls >= CLS_TOKEN) {
#ifdef DYNAMIC_BUF
	  buf.s.str = NULL;
	  initialize_buf( &buf );
	  low_my_putchar( c, &buf );
#else
	  bufpos = 0;
	  BUFSET(c);
#endif

	  state = 8;
	} else
	  if(cls == CLS_CRLF)
	    state = 0;
	break;
      }
    }
  }
Пример #4
0
/*! @decl void output(object obj, int|void start_pos)
 *!
 *!   Add an output file object.
 */
/* Implementation of pipe->output(obj [, start_pos]).
 *
 * Argument 1 must be a live (non-destructed) object; optional argument 2
 * must be an integer start position.
 *
 * If the pipe has no file buffer yet (THIS->fd == -1) and obj->query_fd()
 * yields a descriptor for a regular file, that file is dup()ed and becomes
 * the pipe's file buffer: every queued memory buffer is written to it and
 * released, obj->set_id(0) is called, and the function returns.
 *
 * Otherwise obj is registered as an ordinary output. It must provide
 * write(), set_nonblocking() and set_blocking(); if not, a Pike error is
 * thrown before any pipe state has been modified.
 *
 * NOTE: the results of fd_lseek()/fd_write() are not checked here, so
 * short or failed writes to the file buffer go undetected.
 */
static void pipe_output(INT32 args)
{
  struct object *obj;
  struct output *o;
  int fd;
  struct stat s;
  struct buffer *b;
  int write_offset, set_nonblocking_offset, set_blocking_offset;

  if (args<1 ||
      sp[-args].type != T_OBJECT ||
      !sp[-args].u.object ||
      !sp[-args].u.object->prog)
    Pike_error("Bad/missing argument 1 to pipe->output().\n");

  /* Argument 2 is read below whenever args>=2, so it has to be validated
   * for every such call, not only when exactly two arguments were given.
   */
  if (args>=2 &&
      sp[1-args].type != T_INT)
    Pike_error("Bad argument 2 to pipe->output().\n");

  if (THIS->fd==-1)		/* no file buffer yet */
  {
    /* Test whether the object is usable as a file buffer. */
    apply(sp[-args].u.object,"query_fd",0);

    if ((sp[-1].type==T_INT)
        && (fd=sp[-1].u.integer)>=0
        && (fstat(fd,&s)==0)
        && S_ISREG(s.st_mode)
        && (THIS->fd=fd_dup(fd))!=-1 )
    {
      /* Start writing at the current file pointer of the original fd. */
      THIS->pos=fd_lseek(fd, 0L, SEEK_CUR);

      THIS->living_outputs++;

      /* Move everything queued in memory into the file. */
      while (THIS->firstbuffer)
      {
        b=THIS->firstbuffer;
        THIS->firstbuffer=b->next;
        fd_lseek(THIS->fd, THIS->pos, SEEK_SET);
        fd_write(THIS->fd,b->s->str,b->s->len);
        /* Advance the write position (as append_buffer() does after each
         * write); otherwise every buffer would be written on top of the
         * previous one and the flushed data would never be readable.
         */
        THIS->pos+=b->s->len;
        /* Keep the byte accounting in step with the released buffer, the
         * same way gimme_some_data() does when it frees a buffer.
         */
        THIS->bytes_in_buffer-=b->s->len;
        sbuffers-=b->s->len;
        nbuffers--;
        free_string(b->s);
        free((char *)b);
      }
      THIS->lastbuffer=NULL;

      push_int(0);
      apply(sp[-args-2].u.object,"set_id", 1);
      pop_n_elems(args+2);	/* the arguments and the two apply results */
      return;
    }
    pop_stack();		/* result of query_fd */
  }

  /* Look up the required methods before touching any pipe state, so that
   * a rejected object does not leave a half-initialised output (holding a
   * freed object pointer) linked into the output list.
   */
  write_offset=find_identifier("write",sp[-args].u.object->prog);
  set_nonblocking_offset=find_identifier("set_nonblocking",
                                         sp[-args].u.object->prog);
  set_blocking_offset=find_identifier("set_blocking",
                                      sp[-args].u.object->prog);

  if (write_offset<0 || set_nonblocking_offset<0 ||
      set_blocking_offset<0)
  {
    Pike_error("illegal file object%s%s%s\n",
          ((write_offset<0)?"; no write":""),
          ((set_nonblocking_offset<0)?"; no set_nonblocking":""),
          ((set_blocking_offset<0)?"; no set_blocking":""));
  }

  THIS->living_outputs++;

  /* Allocate a new struct output and link it first in the output list. */
  obj=clone_object(output_program,0);
  o=(struct output *)(obj->storage);
  o->next=THIS->firstoutput;
  THIS->firstoutput=obj;
  noutputs++;

  add_ref(o->obj=sp[-args].u.object);

  o->write_offset=write_offset;
  o->set_nonblocking_offset=set_nonblocking_offset;
  o->set_blocking_offset=set_blocking_offset;

  o->mode=O_RUN;

  /* An explicit start position may be given as the 2nd argument;
   * otherwise the output starts at the pipe's current position.
   */
  if(args>=2)
    o->pos=sp[1-args].u.integer;
  else
    o->pos=THIS->pos;

  /* The output clone is handed to the file object as its id. */
  push_object(obj);
  apply(o->obj,"set_id",1);
  pop_stack();

  /* set_nonblocking(0, write_callback, close_callback) */
  push_int(0);
  push_callback(offset_output_write_callback);
  push_callback(offset_output_close_callback);
  apply_low(o->obj,o->set_nonblocking_offset,3);
  pop_stack();

  pop_n_elems(args-1);
}
Пример #5
0
/*! @decl void input(object obj)
 *!
 *! Add an input file to this pipe.
 */
/* Implementation of pipe->input(obj).
 *
 * Argument 1 must be a live (non-destructed) object. obj->set_id(0) is
 * called and a new input record is obtained from new_input().
 *
 * When mmap support is compiled in and the pipe has no file buffer, a
 * regular file (as reported by obj->query_fd()) is mapped read-only from
 * its current file position to its end and served as an I_MMAP input.
 *
 * Otherwise the object itself is used as the input:
 *  - with set_nonblocking() and set_blocking(): I_OBJ, driven by callbacks;
 *  - without them but with read(): I_BLOCKING_OBJ, read synchronously;
 *  - with neither: a Pike error is thrown.
 *
 * Every successful path leaves the integer 0 on the stack.
 */
static void pipe_input(INT32 args)
{
   struct input *i;
   int fd=-1;			/* initialised to avoid a compiler warning */
   struct object *obj;

   if (args<1 || sp[-args].type != T_OBJECT)
     Pike_error("Bad/missing argument 1 to pipe->input().\n");

   obj=sp[-args].u.object;
   if(!obj || !obj->prog)
     Pike_error("pipe->input() on destructed object.\n");

   push_int(0);
   apply(sp[-args-1].u.object,"set_id", 1);
   pop_stack();

   i=new_input();

#if defined(HAVE_MMAP) && defined(HAVE_MUNMAP)

   /* We do not handle mmaps if we have a file buffer */
   if(THIS->fd == -1)
   {
     char *m;
     struct stat s;

     apply(obj, "query_fd", 0);
     if(sp[-1].type == T_INT) fd=sp[-1].u.integer;
     pop_stack();

     if (fd != -1 && fstat(fd,&s)==0 && S_ISREG(s.st_mode))
     {
       off_t filep=fd_lseek(fd, 0L, SEEK_CUR); /* keep the file pointer */

       /* Only map when the offset is usable: a failed lseek (-1) or a
        * position past the end of the file would otherwise produce a
        * bogus (underflowed) mapping length.
        */
       if (filep >= 0 && filep <= s.st_size)
       {
         size_t len = (size_t)(s.st_size - filep);

         m=(char *)mmap(0, len, PROT_READ,
                        MAP_FILE|MAP_SHARED,fd,filep);

         /* mmap() signals failure with the all-ones pointer value;
          * compare against it directly instead of doing arithmetic on it.
          */
         if (m != (char *)-1)
         {
           mmapped += len;

           i->type=I_MMAP;
           i->len = len;
           i->u.mmap=m;
#if defined(HAVE_MADVISE) && defined(MADV_SEQUENTIAL)
           /* Mark the pages as sequential read only access... */
           madvise(m, len, MADV_SEQUENTIAL);
#endif
           pop_n_elems(args);
           push_int(0);
           return;
         }
       }
     }
   }
#endif

   i->u.obj=obj;
   nobjects++;
   i->type=I_OBJ;
   add_ref(i->u.obj);
   i->set_nonblocking_offset=find_identifier("set_nonblocking",i->u.obj->prog);
   i->set_blocking_offset=find_identifier("set_blocking",i->u.obj->prog);

   if (i->set_nonblocking_offset<0 ||
       i->set_blocking_offset<0)
   {
      if (find_identifier("read", i->u.obj->prog) < 0) {
         /* Not even a read function: undo the registration and fail. */
         free_object(i->u.obj);
         i->u.obj=NULL;
         i->type=I_NONE;

         nobjects--;
         Pike_error("illegal file object%s%s\n",
               ((i->set_nonblocking_offset<0)?"; no set_nonblocking":""),
               ((i->set_blocking_offset<0)?"; no set_blocking":""));
      } else {
         /* Try blocking mode */
         i->type = I_BLOCKING_OBJ;
         if (i==THIS->firstinput) {
           /*
            * FIXME: What if read_some_data() returns 0?
            */
           read_some_data();
         }
         /* Leave the stack in the same state as the other exits do. */
         pop_n_elems(args);
         push_int(0);
         return;
      }
   }

   if (i==THIS->firstinput)
   {
     /* set_nonblocking(read_callback, 0, close_callback) */
     push_callback(offset_input_read_callback);
     push_int(0);
     push_callback(offset_input_close_callback);
     apply_low(i->u.obj,i->set_nonblocking_offset,3);
     pop_stack();
   }
   else
   {
     /* Not first in line: only the close callback is installed for now.
      * (Marked "DOESN'T WORK!!!" by the original author.)
      */
     push_int(0);
     push_int(0);
     push_callback(offset_input_close_callback);
     apply_low(i->u.obj,i->set_nonblocking_offset,3);
     pop_stack();
   }

   pop_n_elems(args);
   push_int(0);
}
Пример #6
0
/* This function reads some data from the file cache..
 * Called when we want some data to send.
 */
/* Fetch data starting at stream offset pos for an output to send.
 *
 * Returns a string the caller holds a reference to, or NULL when no data
 * is available at pos right now (or when reading the file buffer failed).
 *
 * With a file buffer (this->fd != -1) at most READ_BUFFER_SIZE bytes
 * between pos and this->pos are read back from the file.
 *
 * Without one, memory buffers that lie entirely before pos are released
 * (waking a sleeping first input when there is room again), and the data
 * is taken from the first remaining buffer or, if there is none, from a
 * memory-mapped first input.
 */
static INLINE struct pike_string* gimme_some_data(size_t pos)
{
   struct buffer *b;
   ptrdiff_t len;
   struct pipe *this = THIS;

   /* We have a file cache, read from it */
   if (this->fd!=-1)
   {
     char buffer[READ_BUFFER_SIZE];
     ptrdiff_t wanted;		/* number of bytes requested */
     ptrdiff_t got;		/* result of fd_read() */
     int err = 0;		/* errno captured right after fd_read() */

      if (this->pos<=pos) return NULL; /* no data */
      wanted=this->pos-pos;
      if (wanted>READ_BUFFER_SIZE) wanted=READ_BUFFER_SIZE;
      THREADS_ALLOW();
      fd_lseek(this->fd, pos, SEEK_SET);
      THREADS_DISALLOW();

      /* Retry on EINTR. The request size is kept separate from the read
       * result: reusing one variable would pass -1 as the byte count on
       * the retry and could overrun the stack buffer.
       */
      for (;;) {
        THREADS_ALLOW();
        got = fd_read(this->fd, buffer, wanted);
        err = errno;	/* save before re-entering the interpreter */
        THREADS_DISALLOW();
        if (got >= 0) break;
        if (err != EINTR) {
          return(NULL);
        }
        check_threads_etc();
      }
      /*
       * FIXME: What if got is 0?
       */
      return make_shared_binary_string(buffer,got);
   }

   if (pos<this->pos)
     return make_shared_string("buffer underflow"); /* data already freed */

   /* Drop buffers that end at or before pos */
   while (this->firstbuffer && pos>=this->pos+this->firstbuffer->s->len)
   {
     /* Free the first buffer, and update this->pos */
      b=this->firstbuffer;
      this->pos+=b->s->len;
      this->bytes_in_buffer-=b->s->len;
      this->firstbuffer=b->next;
      if (!b->next)
        this->lastbuffer=NULL;
      sbuffers-=b->s->len;
      nbuffers--;
      free_string(b->s);
      free((char *)b);

      /* Wake up first input if it was sleeping and we
       * have room for more in the buffer.
       */
      if (this->sleeping &&
          this->firstinput &&
          this->bytes_in_buffer<MAX_BYTES_IN_BUFFER)
      {
        if (this->firstinput->type == I_BLOCKING_OBJ) {
          if (!read_some_data()) {
            this->sleeping = 0;
            input_finish();
          }
        } else {
          this->sleeping=0;
          push_callback(offset_input_read_callback);
          push_int(0);
          push_callback(offset_input_close_callback);
          apply(this->firstinput->u.obj, "set_nonblocking", 3);
          pop_stack();
        }
      }
   }

   /* No buffered data left: try to serve directly from the first input */
   while (!this->firstbuffer)
   {
     if (this->firstinput)
     {
#if defined(HAVE_MMAP) && defined(HAVE_MUNMAP)
       if (this->firstinput->type==I_MMAP)
       {
         char *src;
         struct pike_string *tmp;

         if (pos >= this->firstinput->len + this->pos) /* end of mmap */
         {
           this->pos += this->firstinput->len;
           input_finish();
           continue;
         }
         len = this->firstinput->len + this->pos - pos;
         if (len > READ_BUFFER_SIZE) len=READ_BUFFER_SIZE;
         tmp = begin_shared_string( len );
         src = this->firstinput->u.mmap + pos - this->pos;
/* This thread_allow/deny is at the cost of one extra memory copy */
         THREADS_ALLOW();
         MEMCPY(tmp->str, src, len);
         THREADS_DISALLOW();
         return end_shared_string(tmp);
       }
       else
#endif
       if (this->firstinput->type!=I_OBJ)
       {
         /* FIXME: What about I_BLOCKING_OBJ? */
         input_finish();       /* shouldn't be anything else ... maybe a finished object */
       }
     }
     return NULL;		/* no data */
   }

   /* pos is exactly at the start of the first buffer: share its string */
   if (pos==this->pos)
   {
      add_ref(this->firstbuffer->s);
      return this->firstbuffer->s;
   }
   /* pos is inside the first buffer: return a copy of its tail */
   return make_shared_binary_string(this->firstbuffer->s->str+
                                    pos-this->pos,
                                    this->firstbuffer->s->len-
                                    pos+this->pos);
}