Example #1
0
/*
 * http_parser callback invoked once the response headers are complete.
 *
 * Closes the request attached to the parser, then creates two blocks for the
 * request's task and hands each one to the dispatcher: the first built from
 * 0 and total_size / 2, the second from total_size / 2 + 1 and total_size.
 * Always returns 0.
 */
static int
on_headers_complete_close(http_parser *parser)
{
	http_request *request = (http_request*) parser->data;
	task         *owner   = (struct task*)request->task;

	/* The request itself is no longer needed; only its task is used below. */
	http_request_close(request);

	/* First block: built from 0 and total_size / 2. */
	struct block *lower = create_block(owner, 0, owner->total_size / 2);
	dispatcher_block(owner, lower);

	/* Second block: built from total_size / 2 + 1 and total_size. */
	struct block *upper = create_block(owner, owner->total_size / 2 + 1, owner->total_size);
	dispatcher_block(owner, upper);

	return 0;
}
Example #2
0
/*
 * Download the RSS feed at `path` and push its items into the prelog of
 * `board`.
 *
 * Steps:
 *   1. send the HTTP request (optionally with If-Modified-Since) and read
 *      the whole body;
 *   2. if an <?xml ...?> declaration is found, record its encoding in
 *      board->encoding (default "UTF-8") and convert the text to UTF-8;
 *   3. read the first <title> and store (at most 100 chars of) it in
 *      board->rss_title;
 *   4. honour <ttl> / <*:updatePeriod> + <*:updateFrequency> by raising
 *      board->site->prefs->board_check_delay when the feed asks for a
 *      longer delay;
 *   5. walk every <item>, build a message from title/link/description/
 *      comments, and prelog_add() each item whose md5 is not already known
 *      to the board.
 *
 * Returns 0 on success (including an empty body), 1 when the HTTP request
 * failed, no body was returned, or no non-empty <title> was found.
 */
int
rss_board_update(Board *board, char *path) {
  HttpRequest r;
  char *rsstxt = NULL, *p;
  char *rss_title = NULL;
  XMLBlock xmlb;
  /* rest_already_known is never set at the moment (see the commented-out
     assignment in the item loop); the loop ends on the first missing <item>. */
  int pos, refresh_request = -1, rest_already_known = 0;
  int rc = 1; /* pessimistic default; set to 0 once the feed has been processed */
  time_t temps_debut = time(NULL), temps_last_modified;

  prelog_clear();
  clear_XMLBlock(&xmlb);
  wmcc_init_http_request(&r, board->site->prefs, path);
  /* An RSS-specific "Accept: application/rss+xml" header was considered, but
     it is left disabled because too many servers do not understand that
     type. */
  //r.accept = strdup("application/rss+xml");
  if (board->site->prefs->use_if_modified_since) { r.p_last_modified = &board->last_modified; }
  http_request_send(&r);
  if (!http_is_ok(&r)) { http_request_close(&r); return 1; }
  wmcc_log_http_request(board->site, &r);
  rsstxt = http_read_all(&r, path);
  http_request_close(&r);
  if (!http_is_ok(&r)) goto cleanup;
  if (!rsstxt) return 1; /* no body, e.g. "not modified" */

  if (strlen(rsstxt)==0) { rc = 0; goto cleanup; }

  /* try to convert the feed to utf8 */
  if ((pos = get_XMLBlock(rsstxt, strlen(rsstxt), "?xml", &xmlb))>=0) {
    XMLAttr *a;
    int found = 0;
    if (board->encoding) { free(board->encoding); board->encoding = NULL; }
    for (a = xmlb.attr; a; a = a->next) {
      if (str_case_startswith(a->name, "encoding")) {
        board->encoding = str_ndup(a->value,a->value_len);
        BLAHBLAH(1,printf("%s: found encoding: value = '%s'\n", board->site->prefs->site_name, board->encoding));
        found = 1;
        break;
      }
    }
    if (!found) board->encoding = strdup("UTF-8"); /* default when no encoding is specified */
    convert_to_utf8(board->encoding, &rsstxt);
  }

  /* the first <title> of the document is used as the feed title */
  pos = get_XMLBlock(rsstxt, strlen(rsstxt), "title", &xmlb);
  if (pos < 0 || xmlb.content_len == 0) goto cleanup;
  rss_title = str_ndup(xmlb.content, xmlb.content_len);
  BLAHBLAH(1, myprintf("got TITLE: '%<YEL %s>'\n", rss_title));

  if (board->rss_title) {
    free(board->rss_title);
  }
  board->rss_title = str_ndup(rss_title, 100);

  if (get_XMLBlock(rsstxt, strlen(rsstxt), "ttl", &xmlb) >= 0) {
    refresh_request = atoi(xmlb.content) * 60; /* ttl is expressed in minutes */
  }
  /* Not an else-branch: when both are present, the updatePeriod /
     updateFrequency pair overrides the value derived from <ttl>.
     NOTE(review): the RSS syndication module defines updateFrequency as the
     number of updates per period; confirm that multiplying is intended. */
  if (get_XMLBlock(rsstxt, strlen(rsstxt), "*:updatePeriod", &xmlb) >= 0) {
    int period = 1;
    if (str_case_startswith(xmlb.content, "hour")) period = 3600;
    else if (str_case_startswith(xmlb.content, "min")) period = 60;
    if (get_XMLBlock(rsstxt, strlen(rsstxt), "*:updateFrequency", &xmlb) >= 0) {
      refresh_request = period * atoi(xmlb.content);
    }
  }
  /* only ever lengthen the polling delay, never shorten it */
  if (refresh_request != -1 && board->site->prefs->board_check_delay < refresh_request) {
    BLAHBLAH(0, myprintf("Changing update frequency for %<grn %s> to %<MAG %d> sec.\n", rss_title, refresh_request));
    board->site->prefs->board_check_delay = refresh_request;
  }

  p = rsstxt;

  temps_last_modified = temps_debut;
  if (board->last_modified) {
    str_to_time_t(board->last_modified, &temps_last_modified);
  }
  do {
    int pos_next_item;
    pos_next_item = get_XMLBlock(p, strlen(p), "item", &xmlb);
    if (pos_next_item >= 0) {
      XMLBlock b2;
      char *title, *link, *description, *msg, *author, *comments_url, *pubdate, *fake_ua;
      char msgd[BOARD_MSG_MAX_LEN];
      char stimestamp[15];
      time_t timestamp = time(NULL);
      title = link = description = msg = author = comments_url = pubdate = fake_ua = NULL;

      /* get_XMLBlock reports "not found" with a negative value, so every
         lookup below must test ">= 0" (a bare truth test would accept a
         failed lookup and reuse whatever b2 still holds). */
      clear_XMLBlock(&b2);
      if (get_XMLBlock(xmlb.content, xmlb.content_len, "title", &b2) >= 0 && b2.content_len) {
        title = str_ndup(b2.content, b2.content_len);
      }
      if (get_XMLBlock(xmlb.content, xmlb.content_len, "link", &b2) >= 0 && b2.content_len) {
        link = str_ndup(b2.content, b2.content_len);
      }
      if (!board->site->prefs->rss_ignore_description &&
          get_XMLBlock(xmlb.content, xmlb.content_len, "description", &b2) >= 0 && b2.content_len) {
        description = str_ndup(b2.content, b2.content_len);
      }
      if (get_XMLBlock(xmlb.content, xmlb.content_len, "author", &b2) >= 0 && b2.content_len) {
        author = str_ndup(b2.content, b2.content_len);
      }
      if (get_XMLBlock(xmlb.content, xmlb.content_len, "comments", &b2) >= 0 && b2.content_len) {
        comments_url = str_ndup(b2.content, b2.content_len);
      }
      /* date format: http://www.w3.org/TR/NOTE-datetime */
      if (get_XMLBlock(xmlb.content, xmlb.content_len, "pubDate", &b2) >= 0 && b2.content_len) {
        pubdate = str_ndup(b2.content, b2.content_len);
      }
      if (pubdate == NULL && get_XMLBlock(xmlb.content, xmlb.content_len, "*:date", &b2) >= 0 && b2.content_len) {
        pubdate = str_ndup(b2.content, b2.content_len);
      }

      /* How the publication date of an item is determined:
          - <pubDate>rfc-822 date</pubDate>
          - <dc:date>iso-8601 date</dc:date>
          - otherwise:
            . if the item was already known from the previous run (md5
              cache), its previous timestamp is reused;
            . else the current time is used, capped by the Last-Modified
              value when the server sent one.
         Time zones make all of this fragile. */
      if (pubdate) {
        if (str_to_time_t(pubdate, &timestamp)) {
          time_t_to_tstamp(timestamp, stimestamp);
          BLAHBLAH(3,myprintf("converted %<YEL %s> to %<YEL %s> !\n", pubdate, stimestamp));
        } else {
          BLAHBLAH(0, printf("could not convert '%s' to a valid date..\n", pubdate));
        }
      }

      /* never date an item later than the start of this update or than the
         last-modified time */
      timestamp = MIN(timestamp, temps_debut);
      timestamp = MIN(timestamp, temps_last_modified);
      time_t_to_tstamp(timestamp, stimestamp);

      destroy_XMLBlock(&b2);

      if (description) str_trunc_nice(description, 512);
      if (link) {
        /* strip the redirect prefix found in yahoo links */
        char *star = strstr(link, "*http://");
        if (star) { star++; memmove(link, star, strlen(star)+1); }
      }

      /* the message is built with escaped markup; it is decoded further down */
      msg = NULL;
      if (title && link) msg = str_cat_printf(msg, "{&lt;a href=&quot;%s&quot;&gt;&lt;u&gt;&lt;b&gt;%s&lt;/b&gt;&lt;/u&gt;&lt;/a&gt;}", link, title);
      else if (title) msg = str_cat_printf(msg, "{&lt;b&gt;%s&lt;/b&gt;}", title);
      else if (link) msg = str_cat_printf(msg, "{&lt;a href=&quot;%s&quot;&gt;[News]&lt;/a&gt;}", link);
      if (description) msg = str_cat_printf(msg, " %s", description);
      if (comments_url) msg = str_cat_printf(msg, " &lt;a href=&quot;%s&quot;&gt;[comments]&lt;/a&gt;", comments_url);
      if (msg) {
        md5_byte_t md5[16];
        md5_state_t ms; md5_init(&ms);
        int was_already_viewed = 0;
        /* the item identity is the md5 of title + link + description */
        if (title) md5_append(&ms, title, strlen(title));
        if (link) md5_append(&ms, link, strlen(link));
        if (description) md5_append(&ms, description, strlen(description));
        md5_finish(&ms,md5);

        /* look the item up in the cache (only on the first download) */
        if (board->oldmd5) {
          md5_and_time *m = find_md5_in_md5_array(md5,board->oldmd5);
          /* the length check guarantees the copy fits in stimestamp[15] */
          if (m && strlen(m->tstamp) == 14) {
            was_already_viewed = m->viewed;
            strcpy(stimestamp, m->tstamp); str_to_time_t(stimestamp, &timestamp);
            BLAHBLAH(1, myprintf("the news '%<GRN %s>' was found in the cache!\n", title));
          }
        }

        /* look in the list of items already read (after the first download) */
        if (board_find_md5(board, md5)) {
          BLAHBLAH(1,myprintf("the news %<MAG %s>/%<CYA %s> is already known\n", rss_title, md5txt(md5)));
          //rest_already_known = 1; // only valid if feeds always grow from the top
        } else {
          /* replace control codes (< 32) in the message with spaces */
          {
            int i;
            for (i=0; i < BOARD_MSG_MAX_LEN && msg[i]; ++i)
              if ((unsigned char)msg[i] < ' ') msg[i] = ' ';
          }
          fake_ua = str_printf("%s", rss_title ? rss_title : "?");
          if (pubdate) { fake_ua = str_cat_printf(fake_ua, " pubDate: %s", pubdate); }

          /* careful: '&lt;' becomes '\t<' and '&amp;lt;' becomes '<' */
          board_decode_message(board, msgd, msg);

          {
            char *soupe = rss_nettoie_la_soupe_de_tags(msgd);
            strncpy(msgd, soupe, sizeof msgd); free(soupe); msgd[(sizeof msgd) - 1] = 0;
          }
          if (author && strlen(author)) {
            author = str_cat_printf(author, "@%s", rss_title);
          } else {
            FREE_STRING(author); author = strdup(rss_title);
          }
          {
            char author_tmp[1024];
            convert_to_ascii(author_tmp, author, sizeof author_tmp);
            FREE_STRING(author); author = strdup(author_tmp);
          }
          prelog_add(fake_ua, author, timestamp, msgd, link, md5, was_already_viewed);
          board->nb_msg_at_last_check++;
          if (!was_already_viewed) board->nb_msg_since_last_viewed++;
        }
      }
      FREE_STRING(title); FREE_STRING(link); FREE_STRING(description); FREE_STRING(author);
      FREE_STRING(comments_url); FREE_STRING(msg); FREE_STRING(pubdate); FREE_STRING(fake_ua);
    } else {
      BLAHBLAH(1,printf("fin de '%s'\n", rss_title));
      break;
    }

    /* continue the search from the position reported for this item */
    p += pos_next_item;
  } while (!rest_already_known);

  rc = 0;

 cleanup:
  /* shared exit path for both the success and the failure case */
  if (board->oldmd5 && board->last_post_id > 0) release_md5_array(board);
  destroy_XMLBlock(&xmlb);
  FREE_STRING(rss_title);
  FREE_STRING(rsstxt);
  prelog_commit(board);
  return rc;
}