Exemplo n.º 1
0
void load_stat(hash_t *ht, int type)
{
    top_t buf;
    char file[HOMELEN];
    sprintf(file, BASEPATH"/%s.0", files[type]);
    FILE *fp = fopen(file, "r");
    if (fp) {
        while (fread(&buf, sizeof(buf), 1, fp) == 1) {
            top_t *top = top_alloc();
            memcpy(top, &buf, sizeof(*top));
            if (type == DAY)
                top->count = 0;
            hash_set(ht, (char *)top, HASH_KEY_STRING, top);
        }
        fclose(fp);
    }
}
Exemplo n.º 2
0
void process(hash_t *ht, const struct boardheader *bp)
{
    if (!should_stat(bp))
        return;
    time_t recent = time(NULL) - 24 * 60 * 60;
    char file[HOMELEN];
    setwbdir(file, bp->filename);
    FILE *fp = fopen(file, "r");
    if (!fp)
        return;
    struct fileheader fh;
    top_t top;
    fseek(fp, -sizeof(fh), SEEK_END);
    while (fread(&fh, sizeof(fh), 1, fp) == 1) {
        time_t last = (time_t)strtol(fh.filename + 2, NULL, 10);
        if (last < recent)
            break;
        top.gid = fh.gid;
        strlcpy(top.board, bp->filename, sizeof(top.board));
        top_t *entry = hash_get(ht, &top, HASH_KEY_STRING);
        if (entry) {
            entry->count++;
            if (last > entry->last)
                entry->last = last;
            if (fh.id == fh.gid) {
                strlcpy(entry->title, fh.title, sizeof(entry->title));
                strlcpy(entry->owner, fh.owner, sizeof(entry->owner));
            }
        } else {
            entry = top_alloc();
            entry->gid = fh.gid;
            strlcpy(entry->board, bp->filename, sizeof(entry->board));
            strlcpy(entry->title, fh.title, sizeof(entry->title));
            strlcpy(entry->owner, fh.owner, sizeof(entry->owner));
            entry->count = 1;
            entry->last = last;
            hash_set(ht, (char *)entry, HASH_KEY_STRING, entry);
        }
        if (ftell(fp) < 2 * sizeof(fh))
            break;
        fseek(fp, -2 * sizeof(fh), SEEK_CUR);
    }
    fclose(fp);
}
Exemplo n.º 3
0
int32_t load_clusters(char* fname, uintptr_t unfiltered_indiv_ct, uintptr_t* indiv_exclude, uintptr_t indiv_ct, char* person_ids, uintptr_t max_person_id_len, uint32_t mwithin_col, uint32_t keep_na, uintptr_t* cluster_ct_ptr, uint32_t** cluster_map_ptr, uint32_t** cluster_starts_ptr, char** cluster_ids_ptr, uintptr_t* max_cluster_id_len_ptr) {
  unsigned char* wkspace_mark = wkspace_base;
  FILE* infile = NULL;
  uintptr_t indiv_ctl = (indiv_ct + (BITCT - 1)) / BITCT;
  uintptr_t topsize = 0;
  int32_t retval = 0;
  char* idbuf = &(tbuf[MAXLINELEN]);
  uintptr_t max_cluster_id_len = 0;
  uintptr_t assigned_ct = 0;
  uintptr_t cluster_ct = 0;
  Ll_str* cluster_names = NULL;
  uintptr_t* already_seen;
  char* cluster_ids;
  uint32_t* cluster_map;
  uint32_t* cluster_starts;
  uint32_t* tmp_cluster_starts;
  uintptr_t topsize_bak;
  Ll_str* llptr;
  char* sorted_ids;
  uint32_t* id_map;
  char* fam_id;
  char* indiv_id;
  char* cluster_name_ptr;
  uintptr_t ulii;
  int32_t sorted_idx;
  uint32_t indiv_uidx;
  uint32_t slen;
  uint32_t uii;

  sorted_ids = (char*)top_alloc(&topsize, indiv_ct * max_person_id_len);
  if (!sorted_ids) {
    goto load_clusters_ret_NOMEM;
  }
  id_map = (uint32_t*)top_alloc(&topsize, indiv_ct * sizeof(int32_t));
  if (!id_map) {
    goto load_clusters_ret_NOMEM;
  }
  topsize_bak = topsize;
  already_seen = (uintptr_t*)top_alloc(&topsize, indiv_ctl * sizeof(intptr_t));
  if (!already_seen) {
    goto load_clusters_ret_NOMEM;
  }
  fill_ulong_zero(already_seen, indiv_ctl);
  memcpy(sorted_ids, person_ids, indiv_ct * max_person_id_len);
  wkspace_left -= topsize;
  retval = sort_item_ids_noalloc(sorted_ids, id_map, unfiltered_indiv_ct, indiv_exclude, indiv_ct, person_ids, max_person_id_len, 0, 0, strcmp_deref);
  wkspace_left += topsize;
  if (retval) {
    goto load_clusters_ret_1;
  }

  // two-pass load
  // 1. load cluster names, track longest length, validate format, verify no
  //    individual ID appears multiple times
  // intermission. sort cluster names, purge duplicates, allocate memory for
  //               return values
  // 2. populate return name arrays
  if (fopen_checked(&infile, fname, "r")) {
    goto load_clusters_ret_OPEN_FAIL;
  }
  tbuf[MAXLINELEN - 1] = ' ';
  if (!mwithin_col) {
    mwithin_col = 1;
  }
  while (fgets(tbuf, MAXLINELEN, infile)) {
    if (!tbuf[MAXLINELEN - 1]) {
      logprint("Error: Pathologically long line in --within file.\n");
      goto load_clusters_ret_INVALID_FORMAT;
    }
    fam_id = skip_initial_spaces(tbuf);
    if (is_eoln_kns(*fam_id)) {
      continue;
    }
    indiv_id = next_item(fam_id);
    cluster_name_ptr = next_item_mult(indiv_id, mwithin_col);
    if (no_more_items_kns(cluster_name_ptr)) {
      logprint("Error: Fewer entries than expected in --within file line.\n");
      goto load_clusters_ret_INVALID_FORMAT;
    }
    sorted_idx = bsearch_fam_indiv(idbuf, sorted_ids, max_person_id_len, indiv_ct, fam_id, indiv_id);
    if (sorted_idx == -1) {
      continue;
    }
    if (is_set(already_seen, sorted_idx)) {
      idbuf[strlen_se(fam_id)] = ' ';
      sprintf(logbuf, "Error: Duplicate individual %s in --within file.\n", idbuf);
      logprintb();
      goto load_clusters_ret_INVALID_FORMAT;
    }
    set_bit_noct(already_seen, sorted_idx);
    slen = strlen_se(cluster_name_ptr);
    if ((!keep_na) && (slen == 2) && (!memcmp(cluster_name_ptr, "NA", 2))) {
      // postponed to here because, even without 'keep-NA', we do not want to
      // ignore cluster=NA lines for the purpose of detecting duplicate indivs
      continue;
    }
    if (slen >= max_cluster_id_len) {
      max_cluster_id_len = slen + 1;
    }
    llptr = top_alloc_llstr(&topsize, slen + 1);
    llptr->next = cluster_names;
    memcpyx(llptr->ss, cluster_name_ptr, slen, '\0');
    cluster_names = llptr;
    assigned_ct++;
  }
  if (!feof(infile)) {
    goto load_clusters_ret_READ_FAIL;
  }
  if (cluster_names) {
    *max_cluster_id_len_ptr = max_cluster_id_len;
    wkspace_left -= topsize;
    if (wkspace_alloc_c_checked(cluster_ids_ptr, assigned_ct * max_cluster_id_len)) {
      goto load_clusters_ret_NOMEM2;
    }
    cluster_ids = *cluster_ids_ptr;
    for (ulii = 0; ulii < assigned_ct; ulii++) {
      strcpy(&(cluster_ids[ulii * max_cluster_id_len]), cluster_names->ss);
      cluster_names = cluster_names->next;
    }
    // deallocate cluster ID linked list and duplicate indiv ID detector from
    // top of stack, allocate cluster size tracker
    wkspace_left += topsize;
    topsize = topsize_bak;
    tmp_cluster_starts = (uint32_t*)top_alloc(&topsize, assigned_ct * sizeof(int32_t));
    if (!tmp_cluster_starts) {
      goto load_clusters_ret_NOMEM;
    }
    wkspace_left -= topsize;
    // may as well use natural sort of cluster names
    qsort(cluster_ids, assigned_ct, max_cluster_id_len, strcmp_natural);
    cluster_ct = collapse_duplicate_ids(cluster_ids, assigned_ct, max_cluster_id_len, tmp_cluster_starts);
    *cluster_ct_ptr = cluster_ct;
    // shrink
    wkspace_reset(cluster_ids);
    wkspace_alloc_c_checked(cluster_ids_ptr, cluster_ct * max_cluster_id_len);
    if (wkspace_alloc_ui_checked(cluster_map_ptr, assigned_ct * sizeof(int32_t)) ||
        wkspace_alloc_ui_checked(cluster_starts_ptr, (cluster_ct + 1) * sizeof(int32_t))) {
      goto load_clusters_ret_NOMEM2;
    }
    wkspace_left += topsize;
    cluster_map = *cluster_map_ptr;
    cluster_starts = *cluster_starts_ptr;
    memcpy(cluster_starts, tmp_cluster_starts, cluster_ct * sizeof(int32_t));
    cluster_starts[cluster_ct] = assigned_ct;
    rewind(infile);
    // second pass
    while (fgets(tbuf, MAXLINELEN, infile)) {
      fam_id = skip_initial_spaces(tbuf);
      if (is_eoln_kns(*fam_id)) {
	continue;
      }
      indiv_id = next_item(fam_id);
      cluster_name_ptr = next_item_mult(indiv_id, mwithin_col);
      slen = strlen_se(cluster_name_ptr);
      if ((!keep_na) && (slen == 2) && (!memcmp(cluster_name_ptr, "NA", 2))) {
	continue;
      }
      sorted_idx = bsearch_fam_indiv(idbuf, sorted_ids, max_person_id_len, indiv_ct, fam_id, indiv_id);
      if (sorted_idx == -1) {
	continue;
      }
      indiv_uidx = id_map[(uint32_t)sorted_idx];
      cluster_name_ptr[slen] = '\0';
      sorted_idx = bsearch_str_natural(cluster_name_ptr, cluster_ids, max_cluster_id_len, 0, cluster_ct - 1);
      uii = tmp_cluster_starts[(uint32_t)sorted_idx];
      tmp_cluster_starts[(uint32_t)sorted_idx] += 1;
      cluster_map[uii] = indiv_uidx;
    }
    if (!feof(infile)) {
      goto load_clusters_ret_READ_FAIL;
    }
    for (ulii = 0; ulii < cluster_ct; ulii++) {
      if (cluster_starts[ulii + 1] - cluster_starts[ulii] > 1) {
#ifdef __cplusplus
	std::sort(&(cluster_map[cluster_starts[ulii]]), &(cluster_map[cluster_starts[ulii + 1]]));
#else
	qsort(&(cluster_map[cluster_starts[ulii]]), cluster_starts[ulii + 1] - cluster_starts[ulii], sizeof(int32_t), intcmp);
#endif
      }
    }
    sprintf(logbuf, "--within: %" PRIuPTR " cluster%s loaded, covering a total of %" PRIuPTR " %s.\n", cluster_ct, (cluster_ct == 1)? "" : "s", assigned_ct, species_str(assigned_ct));
    logprintb();
  } else {
    logprint("Warning: No individuals named in --within file remain in the current analysis.\n");
  }

  while (0) {
  load_clusters_ret_NOMEM2:
    wkspace_left += topsize;
  load_clusters_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  load_clusters_ret_OPEN_FAIL:
    retval = RET_OPEN_FAIL;
    break;
  load_clusters_ret_READ_FAIL:
    retval = RET_READ_FAIL;
    break;
  load_clusters_ret_INVALID_FORMAT:
    retval = RET_INVALID_FORMAT;
    break;
  }
 load_clusters_ret_1:
  if (retval) {
    wkspace_reset(wkspace_mark);
  }
  fclose_cond(infile);
  return retval;
}