Code example #1
File: hashmap.c  Project: gabrielvv/NoSqlLite
// OK
// Ex :  hashmap_put(map, "student.rate", 56);
//       hashmap_traverse(map, "student.rate")  -> 56
void hashmap_put(t_hashmap* map, char* path, void* value, Type type) {

  printf("\n---------------MAP_PUT-------------------");
  printf("\nmap->size: %d\tmap->slots*load_factor: %1.1f\tpath: %s\tvalue: %s\ttype: %s", map->size, map->slots * map->load_factor, path, (char*)value,printType(type));
  if(map->size >= (map->slots * map->load_factor)){
    hashmap_resize(map);
    printf("\nDEBUG: after resize map->slots: %d\tmap->size: %d\n", map->slots, map->size);
  }
  // printf("DEBUG: not resizing\n");
  int slot = hashmap_hashcode(path, map->slots);
  printf("\thashcode: %d\n", slot);
  t_hashmap_entry** entries = &(map->entries[slot]);
  // printf("DEBUG: before while\nvalue=%s", );
  // Walk the chain for this slot; if the key already exists, overwrite its value and type.
  while ((*entries) != NULL) {
    if (strcmp((*entries)->key, path) == 0) {
      (*entries)->value = value;
      (*entries)->type = type;
      return;
    }
    entries = &((*entries)->next);
  }
  // Key not found: append a new entry at the end of the chain.
  (*entries) = hashmap_entry_create(path, value, type);
  map->size++;
}
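
A minimal usage sketch for hashmap_put as listed above. The constructor name hashmap_create and the Type constant STRING are assumptions for illustration; only t_hashmap, hashmap_put, and Type themselves appear in the listing, so adjust the names to the actual NoSqlLite API.

// Hypothetical usage; hashmap_create and STRING are assumed names, not confirmed NoSqlLite API.
t_hashmap* map = hashmap_create();                 // assumed constructor
hashmap_put(map, "student.rate", "56", STRING);    // insert a value under a dotted path
hashmap_put(map, "student.rate", "57", STRING);    // same key: the existing entry's value and type are overwritten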
Code example #2
File: sequential_search.c  Project: AndreasSong/femto
error_t run_sequential_search( sequential_search_state_t* state, 
                    int64_t doc_len,
                    const unsigned char* doc_contents,
                    uint64_t doc_num )
{
  error_t err;
  int got;
  int j;
  size_t chunk = 1*1024*1024; // 1 MB chunk.
  size_t overlap = 1024; // extra bytes scanned so matches crossing a chunk boundary (by up to 1024 bytes) are still found.
  result_type_t type = state->type;
  int keepmatches = state->keepmatches;

  memset(state->docmatched, 0, state->n_regexps);

  // start results writers for all of our regexps.
  for( j = 0; j < state->n_regexps; j++ ) {
    if( type != 0 ) {
      err = results_writer_create(&state->regexps[j]->results_writer, type);
      if( err ) return err;
    }
  }

  for( int64_t i = 0; i < doc_len; i += chunk ) {
    // scan chunk+overlap bytes with each regular expression.

    // Run all of the regexps on the input.
    for( j = 0; j < state->n_regexps; j++ ) {
      sequential_regexp_query_t* s = state->regexps[j];
      size_t cur = 0;
      size_t end = chunk + overlap;
      size_t end_no_overlap = chunk;
      size_t match_start, match_len;
      if( i + end > doc_len ) end = doc_len - i;
      if( i + end_no_overlap > doc_len ) end_no_overlap = doc_len - i;

      if( type == RESULT_TYPE_DOCUMENTS && state->docmatched[j] ) {
        break; // no matching necessary.
      }

      while( cur < end ) {
        const unsigned char* base = doc_contents + i + cur;
        size_t len = end - cur;
        size_t len_no_overlap = end_no_overlap - cur;

        got = seq_match_regexp(s->matcher,
                           base,
                           len,
                           &match_start, &match_len);
        if( got && match_start < len_no_overlap ) {
          // report the result.
          s->query.count++;
          
          if( type == RESULT_TYPE_DOCUMENTS ) {
            
            state->docmatched[j] = 1;
            
            err = results_writer_append(&s->results_writer, doc_num, 0);
            if( err ) return err;
            // once the 1st match is found for a document, we're done!
            return ERR_NOERR;
          } else if( type == RESULT_TYPE_DOC_OFFSETS ) {
            err = results_writer_append(&s->results_writer, doc_num, i + cur + match_start);
            if( err ) return err;
          }
          
          // If we are keeping matches, record this one in the per-regexp match hashmap.
          if( keepmatches ) {
            // look for the match in our hashmap.
            seq_search_match_key_t search_key;
            hm_entry_t entry;

            search_key.data = base + match_start;
            search_key.len = match_len;
            entry.key = &search_key;
            entry.value = NULL;

            if( hashmap_retrieve(& s->matches, &entry) ) {
              seq_search_match_value_t* v = (seq_search_match_value_t*) entry.value;
              v->num_matches++;
            } else {
              seq_search_match_key_t* key = malloc(sizeof(seq_search_match_key_t));
              seq_search_match_value_t* v = malloc(sizeof(seq_search_match_value_t));
              unsigned char* data = malloc(search_key.len);
              
              if( ! key ) return ERR_MEM;
              if( ! v ) return ERR_MEM;
              if( ! data ) return ERR_MEM;

              v->num_matches = 1;
              memcpy(data, search_key.data, search_key.len);

              key->data = data;
              key->len = search_key.len;


              // Make room for more entries before inserting.
              err = hashmap_resize(& s->matches);
              if( err ) return err;

              entry.key = key;
              entry.value = v;
              err = hashmap_insert(& s->matches, &entry);
              if( err ) return err;
            }
          }

          // Advance one byte past the start of this match so overlapping matches are also found.
          cur += match_start + 1;
        } else {
          cur = end;
          // no more matches for this pattern.
          break; 
        } 
      }
    }
  }

  // Boolean query processing will happen in finish query.
  return ERR_NOERR;
}
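
The loop above scans chunk + overlap bytes per iteration but only accepts matches whose start offset lies below end_no_overlap, so a match that straddles a chunk boundary is reported exactly once, by the chunk it starts in. A standalone sketch of that window arithmetic, with illustrative sizes rather than femto's defaults:

#include <stdio.h>
#include <stdint.h>

int main(void) {
  int64_t doc_len = 2600;   // illustrative document length
  size_t chunk = 1000;      // the window advances by this many bytes
  size_t overlap = 100;     // extra bytes handed to the matcher at each boundary

  for (int64_t i = 0; i < doc_len; i += chunk) {
    size_t end = chunk + overlap;     // bytes visible to the matcher this iteration
    size_t end_no_overlap = chunk;    // a match must start before this offset to count here
    if (i + (int64_t)end > doc_len) end = doc_len - i;
    if (i + (int64_t)end_no_overlap > doc_len) end_no_overlap = doc_len - i;
    printf("scan bytes [%lld, %lld), accept match starts in [%lld, %lld)\n",
           (long long)i, (long long)(i + (int64_t)end),
           (long long)i, (long long)(i + (int64_t)end_no_overlap));
  }
  return 0;
}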
Code example #3
static
error_t do_dedup_file(const char* path)
{
  struct stat st;
  error_t err;
  int rc;
  void* data;
  int fd;
  MYHASH_key h;
  hm_entry_t entry;

  if( NULL != strchr(path, GLOM_CHAR) ) return ERR_IO_STR_OBJ("Path contains glom character ", path);

  // Otherwise, get the document length, etc.
  rc = stat(path, &st);
  if( rc != 0 ) {
    return ERR_IO_STR_OBJ("Could not stat", path);
  }

  if( ! S_ISREG(st.st_mode) ) {
    return ERR_IO_STR_OBJ("Not regular file", path);
  }

  fd = open(path, O_RDONLY);
  if( fd < 0 ) {
    return ERR_IO_STR_OBJ("Could not open", path);
  }

  if( st.st_size > 0 ) {
    data = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);

    if( data == NULL || data == MAP_FAILED ) {
      return ERR_IO_STR_OBJ("Could not mmap", path);
    }

    // madvise sequential.
    err = advise_sequential_pages(data, st.st_size);
    warn_if_err(err);
    // failed madvise does not cause total failure.
 
  } else {
    // A size 0 file!
    data = NULL;
  }

  // Deduplicate this file!
  memset(&h, 0, sizeof(MYHASH_key));
  SHA1(data, st.st_size, &h.h[0]);
  //printf("chk "); MYHASH_print(&h, path);
  // Populate the hashtable.
  entry.key = &h;
  entry.value = NULL;
  if( hashmap_retrieve(&dedup_table, &entry) ) {
    MYHASH_key* k = (MYHASH_key*) entry.key;
    MYHASH_value* v = (MYHASH_value*) entry.value;
    // Got an entry
    // Glom path into the existing hash entry.
    // foo\0 -> foo|bar\0
    err = append_array(&v->npaths, &v->paths, sizeof(char*), &path);
    if( err ) return err;
    // No need to reinsert since we just updated the value.

    // Add this path to the dups hashtable.
    entry.key = (void*) path;
    entry.value = NULL;
    err = hashmap_resize(&dups);
    if( err ) return err;
    err = hashmap_insert(&dups, &entry);
    if( err ) return err;

    printf("dup "); MYHASH_print(k, path);
    entry.key = (void*) path;
    entry.value = NULL;
    assert( hashmap_retrieve(&dups, &entry) );
    assert(entry.value == NULL);

  } else {
    MYHASH_key* k = malloc(sizeof(MYHASH_key));
    MYHASH_value* v = malloc(sizeof(MYHASH_value));
    if( !k ) return ERR_MEM;
    if( !v ) return ERR_MEM;
    *k = h;
    v->npaths = 0;
    v->paths = NULL;
    err = append_array(&v->npaths, &v->paths, sizeof(char*), &path);
    if( err ) return err;
    // Add this hash to the dedup table.
    entry.key = k;
    entry.value = v;
    err = hashmap_resize(&dedup_table);
    if( err ) return err;
    err = hashmap_insert(&dedup_table, &entry);
    if( err ) return err;

    entry.key = k;
    entry.value = NULL;
    assert( hashmap_retrieve(&dedup_table, &entry) );
    assert(entry.value == v);

    // Add this path to the dups hashtable.
    entry.key = (void*) path;
    entry.value = v;
    err = hashmap_resize(&dups);
    if( err ) return err;
    err = hashmap_insert(&dups, &entry);
    if( err ) return err;

    printf("new "); MYHASH_print(k, path);
    entry.key = (void*) path;
    entry.value = NULL;
    assert( hashmap_retrieve(&dups, &entry) );
    assert(entry.value == v);
  }

  if( data ) {
    rc = munmap(data, st.st_size);
    if( rc ) {
      return ERR_IO_STR("Could not munmap");
    }
  }

  rc = close(fd);
  if( rc ) {
    return ERR_IO_STR_OBJ("Could not close", path);
  }

  return ERR_NOERR;
}
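
do_dedup_file keys each file by the SHA-1 of its mmap'd contents: the first path seen for a digest creates a dedup_table entry, and any later path with the same digest is appended to that entry's path list and reported as a duplicate. A standalone sketch of the core idea using OpenSSL's SHA1, with a toy fixed-size array standing in for the hashmap (build with -lcrypto):

#include <stdio.h>
#include <string.h>
#include <openssl/sha.h>

// Toy digest table: remembers up to 16 SHA-1 digests that have been seen.
static unsigned char seen[16][SHA_DIGEST_LENGTH];
static int nseen = 0;

// Return 1 if this buffer's SHA-1 digest was seen before; otherwise remember it and return 0.
static int is_duplicate(const unsigned char* data, size_t len) {
  unsigned char digest[SHA_DIGEST_LENGTH];
  SHA1(data, len, digest);
  for (int i = 0; i < nseen; i++)
    if (memcmp(seen[i], digest, SHA_DIGEST_LENGTH) == 0) return 1;
  if (nseen < 16) memcpy(seen[nseen++], digest, SHA_DIGEST_LENGTH);
  return 0;
}

int main(void) {
  const char* a = "hello world";
  const char* b = "hello world";
  const char* c = "something else";
  printf("%d\n", is_duplicate((const unsigned char*)a, strlen(a)));  // 0: new content
  printf("%d\n", is_duplicate((const unsigned char*)b, strlen(b)));  // 1: same bytes as a
  printf("%d\n", is_duplicate((const unsigned char*)c, strlen(c)));  // 0: new content
  return 0;
}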