示例#1
0
文件: page_db.c 项目: plafl/aduana
/** Close database */
PageDBError
page_db_delete(PageDB *db) {
     if (!db)
          return 0;

     mdb_env_close(db->txn_manager->env);
     if (txn_manager_delete(db->txn_manager) != 0) {
          page_db_set_error(db, page_db_error_internal, __func__);
          page_db_add_error(db, "deleting transaction manager");
          page_db_add_error(db, db->txn_manager->error->message);
          return db->error->code;
     }

     if (!db->persist) {
          char *data = build_path(db->path, "data.mdb");
          char *lock = build_path(db->path, "lock.mdb");

          // proceeed even the data files cannot be deleted from disk
          (void)remove(data);
          (void)remove(lock);
          (void)remove(db->path);

          free(data);
          free(lock);
     }
     free(db->path);
     domain_temp_delete(db->domain_temp);
     error_delete(db->error);
     free(db);
     return 0;
}
示例#2
0
PageRankScorerError
page_rank_scorer_delete(PageRankScorer *prs) {
     if (page_rank_delete(prs->page_rank) != 0) {
          page_rank_scorer_set_error(prs,  page_rank_scorer_error_internal, __func__);
          page_rank_scorer_add_error(prs, "deleting PageRank");
          page_rank_scorer_add_error(prs,
                                     prs->page_rank?
                                     prs->page_rank->error->message
                                     : "unknown error");
          return prs->error->code;
     }
     error_delete(prs->error);
     free(prs);
     return 0;
}
示例#3
0
文件: error.c 项目: Oneiroi/haka
FINI static void _error_fini()
{
	{
		struct local_error *context = (struct local_error *)local_storage_get(&local_error_key);
		if (context) {
			error_delete(context);
		}
	}

	error_is_valid = false;

	{
		UNUSED const bool ret = local_storage_destroy(&local_error_key);
		assert(ret);
	}
}
示例#4
0
FreqSchedulerError
freq_scheduler_cursor_write(FreqScheduler *sch,
			    MDB_cursor *cursor,
			    uint64_t hash,
			    float freq) {
     if (freq <= 0)
	  return 0;

     ScheduleKey sk = {
	  .score = 0,
	  .hash  = hash
     };
     MDB_val key = {
	  .mv_size = sizeof(sk),
	  .mv_data = &sk,
     };

     MDB_val val = {
	  .mv_size = sizeof(float),
	  .mv_data = &freq,
     };
     int mdb_rc;
     if ((mdb_rc = mdb_cursor_put(cursor, &key, &val, 0)) != 0) {
	  freq_scheduler_set_error(sch, freq_scheduler_error_internal, __func__);
	  freq_scheduler_add_error(sch, "adding page to schedule");
	  freq_scheduler_add_error(sch, mdb_strerror(mdb_rc));
     }
     return sch->error->code;

}

FreqSchedulerError
freq_scheduler_load_simple(FreqScheduler *sch,
                           float freq_default,
                           float freq_scale) {
     char *error1 = 0;
     char *error2 = 0;
     MDB_cursor *cursor = 0;

     HashInfoStream *st;
     if (hashinfo_stream_new(&st, sch->page_db) != 0) {
          error1 = "creating stream";
          error2 = st? sch->page_db->error->message: "NULL";
          goto on_error;
     }

     if (freq_scheduler_cursor_open(sch, &cursor) != 0)
	  goto on_error;

     StreamState ss;
     uint64_t hash;
     PageInfo *pi;

     while ((ss = hashinfo_stream_next(st, &hash, &pi)) == stream_state_next) {
          if ((pi->n_crawls > 0) &&
	      ((sch->max_n_crawls == 0) || (pi->n_crawls < sch->max_n_crawls)) &&
	      !page_info_is_seed(pi)){

               float freq = freq_default;
               if (freq_scale > 0) {
                    float rate = page_info_rate(pi);
                    if (rate > 0) {
                         freq = freq_scale * rate;
                    }
               }
	       if (freq_scheduler_cursor_write(sch, cursor, hash, freq) != 0)
		    goto on_error;
          }
          page_info_delete(pi);
     }
     if (ss != stream_state_end) {
          error1 = "incorrect stream state";
          error2 = 0;
          hashinfo_stream_delete(st);
          goto on_error;
     }
     hashinfo_stream_delete(st);

     if (freq_scheduler_cursor_commit(sch, cursor) != 0)
	  goto on_error;

     return sch->error->code;

on_error:
     freq_scheduler_cursor_abort(sch, cursor);

     freq_scheduler_set_error(sch, freq_scheduler_error_internal, __func__);
     freq_scheduler_add_error(sch, error1);
     freq_scheduler_add_error(sch, error2);

     return sch->error->code;
}

FreqSchedulerError
freq_scheduler_load_mmap(FreqScheduler *sch, MMapArray *freqs) {
     char *error1 = 0;
     char *error2 = 0;
     MDB_cursor *cursor = 0;

     if (txn_manager_expand(
              sch->txn_manager,
              2*freqs->n_elements*freqs->element_size) != 0) {
          error1 = "resizing database";
          error2 = sch->txn_manager->error->message;
          goto on_error;
     }

     if (freq_scheduler_cursor_open(sch, &cursor) != 0)
	  goto on_error;

     for (size_t i=0; i<freqs->n_elements; ++i) {
          PageFreq *f = mmap_array_idx(freqs, i);
          ScheduleKey sk = {
               .score = 1.0/f->freq,
               .hash = f->hash
          };
          MDB_val key = {
               .mv_size = sizeof(sk),
               .mv_data = &sk,
          };
          MDB_val val = {
               .mv_size = sizeof(float),
               .mv_data = &f->freq,
          };
	  int mdb_rc;
          if ((mdb_rc = mdb_cursor_put(cursor, &key, &val, 0)) != 0) {
               error1 = "adding page to schedule";
               error2 = mdb_strerror(mdb_rc);
               goto on_error;
          }
     }
     if (freq_scheduler_cursor_commit(sch, cursor) != 0)
	  goto on_error;

     return sch->error->code;

on_error:
     freq_scheduler_cursor_abort(sch, cursor);

     freq_scheduler_set_error(sch, freq_scheduler_error_internal, __func__);
     freq_scheduler_add_error(sch, error1);
     freq_scheduler_add_error(sch, error2);

     return sch->error->code;
}

FreqSchedulerError
freq_scheduler_request(FreqScheduler *sch,
                       size_t max_requests,
                       PageRequest **request) {
     char *error1 = 0;
     char *error2 = 0;

     MDB_cursor *cursor = 0;

     if (freq_scheduler_cursor_open(sch, &cursor) != 0)
	  goto on_error;

     PageRequest *req = *request = page_request_new(max_requests);
     if (!req) {
          error1 = "allocating memory";
          goto on_error;
     }

     int interrupt_requests = 0;
     while ((req->n_urls < max_requests) && !interrupt_requests) {
          MDB_val key;
          MDB_val val;
          ScheduleKey sk;
          float freq;
	  int mdb_rc;

	  int crawl = 0;
          switch (mdb_rc = mdb_cursor_get(cursor, &key, &val, MDB_FIRST)) {
          case 0:
	       // copy data before deleting cursor
               sk = *(ScheduleKey*)key.mv_data;
               freq = *(float*)val.mv_data;


               PageInfo *pi = 0;
               if (page_db_get_info(sch->page_db, sk.hash, &pi) != 0) {
                    error1 = "retrieving PageInfo from PageDB";
                    error2 = sch->page_db->error->message;
                    goto on_error;
               }

               if (pi) {
                    if (sch->margin >= 0) {
                         double elapsed = difftime(time(0), 0) - pi->last_crawl;
                         if (elapsed < 1.0/(freq*(1.0 + sch->margin)))
                              interrupt_requests = 1;
                    }
		    crawl = (sch->max_n_crawls == 0) || (pi->n_crawls < sch->max_n_crawls);
	       }
	       if (!interrupt_requests) {
		    if ((mdb_rc = mdb_cursor_del(cursor, 0)) != 0) {
			 error1 = "deleting head of schedule";
			 error2 = mdb_strerror(mdb_rc);
			 goto on_error;
		    }
		    if (crawl) {
			 if (page_request_add_url(req, pi->url) != 0) {
			      error1 = "adding url to request";
			      goto on_error;
			 }

			 sk.score += 1.0/freq;

			 val.mv_data = &freq;
			 key.mv_data = &sk;
			 if ((mdb_rc = mdb_cursor_put(cursor, &key, &val, 0)) != 0) {
			      error1 = "moving element inside schedule";
			      error2 = mdb_strerror(mdb_rc);
			      goto on_error;
			 }
		    }
	       }
	       page_info_delete(pi);

               break;

          case MDB_NOTFOUND: // no more pages left
               interrupt_requests = 1;
               break;
          default:
               error1 = "getting head of schedule";
               error2 = mdb_strerror(mdb_rc);
               goto on_error;
          }
     }
     if (freq_scheduler_cursor_commit(sch, cursor) != 0)
	  goto on_error;

     return sch->error->code;
on_error:
     freq_scheduler_cursor_abort(sch, cursor);

     freq_scheduler_set_error(sch, freq_scheduler_error_internal, __func__);
     freq_scheduler_add_error(sch, error1);
     freq_scheduler_add_error(sch, error2);

     return sch->error->code;
}

FreqSchedulerError
freq_scheduler_add(FreqScheduler *sch, const CrawledPage *page) {
     if (page_db_add(sch->page_db, page, 0) != 0) {
          freq_scheduler_set_error(sch, freq_scheduler_error_internal, __func__);
          freq_scheduler_add_error(sch, "adding crawled page");
          freq_scheduler_add_error(sch, sch->page_db->error->message);
     }
     return sch->error->code;
}

void
freq_scheduler_delete(FreqScheduler *sch) {
     mdb_env_close(sch->txn_manager->env);
     (void)txn_manager_delete(sch->txn_manager);
     if (!sch->persist) {
          char *data = build_path(sch->path, "data.mdb");
          char *lock = build_path(sch->path, "lock.mdb");
          remove(data);
          remove(lock);
          free(data);
          free(lock);

          remove(sch->path);
     }
     free(sch->path);
     error_delete(sch->error);
     free(sch);
}

FreqSchedulerError
freq_scheduler_dump(FreqScheduler *sch, FILE *output) {
     MDB_cursor *cursor;
     if (freq_scheduler_cursor_open(sch, &cursor) != 0)
	  return sch->error->code;

     int end = 0;
     MDB_cursor_op cursor_op = MDB_FIRST;
     do {
	  int mdb_rc;
	  MDB_val key;
	  MDB_val val;
	  ScheduleKey *key_data;
	  float *val_data;
	  switch (mdb_rc = mdb_cursor_get(cursor, &key, &val, cursor_op)) {
	  case 0:
	       key_data = (ScheduleKey*)key.mv_data;
	       val_data = (float*)val.mv_data;
	       fprintf(output, "%.2e %016"PRIx64" %.2e\n",
		       key_data->score, key_data->hash, *val_data);
	       break;
	  case MDB_NOTFOUND:
	       end = 1;
	       break;
	  default:
	       freq_scheduler_set_error(sch, freq_scheduler_error_internal, __func__);
	       freq_scheduler_add_error(sch, "iterating over database");
	       freq_scheduler_add_error(sch, mdb_strerror(mdb_rc));
	       end = 1;
	       break;
	  }
	  cursor_op = MDB_NEXT;
     } while (!end);
     freq_scheduler_cursor_abort(sch, cursor);

     return sch->error->code;
}