예제 #1
0
int
main(int argc, char **argv)
{
	g_start_sec = cf_get_seconds();

	// Initialize cf_thread wrapper.
	cf_thread_init();

	// Initialize memory allocation.
	cf_alloc_init();

	// Initialize fault management framework.
	cf_fault_init();

	// Setup signal handlers.
	as_signal_setup();

	// Initialize TLS library.
	tls_check_init();

	int opt;
	int opt_i;
	const char *config_file = DEFAULT_CONFIG_FILE;
	bool run_in_foreground = false;
	bool new_style_daemon = false;
	bool cold_start_cmd = false;
	uint32_t instance = 0;

	// Parse command line options.
	while ((opt = getopt_long(argc, argv, "", CMD_OPTS, &opt_i)) != -1) {
		switch (opt) {
		case 'h':
			// printf() since we want stdout and don't want cf_fault's prefix.
			printf("%s\n", HELP);
			return 0;
		case 'v':
			// printf() since we want stdout and don't want cf_fault's prefix.
			printf("%s build %s\n", aerospike_build_type, aerospike_build_id);
			return 0;
		case 'f':
			config_file = cf_strdup(optarg);
			break;
		case 'F':
			// As a "new-style" daemon(*), asd runs in the foreground and
			// ignores the following configuration items:
			//  - user ('user')
			//	- group ('group')
			//  - PID file ('pidfile')
			//
			// If ignoring configuration items, or if the 'console' sink is not
			// specified, warnings will appear in stderr.
			//
			// (*) http://0pointer.de/public/systemd-man/daemon.html#New-Style%20Daemons
			run_in_foreground = true;
			new_style_daemon = true;
			break;
		case 'd':
			run_in_foreground = true;
			break;
		case 'c':
			cold_start_cmd = true;
			break;
		case 'n':
			instance = (uint32_t)strtol(optarg, NULL, 0);
			break;
		default:
			// fprintf() since we don't want cf_fault's prefix.
			fprintf(stderr, "%s\n", USAGE);
			return 1;
		}
	}

	// Set all fields in the global runtime configuration instance. This parses
	// the configuration file, and creates as_namespace objects. (Return value
	// is a shortcut pointer to the global runtime configuration instance.)
	as_config *c = as_config_init(config_file);

	// Detect NUMA topology and, if requested, prepare for CPU and NUMA pinning.
	cf_topo_config(c->auto_pin, (cf_topo_numa_node_index)instance,
			&c->service.bind);

	// Perform privilege separation as necessary. If configured user & group
	// don't have root privileges, all resources created or reopened past this
	// point must be set up so that they are accessible without root privileges.
	// If not, the process will self-terminate with (hopefully!) a log message
	// indicating which resource is not set up properly.
	cf_process_privsep(c->uid, c->gid);

	//
	// All resources such as files, devices, and shared memory must be created
	// or reopened below this line! (The configuration file is the only thing
	// that must be opened above, in order to parse the user & group.)
	//==========================================================================

	// A "new-style" daemon expects console logging to be configured. (If not,
	// log messages won't be seen via the standard path.)
	if (new_style_daemon) {
		if (! cf_fault_console_is_held()) {
			cf_warning(AS_AS, "in new-style daemon mode, console logging is not configured");
		}
	}

	// Activate log sinks. Up to this point, 'cf_' log output goes to stderr,
	// filtered according to NO_SINKS_LIMIT in fault.c. After this point, 'cf_'
	// log output will appear in all log file sinks specified in configuration,
	// with specified filtering. If console sink is specified in configuration,
	// 'cf_' log output will continue going to stderr, but filtering will switch
	// from NO_SINKS_LIMIT to that specified in console sink configuration.
	if (0 != cf_fault_sink_activate_all_held()) {
		// Specifics of failure are logged in cf_fault_sink_activate_all_held().
		cf_crash_nostack(AS_AS, "can't open log sink(s)");
	}

	// Daemonize asd if specified. After daemonization, output to stderr will no
	// longer appear in terminal. Instead, check /tmp/aerospike-console.<pid>
	// for console output.
	if (! run_in_foreground && c->run_as_daemon) {
		// Don't close any open files when daemonizing. At this point only log
		// sink files are open - instruct cf_process_daemonize() to ignore them.
		int open_fds[CF_FAULT_SINKS_MAX];
		int num_open_fds = cf_fault_sink_get_fd_list(open_fds);

		cf_process_daemonize(open_fds, num_open_fds);
	}

	// Log which build this is - should be the first line in the log file.
	cf_info(AS_AS, "<><><><><><><><><><>  %s build %s  <><><><><><><><><><>",
			aerospike_build_type, aerospike_build_id);

	// Includes echoing the configuration file to log.
	as_config_post_process(c, config_file);

	xdr_config_post_process();

	// If we allocated a non-default config file name, free it.
	if (config_file != DEFAULT_CONFIG_FILE) {
		cf_free((void*)config_file);
	}

	// Write the pid file, if specified.
	if (! new_style_daemon) {
		write_pidfile(c->pidfile);
	}
	else {
		if (c->pidfile) {
			cf_warning(AS_AS, "will not write PID file in new-style daemon mode");
		}
	}

	// Check that required directories are set up properly.
	validate_directory(c->work_directory, "work");
	validate_directory(c->mod_lua.user_path, "Lua user");
	validate_smd_directory();

	// Initialize subsystems. At this point we're allocating local resources,
	// starting worker threads, etc. (But no communication with other server
	// nodes or clients yet.)

	as_json_init();				// Jansson JSON API used by System Metadata
	as_index_tree_gc_init();	// thread to purge dropped index trees
	as_sindex_thr_init();		// defrag secondary index (ok during population)
	as_nsup_init();				// load previous evict-void-time(s)

	// Initialize namespaces. Each namespace decides here whether it will do a
	// warm or cold start. Index arenas, partition structures and index tree
	// structures are initialized. Secondary index system metadata is restored.
	as_namespaces_init(cold_start_cmd, instance);

	// Initialize the storage system. For warm and cool restarts, this includes
	// fully resuming persisted indexes - this may take a few minutes.
	as_storage_init();

	// Migrate memory to correct NUMA node (includes resumed index arenas).
	cf_topo_migrate_memory();

	// Drop capabilities that we kept only for initialization.
	cf_process_drop_startup_caps();

	// Activate the storage system. For cold starts and cool restarts, this
	// includes full drive scans - this may take several hours. The defrag
	// subsystem starts operating at the end of this call.
	as_storage_load();

	// Populate all secondary indexes. This may block for a long time.
	as_sindex_boot_populateall();

	cf_info(AS_AS, "initializing services...");

	cf_dns_init();				// DNS resolver
	as_netio_init();			// query responses
	as_security_init();			// security features
	as_tsvc_init();				// all transaction handling
	as_hb_init();				// inter-node heartbeat
	as_skew_monitor_init();		// clock skew monitor
	as_fabric_init();			// inter-node communications
	as_exchange_init();			// initialize the cluster exchange subsystem
	as_clustering_init();		// clustering-v5 start
	as_info_init();				// info transaction handling
	as_migrate_init();			// move data between nodes
	as_proxy_init();			// do work on behalf of others
	as_rw_init();				// read & write service
	as_query_init();			// query transaction handling
	as_udf_init();				// user-defined functions
	as_scan_init();				// scan a namespace or set
	as_batch_init();			// batch transaction handling
	as_xdr_init();				// cross data-center replication
	as_mon_init();				// monitor

	// Wait for enough available storage. We've been defragging all along, but
	// here we wait until it's enough. This may block for a long time.
	as_storage_wait_for_defrag();

	// Start subsystems. At this point we may begin communicating with other
	// cluster nodes, and ultimately with clients.

	as_smd_start();				// enables receiving cluster state change events
	as_health_start();			// starts before fabric and hb to capture them
	as_fabric_start();			// may send & receive fabric messages
	as_xdr_start();				// XDR should start before it joins other nodes
	as_hb_start();				// start inter-node heartbeat
	as_exchange_start();		// start the cluster exchange subsystem
	as_clustering_start();		// clustering-v5 start
	as_nsup_start();			// may send evict-void-time(s) to other nodes
	as_service_start();			// server will now receive client transactions
	as_info_port_start();		// server will now receive info transactions
	as_ticker_start();			// only after everything else is started

	// Relevant for enterprise edition only.
	as_storage_start_tomb_raider();

	// Log a service-ready message.
	cf_info(AS_AS, "service ready: soon there will be cake!");

	//--------------------------------------------
	// Startup is done. This thread will now wait
	// quietly for a shutdown signal.
	//

	// Stop this thread from finishing. Intentionally deadlocking on a mutex is
	// a remarkably efficient way to do this.
	pthread_mutex_lock(&g_main_deadlock);
	g_startup_complete = true;
	pthread_mutex_lock(&g_main_deadlock);

	// When the service is running, you are here (deadlocked) - the signals that
	// stop the service (yes, these signals always occur in this thread) will
	// unlock the mutex, allowing us to continue.

	g_shutdown_started = true;
	pthread_mutex_unlock(&g_main_deadlock);
	pthread_mutex_destroy(&g_main_deadlock);

	//--------------------------------------------
	// Received a shutdown signal.
	//

	as_storage_shutdown(instance);
	as_xdr_shutdown();

	cf_info(AS_AS, "finished clean shutdown - exiting");

	// If shutdown was totally clean (all threads joined) we could just return,
	// but for now we exit to make sure all threads die.
#ifdef DOPROFILE
	exit(0); // exit(0) so profile build actually dumps gmon.out
#else
	_exit(0);
#endif

	return 0;
}
예제 #2
0
int
main(int argc, char **argv)
{
#ifdef USE_ASM
	as_mallocation_t asm_array[MAX_NUM_MALLOCATIONS];

	// Zero-out the statically-allocated array of memory allocation locations.
	memset(asm_array, 0, sizeof(asm_array));

	// Set the ASMalloc callback user data.
	g_my_cb_udata = asm_array;

	// This must come first to allow initialization of the ASMalloc library.
	asm_init();
#endif // defined(USE_ASM)

#ifdef USE_JEM
	// Initialize the JEMalloc interface.
	jem_init(true);
#endif

	// Initialize ref-counting system.
	cf_rc_init(NULL);

	// Initialize fault management framework.
	cf_fault_init();

	// Setup signal handlers.
	as_signal_setup();

	// Initialize the Jansson JSON API.
	as_json_init();

	int i;
	int cmd_optidx;
	const char *config_file = DEFAULT_CONFIG_FILE;
	bool run_in_foreground = false;
	bool cold_start_cmd = false;
	uint32_t instance = 0;

	// Parse command line options.
	while (-1 != (i = getopt_long(argc, argv, "", cmd_opts, &cmd_optidx))) {
		switch (i) {
		case 'h':
			// printf() since we want stdout and don't want cf_fault's prefix.
			printf("%s\n", HELP);
			return 0;
		case 'v':
			// printf() since we want stdout and don't want cf_fault's prefix.
			printf("%s build %s\n", aerospike_build_type, aerospike_build_id);
			return 0;
		case 'f':
			config_file = cf_strdup(optarg);
			cf_assert(config_file, AS_AS, CF_CRITICAL, "config filename cf_strdup failed");
			break;
		case 'd':
			run_in_foreground = true;
			break;
		case 'c':
			cold_start_cmd = true;
			break;
		case 'n':
			instance = (uint32_t)strtol(optarg, NULL, 0);
			break;
		default:
			// fprintf() since we don't want cf_fault's prefix.
			fprintf(stderr, "%s\n", USAGE);
			return 1;
		}
	}

	// Set all fields in the global runtime configuration instance. This parses
	// the configuration file, and creates as_namespace objects. (Return value
	// is a shortcut pointer to the global runtime configuration instance.)
	as_config *c = as_config_init(config_file);

#ifdef USE_ASM
	g_asm_hook_enabled = g_asm_cb_enabled = c->asmalloc_enabled;

	long initial_tid = syscall(SYS_gettid);
#endif

#ifdef MEM_COUNT
	// [Note: This should ideally be at the very start of the "main()" function,
	//        but we need to wait until after the config file has been parsed in
	//        order to support run-time configurability.]
	mem_count_init(c->memory_accounting ? MEM_COUNT_ENABLE : MEM_COUNT_DISABLE);
#endif

	// Perform privilege separation as necessary. If configured user & group
	// don't have root privileges, all resources created or reopened past this
	// point must be set up so that they are accessible without root privileges.
	// If not, the process will self-terminate with (hopefully!) a log message
	// indicating which resource is not set up properly.
	if (0 != c->uid && 0 == geteuid()) {
		// To see this log, change NO_SINKS_LIMIT in fault.c:
		cf_info(AS_AS, "privsep to %d %d", c->uid, c->gid);
		cf_process_privsep(c->uid, c->gid);
	}

	//
	// All resources such as files, devices, and shared memory must be created
	// or reopened below this line! (The configuration file is the only thing
	// that must be opened above, in order to parse the user & group.)
	//==========================================================================

	// Activate log sinks. Up to this point, 'cf_' log output goes to stderr,
	// filtered according to NO_SINKS_LIMIT in fault.c. After this point, 'cf_'
	// log output will appear in all log file sinks specified in configuration,
	// with specified filtering. If console sink is specified in configuration,
	// 'cf_' log output will continue going to stderr, but filtering will switch
	// from NO_SINKS_LIMIT to that specified in console sink configuration.
	if (0 != cf_fault_sink_activate_all_held()) {
		// Specifics of failure are logged in cf_fault_sink_activate_all_held().
		cf_crash_nostack(AS_AS, "can't open log sink(s)");
	}

	// Daemonize asd if specified. After daemonization, output to stderr will no
	// longer appear in terminal. Instead, check /tmp/aerospike-console.<pid>
	// for console output.
	if (! run_in_foreground && c->run_as_daemon) {
		// Don't close any open files when daemonizing. At this point only log
		// sink files are open - instruct cf_process_daemonize() to ignore them.
		int open_fds[CF_FAULT_SINKS_MAX];
		int num_open_fds = cf_fault_sink_get_fd_list(open_fds);

		cf_process_daemonize(open_fds, num_open_fds);
	}

#ifdef USE_ASM
	// Log the main thread's Linux Task ID (pre- and post-fork) to the console.
	fprintf(stderr, "Initial main thread tid: %lu\n", initial_tid);

	if (! run_in_foreground && c->run_as_daemon) {
		fprintf(stderr, "Post-daemonize main thread tid: %lu\n",
				syscall(SYS_gettid));
	}
#endif

	// Log which build this is - should be the first line in the log file.
	cf_info(AS_AS, "<><><><><><><><><><>  %s build %s  <><><><><><><><><><>",
			aerospike_build_type, aerospike_build_id);

	// Includes echoing the configuration file to log.
	as_config_post_process(c, config_file);

	// If we allocated a non-default config file name, free it.
	if (config_file != DEFAULT_CONFIG_FILE) {
		cf_free((void*)config_file);
	}

	// Write the pid file, if specified.
	write_pidfile(c->pidfile);

	// Check that required directories are set up properly.
	validate_directory(c->work_directory, "work");
	validate_directory(c->mod_lua.system_path, "Lua system");
	validate_directory(c->mod_lua.user_path, "Lua user");
	validate_smd_directory();

	// Initialize subsystems. At this point we're allocating local resources,
	// starting worker threads, etc. (But no communication with other server
	// nodes or clients yet.)

	as_smd_init();				// System Metadata first - others depend on it
	ai_init();					// before as_storage_init() populates indexes
	as_sindex_thr_init();		// defrag secondary index (ok during population)

	// Initialize namespaces. Each namespace decides here whether it will do a
	// warm or cold start. Index arenas, partition structures and index tree
	// structures are initialized. Secondary index system metadata is restored.
	as_namespaces_init(cold_start_cmd, instance);

	// Initialize the storage system. For cold starts, this includes reading
	// all the objects off the drives. This may block for a long time. The
	// defrag subsystem starts operating at the end of this call.
	as_storage_init();

	// Populate all secondary indexes. This may block for a long time.
	as_sindex_boot_populateall();

	cf_info(AS_AS, "initializing services...");

	as_netio_init();
	as_security_init();			// security features
	as_tsvc_init();				// all transaction handling
	as_hb_init();				// inter-node heartbeat
	as_fabric_init();			// inter-node communications
	as_info_init();				// info transaction handling
	as_paxos_init();			// cluster consensus algorithm
	as_migrate_init();			// move data between nodes
	as_proxy_init();			// do work on behalf of others
	as_write_init();			// write service
	as_query_init();			// query transaction handling
	as_udf_init();				// apply user-defined functions
	as_scan_init();				// scan a namespace or set
	as_batch_init();			// batch transaction handling
	as_batch_direct_init();		// low priority transaction handling        
	as_xdr_init();				// cross data-center replication
	as_mon_init();				// monitor

	// Wait for enough available storage. We've been defragging all along, but
	// here we wait until it's enough. This may block for a long time.
	as_storage_wait_for_defrag();

	// Start subsystems. At this point we may begin communicating with other
	// cluster nodes, and ultimately with clients.

	as_smd_start(c->smd);		// enables receiving paxos state change events
	as_fabric_start();			// may send & receive fabric messages
	as_hb_start();				// start inter-node heatbeat
	as_paxos_start();			// blocks until cluster membership is obtained
	as_nsup_start();			// may send delete transactions to other nodes
	as_demarshal_start();		// server will now receive client transactions
	as_info_port_start();		// server will now receive info transactions
	info_debug_ticker_start();	// only after everything else is started

	// Log a service-ready message.
	cf_info(AS_AS, "service ready: soon there will be cake!");

	//--------------------------------------------
	// Startup is done. This thread will now wait
	// quietly for a shutdown signal.
	//

	// Stop this thread from finishing. Intentionally deadlocking on a mutex is
	// a remarkably efficient way to do this.
	pthread_mutex_init(&g_NONSTOP, NULL);
	pthread_mutex_lock(&g_NONSTOP);
	g_startup_complete = true;
	pthread_mutex_lock(&g_NONSTOP);

	// When the service is running, you are here (deadlocked) - the signals that
	// stop the service (yes, these signals always occur in this thread) will
	// unlock the mutex, allowing us to continue.

	g_shutdown_started = true;
	pthread_mutex_unlock(&g_NONSTOP);
	pthread_mutex_destroy(&g_NONSTOP);

	//--------------------------------------------
	// Received a shutdown signal.
	//

	as_storage_shutdown();
	as_xdr_shutdown();
	as_smd_shutdown(c->smd);

	cf_info(AS_AS, "finished clean shutdown - exiting");

	// If shutdown was totally clean (all threads joined) we could just return,
	// but for now we exit to make sure all threads die.
#ifdef DOPROFILE
	exit(0); // exit(0) so profile build actually dumps gmon.out
#else
	_exit(0);
#endif

	return 0;
}