Example #1
0
int
main ( int argc, char *argv[] )
{
   int rc;
   struct stat struct_stat;
   pthread_t pid;
   pthread_attr_t attr;
   int i, num_sources;
   uid_t gmetad_uid;
   mode_t rrd_umask;
   char * gmetad_username;
   struct passwd *pw;
   gmetad_config_t *c = &gmetad_config;
   apr_interval_time_t sleep_time;
   apr_time_t last_metadata;
   double random_sleep_factor;
   unsigned int rand_seed;

   rc = apr_initialize();
   if (rc != APR_SUCCESS) {
      return -1;
   }

   /* create a memory pool. */
   apr_pool_create(&global_context, NULL);

   /* Ignore SIGPIPE */
   signal( SIGPIPE, SIG_IGN );

   initialize_scoreboard();

   /* Mark the time this gmetad started */
   started = apr_time_now();

   if (cmdline_parser(argc, argv, &args_info) != 0)
      err_quit("command-line parser error");

   num_sources = number_of_datasources( args_info.conf_arg );
   if(!num_sources)
      {
         err_quit("%s doesn't have any data sources specified", args_info.conf_arg);
      }

   memset(&root, 0, sizeof(root));
   root.id = ROOT_NODE;

   /* Get the real number of data sources later */
   sources = hash_create( num_sources + 10 );
   if (! sources )
      {
         err_quit("Unable to create sources hash\n");
      }

   root.authority = hash_create( num_sources + 10 );
   if (!root.authority)
      {
         err_quit("Unable to create root authority (our grids and clusters) hash\n");
      }

   root.metric_summary = hash_create (DEFAULT_METRICSIZE);
   if (!root.metric_summary)
      {
         err_quit("Unable to create root summary hash");
      }

   parse_config_file ( args_info.conf_arg );
    /* If given, use command line directives over config file ones. */
   if (args_info.debug_given)
      {
         c->debug_level = args_info.debug_arg;
      }
   debug_level = c->debug_level;
   set_debug_msg_level(debug_level);

   /* Setup our default authority pointer if the conf file hasnt yet.
    * Done in the style of hash node strings. */
   if (!root.stringslen)
      {
         gethostname(hostname, HOSTNAMESZ);
         root.authority_ptr = 0;
         sprintf(root.strings, "http://%s/ganglia/", hostname);
         root.stringslen += strlen(root.strings) + 1;
      }

   rand_seed = apr_time_now() * (int)pthread_self();
   for(i = 0; i < root.stringslen; rand_seed = rand_seed * root.strings[i++]);

   /* Debug level 1 is error output only, and no daemonizing. */
   if (!debug_level)
      {
         rrd_umask = c->umask;
         daemon_init (argv[0], 0, rrd_umask);
      }

   if (args_info.pid_file_given)
     {
       update_pidfile (args_info.pid_file_arg);
     }

   /* The rrd_rootdir must be writable by the gmetad process */
   if( c->should_setuid )
      {
         if(! (pw = getpwnam(c->setuid_username)))
            {
               err_sys("Getpwnam error");
            }
         gmetad_uid = pw->pw_uid;
         gmetad_username = c->setuid_username;
      }
   else
      {
         gmetad_uid = getuid();
         if(! (pw = getpwuid(gmetad_uid)))
            {
               err_sys("Getpwnam error");
            } 
         gmetad_username = strdup(pw->pw_name);
      }

   debug_msg("Going to run as user %s", gmetad_username);
   if( c->should_setuid )
      {
         become_a_nobody(c->setuid_username);
      }

   if( c->write_rrds )
      {
         if( stat( c->rrd_rootdir, &struct_stat ) )
            {
                err_sys("Please make sure that %s exists", c->rrd_rootdir);
            }
         if ( struct_stat.st_uid != gmetad_uid )
            {
                err_quit("Please make sure that %s is owned by %s", c->rrd_rootdir, gmetad_username);
            }
         if (! (struct_stat.st_mode & S_IWUSR) )
            {
                err_quit("Please make sure %s has WRITE permission for %s", gmetad_username, c->rrd_rootdir);
            }
      }

   if(debug_level)
      {
         fprintf(stderr,"Sources are ...\n");
         hash_foreach( sources, print_sources, NULL);
      }

#ifdef WITH_MEMCACHED
   if (c->memcached_parameters != NULL)
      {
         memcached_connection_pool = memcached_pool(c->memcached_parameters, strlen(c->memcached_parameters));
      }
#endif /* WITH_MEMCACHED */

   server_socket = g_tcp_socket_server_new( c->xml_port );
   if (server_socket == NULL)
      {
         err_quit("tcp_listen() on xml_port failed");
      }
   debug_msg("xml listening on port %d", c->xml_port);
   
   interactive_socket = g_tcp_socket_server_new( c->interactive_port );
   if (interactive_socket == NULL)
      {
         err_quit("tcp_listen() on interactive_port failed");
      }
   debug_msg("interactive xml listening on port %d", c->interactive_port);

    /* Forward metrics to Graphite using carbon protocol */
    if (c->carbon_server != NULL)
      {
         if (!strcmp(c->carbon_protocol, "udp"))
            {
               carbon_udp_socket = init_carbon_udp_socket (c->carbon_server, c->carbon_port);

               if (carbon_udp_socket == NULL)
                  err_quit("carbon %s socket failed for %s:%d", c->carbon_protocol, c->carbon_server, c->carbon_port);
            }
         debug_msg("carbon forwarding ready to send via %s to %s:%d", c->carbon_protocol, c->carbon_server, c->carbon_port);
      }

#ifdef WITH_RIEMANN
    if (c->riemann_server !=NULL)
      {
         if (!strcmp(c->riemann_protocol, "udp"))
            {
               riemann_udp_socket = init_riemann_udp_socket (c->riemann_server, c->riemann_port);

               if (riemann_udp_socket == NULL)
                  err_quit("[riemann] %s socket failed for %s:%d", c->riemann_protocol, c->riemann_server, c->riemann_port);
            } else {
               err_quit("[riemann] TCP transport not supported yet.");
            }
         debug_msg("[riemann] ready to forward metrics via %s to %s:%d", c->riemann_protocol, c->riemann_server, c->riemann_port);
      }
#endif /* WITH_RIEMANN */

   /* initialize summary mutex */
   root.sum_finished = (pthread_mutex_t *) 
                          malloc(sizeof(pthread_mutex_t));
   pthread_mutex_init(root.sum_finished, NULL);

   pthread_attr_init( &attr );
   pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_DETACHED );

   /* Spin off the non-interactive server threads. (Half as many as interactive). */
   for (i=0; i < c->server_threads/2; i++)
      pthread_create(&pid, &attr, server_thread, (void*) 0);

   /* Spin off the interactive server threads. */
   for (i=0; i < c->server_threads; i++)
      pthread_create(&pid, &attr, server_thread, (void*) 1);

   hash_foreach( sources, spin_off_the_data_threads, NULL );

   /* A thread to cleanup old metrics and hosts */
   pthread_create(&pid, &attr, cleanup_thread, (void *) NULL);
   debug_msg("cleanup thread has been started");

    /* Meta data */
   last_metadata = 0;
   for(;;)
      {
         /* Do at a random interval, between 
                 (shortest_step/2) +/- METADATA_SLEEP_RANDOMIZE percent */
         random_sleep_factor = (1 + (METADATA_SLEEP_RANDOMIZE / 50.0) * ((rand_r(&rand_seed) - RAND_MAX/2)/(float)RAND_MAX));
         sleep_time = random_sleep_factor * apr_time_from_sec(c->shortest_step) / 2;
         /* Make sure the sleep time is at least 1 second */
         if(apr_time_sec(apr_time_now() + sleep_time) < (METADATA_MINIMUM_SLEEP + apr_time_sec(apr_time_now())))
            sleep_time += apr_time_from_sec(METADATA_MINIMUM_SLEEP);
         apr_sleep(sleep_time);

         /* Need to be sure root is locked while doing summary */
         pthread_mutex_lock(root.sum_finished);

         /* Flush the old values */
         hash_foreach(root.metric_summary, zero_out_summary, NULL);
         root.hosts_up = 0;
         root.hosts_down = 0;

         /* Sum the new values */
         hash_foreach(root.authority, do_root_summary, NULL );

         /* summary completed */
         pthread_mutex_unlock(root.sum_finished);

         /* Save them to RRD */
         hash_foreach(root.metric_summary, write_root_summary, NULL);

         /* Remember our last run */
         last_metadata = apr_time_now();
      }

   apr_pool_destroy(global_context);

   apr_terminate();
   return 0;
}
Example #2
0
int main(int argc, char *argv[])
{
    int rval;
    gexec_cluster_t cluster;
    gexec_host_t *host;
    llist_entry *li;

    debug_level = 1;
    set_debug_msg_level(debug_level);

    if (cmdline_parser (argc, argv, &args_info) != 0)
        exit(1) ;

    rval = gexec_cluster(&cluster, args_info.gmond_ip_arg, args_info.gmond_port_arg );
    if ( rval != 0)
    {
        printf("Unable to get hostlist from %s %d!\n", args_info.gmond_ip_arg, args_info.gmond_port_arg);
        exit(-1);
    }

    if( args_info.mpifile_flag )
    {
        if( args_info.all_flag )
            li = cluster.hosts;
        else
            li = cluster.gexec_hosts;

        for( ; li != NULL; li = li->next )
        {
            host = li->val;
            if( host->name_resolved && ! args_info.numeric_flag )
            {
                if(!strcmp(host->domain, "unspecified"))
                    printf("%s:%d\n", host->name, host->cpu_num);
                else
                    printf("%s.%s:%d\n", host->name, host->domain, host->cpu_num);
            }
            else
            {
                printf("%s:%d\n", host->ip, host->cpu_num);
            }
        }
        exit(0);
    }

    if(! args_info.list_flag )
    {
        printf("CLUSTER INFORMATION\n");
        printf("       Name: %s\n", cluster.name);
        printf("      Hosts: %d\n", cluster.num_hosts);
        printf("Gexec Hosts: %d\n", cluster.num_gexec_hosts);
        printf(" Dead Hosts: %d\n", cluster.num_dead_hosts);
        printf("  Localtime: %s\n", ctime(&(cluster.localtime)) );
    }

    if( args_info.dead_flag)
    {
        if(! args_info.list_flag )
            printf("DEAD CLUSTER HOSTS\n");
        if(cluster.num_dead_hosts)
        {
            if(! args_info.list_flag )
                printf("%32.32s   Last Reported\n", "Hostname");
            for( li = cluster.dead_hosts; li != NULL; li = li->next)
            {
                host= li->val;
                printf("%32.32s   %s", host->name, ctime(&(host->last_reported)));
            }
        }
        else
        {
            printf("There are no hosts down at this time\n");
        }
        gexec_cluster_free(&cluster);
        exit(0);
    }

    if( args_info.all_flag )
    {
        li = cluster.hosts;
        if(! cluster.num_hosts )
        {
            printf("There are no hosts up at this time\n");
            gexec_cluster_free(&cluster);
            exit(0);
        }
    }
    else
    {
        li = cluster.gexec_hosts;
        if(! cluster.num_gexec_hosts)
        {
            printf("There are no hosts running gexec at this time\n");
            gexec_cluster_free(&cluster);
            exit(0);
        }
    }

    if(! args_info.list_flag )
    {
        printf("CLUSTER HOSTS\n");
        printf("Hostname                     LOAD                       CPU              Gexec\n");
        printf(" CPUs (Procs/Total) [     1,     5, 15min] [  User,  Nice, System, Idle, Wio]\n\n");
    }
    for(; li != NULL; li = li->next)
    {
        host = li->val;
        if( host->name_resolved && ! args_info.numeric_flag )
        {
            if(!strcmp(host->domain, "unspecified"))
                printf("%s", host->name);
            else
                printf("%s.%s", host->name, host->domain);
        }
        else
        {
            printf("%s", host->ip);
        }
        if( args_info.single_line_flag )
            printf(" ");
        else
            printf("\n");

        printf(" %4d (%5d/%5d) [%6.2f,%6.2f,%6.2f] [%6.1f,%6.1f,%6.1f,%6.1f,%6.1f] ",
               host->cpu_num, host->proc_run, host->proc_total,
               host->load_one, host->load_five, host->load_fifteen,
               host->cpu_user, host->cpu_nice, host->cpu_system, host->cpu_idle, host->cpu_wio);
        if(host->gexec_on)
            printf("%s\n", "ON");
        else
            printf("%s\n", "OFF");
    }

    gexec_cluster_free(&cluster);
    return 0;
}