/* Open a connection to the Slurm DBD and set slurmdbd_conn */ static void _open_slurmdbd_conn(bool need_db) { bool try_backup = true; int rc; if (slurmdbd_conn && slurmdbd_conn->fd >= 0) { debug("Attempt to re-open slurmdbd socket"); /* clear errno (checked after this for errors) */ errno = 0; return; } slurm_persist_conn_close(slurmdbd_conn); if (!slurmdbd_conn) { slurmdbd_conn = xmalloc(sizeof(slurm_persist_conn_t)); slurmdbd_conn->flags = PERSIST_FLAG_DBD | PERSIST_FLAG_RECONNECT; slurmdbd_conn->persist_type = PERSIST_TYPE_DBD; if (!slurmdbd_cluster) slurmdbd_cluster = slurm_get_cluster_name(); slurmdbd_conn->cluster_name = xstrdup(slurmdbd_cluster); slurmdbd_conn->timeout = (slurm_get_msg_timeout() + 35) * 1000; slurmdbd_conn->rem_port = slurm_get_accounting_storage_port(); if (!slurmdbd_conn->rem_port) { slurmdbd_conn->rem_port = SLURMDBD_PORT; slurm_set_accounting_storage_port( slurmdbd_conn->rem_port); } } slurmdbd_shutdown = 0; slurmdbd_conn->shutdown = &slurmdbd_shutdown; slurmdbd_conn->version = SLURM_PROTOCOL_VERSION; xfree(slurmdbd_conn->rem_host); slurmdbd_conn->rem_host = slurm_get_accounting_storage_host(); if (!slurmdbd_conn->rem_host) { slurmdbd_conn->rem_host = xstrdup(DEFAULT_STORAGE_HOST); slurm_set_accounting_storage_host( slurmdbd_conn->rem_host); } again: if (((rc = slurm_persist_conn_open(slurmdbd_conn)) != SLURM_SUCCESS) && try_backup) { xfree(slurmdbd_conn->rem_host); try_backup = false; if ((slurmdbd_conn->rem_host = slurm_get_accounting_storage_backup_host())) goto again; } if (rc == SLURM_SUCCESS) { /* set the timeout to the timeout to be used for all other * messages */ slurmdbd_conn->timeout = SLURMDBD_TIMEOUT * 1000; if (slurmdbd_conn->trigger_callbacks.dbd_resumed) (slurmdbd_conn->trigger_callbacks.dbd_resumed)(); if (slurmdbd_conn->trigger_callbacks.db_resumed) (slurmdbd_conn->trigger_callbacks.db_resumed)(); } if ((!need_db && (rc == ESLURM_DB_CONNECTION)) || (rc == SLURM_SUCCESS)) { debug("slurmdbd: Sent PersistInit msg"); /* clear errno (checked after this for errors) */ errno = 0; } else { if ((rc == ESLURM_DB_CONNECTION) && slurmdbd_conn->trigger_callbacks.db_fail) (slurmdbd_conn->trigger_callbacks.db_fail)(); error("slurmdbd: Sending PersistInit msg: %m"); slurm_persist_conn_close(slurmdbd_conn); } }
/* Open a connection to the Slurm DBD and set slurmdbd_conn */ static void _open_slurmdbd_conn(bool need_db) { char *backup_host = NULL; int rc; if (slurmdbd_conn && slurmdbd_conn->fd >= 0) { debug("Attempt to re-open slurmdbd socket"); /* clear errno (checked after this for errors) */ errno = 0; return; } slurm_persist_conn_close(slurmdbd_conn); if (!slurmdbd_conn) { slurmdbd_conn = xmalloc(sizeof(slurm_persist_conn_t)); slurmdbd_conn->flags = PERSIST_FLAG_DBD | PERSIST_FLAG_RECONNECT; slurmdbd_conn->persist_type = PERSIST_TYPE_DBD; if (!slurmdbd_cluster) slurmdbd_cluster = slurm_get_cluster_name(); slurmdbd_conn->cluster_name = xstrdup(slurmdbd_cluster); slurmdbd_conn->timeout = (slurm_get_msg_timeout() + 35) * 1000; slurmdbd_conn->rem_port = slurm_get_accounting_storage_port(); if (!slurmdbd_conn->rem_port) { slurmdbd_conn->rem_port = SLURMDBD_PORT; slurm_set_accounting_storage_port( slurmdbd_conn->rem_port); } } slurmdbd_shutdown = 0; slurmdbd_conn->shutdown = &slurmdbd_shutdown; slurmdbd_conn->version = SLURM_PROTOCOL_VERSION; xfree(slurmdbd_conn->rem_host); slurmdbd_conn->rem_host = slurm_get_accounting_storage_host(); if (!slurmdbd_conn->rem_host) { slurmdbd_conn->rem_host = xstrdup(DEFAULT_STORAGE_HOST); slurm_set_accounting_storage_host( slurmdbd_conn->rem_host); } // See if a backup slurmdbd is configured backup_host = slurm_get_accounting_storage_backup_host(); again: // A connection failure is only an error if backup dne or also fails if (backup_host) slurmdbd_conn->flags |= PERSIST_FLAG_SUPPRESS_ERR; else slurmdbd_conn->flags &= (~PERSIST_FLAG_SUPPRESS_ERR); if (((rc = slurm_persist_conn_open(slurmdbd_conn)) != SLURM_SUCCESS) && backup_host) { xfree(slurmdbd_conn->rem_host); // Force the next error to display slurmdbd_conn->comm_fail_time = 0; slurmdbd_conn->rem_host = backup_host; backup_host = NULL; goto again; } xfree(backup_host); if (rc == SLURM_SUCCESS) { /* set the timeout to the timeout to be used for all other * messages */ slurmdbd_conn->timeout = SLURMDBD_TIMEOUT * 1000; if (slurmdbd_conn->trigger_callbacks.dbd_resumed) (slurmdbd_conn->trigger_callbacks.dbd_resumed)(); if (slurmdbd_conn->trigger_callbacks.db_resumed) (slurmdbd_conn->trigger_callbacks.db_resumed)(); } if ((!need_db && (rc == ESLURM_DB_CONNECTION)) || (rc == SLURM_SUCCESS)) { debug("slurmdbd: Sent PersistInit msg"); /* clear errno (checked after this for errors) */ errno = 0; } else { if ((rc == ESLURM_DB_CONNECTION) && slurmdbd_conn->trigger_callbacks.db_fail) (slurmdbd_conn->trigger_callbacks.db_fail)(); error("slurmdbd: Sending PersistInit msg: %m"); slurm_persist_conn_close(slurmdbd_conn); } }