FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive linux email lists and to make them easily accessible for linux users/developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > Redhat > Cluster Development

 
 
LinkBack Thread Tools
 
Old 09-07-2011, 12:40 PM
"Fabio M. Di Nitto"
 
Default cman: improve cman/qdisk interactions

- libcman/cman: add new quorum API call to update name and votes of a quorum device
- cman: simplify common code to free quorum_device infrastructure and handle quorum recalculation
- cman: do better logging/error reports of the quorum API usage
- cman: use strdup instead of malloc+strcpy (code is more readable)
- libcman: perform better error checking in register_quorum_device/update_quorum_device

- Allow qdisk to update device name in cman using a new libcman quorum API call
- Switch all but one votes update in qdisk to use the new API
- Perform slight better error checking of some update opertaions

Resolves: rhbz#735917

Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
---
cman/daemon/cnxman-socket.h | 1 +
cman/daemon/commands.c | 138 +++++++++++++++++++++++++++++++++----------
cman/lib/libcman.c | 19 +++++-
cman/lib/libcman.h | 4 +
cman/qdisk/main.c | 36 ++++++++++-
5 files changed, 159 insertions(+), 39 deletions(-)

diff --git a/cman/daemon/cnxman-socket.h b/cman/daemon/cnxman-socket.h
index e8b7378..d243b40 100644
--- a/cman/daemon/cnxman-socket.h
+++ b/cman/daemon/cnxman-socket.h
@@ -32,6 +32,7 @@
#define CMAN_CMD_REG_QUORUMDEV 0x800000b5
#define CMAN_CMD_UNREG_QUORUMDEV 0x800000b6
#define CMAN_CMD_POLL_QUORUMDEV 0x800000b7
+#define CMAN_CMD_UPDATE_QUORUMDEV 0x800000b8
#define CMAN_CMD_TRY_SHUTDOWN 0x800000bb
#define CMAN_CMD_SHUTDOWN_REPLY 0x000000bc
#define CMAN_CMD_UPDATE_FENCE_INFO 0x800000bd
diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index 2948952..567ff96 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -1080,27 +1080,69 @@ static int do_cmd_try_shutdown(struct connection *con, char *cmdbuf)
return 0;
}

+static void free_quorum_device(void)
+{
+ if (!quorum_device)
+ return;
+
+ if (quorum_device->name)
+ free(quorum_device->name);
+
+ free(quorum_device);
+
+ quorum_device = NULL;
+
+ return;
+}
+
+
+static void quorum_device_update_votes(int votes)
+{
+ int oldvotes;
+
+ /* Update votes even if it existed before */
+ oldvotes = quorum_device->votes;
+ quorum_device->votes = votes;
+
+ /* If it is a member and votes decreased, recalculate quorum */
+ if (quorum_device->state == NODESTATE_MEMBER &&
+ oldvotes != votes) {
+ recalculate_quorum(1, 0);
+ }
+}
+
static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
{
int votes;
- int oldvotes;
char *name = cmdbuf+sizeof(int);

- if (!ais_running)
+ if (!ais_running) {
+ log_printf(LOG_ERR, "unable to register quorum device: corosync is not running
");
return -ENOTCONN;
+ }

- if (!we_are_a_cluster_member)
+ if (!we_are_a_cluster_member) {
+ log_printf(LOG_ERR, "unable to register quorum device: this node is not part of a cluster
");
return -ENOENT;
+ }

- if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN)
+ if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN) {
+ log_printf(LOG_ERR, "unable to register quorum device: name is too long
");
+ /* this should probably return -E2BIG? */
return -EINVAL;
+ }

/* Allow re-registering of a quorum device if the name is the same */
- if (quorum_device && strcmp(name, quorum_device->name))
- return -EBUSY;
+ if (quorum_device && strcmp(name, quorum_device->name)) {
+ log_printf(LOG_ERR, "unable to re-register quorum device: device names do not match
");
+ log_printf(LOG_DEBUG, "memb: old name: %s new name: %s
", quorum_device->name, name);
+ return -EBUSY;
+ }

- if (find_node_by_name(name))
- return -EALREADY;
+ if (find_node_by_name(name)) {
+ log_printf(LOG_ERR, "unable to register quorum device: a node with the same name (%s) already exists
", name);
+ return -EALREADY;
+ }

memcpy(&votes, cmdbuf, sizeof(int));

@@ -1108,18 +1150,19 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
if (!quorum_device)
{
quorum_device = malloc(sizeof(struct cluster_node));
- if (!quorum_device)
+ if (!quorum_device) {
+ log_printf(LOG_ERR, "unable to register quorum device: not enough memory
");
return -ENOMEM;
+ }
memset(quorum_device, 0, sizeof(struct cluster_node));

- quorum_device->name = malloc(strlen(name) + 1);
+ quorum_device->name = strdup(name);
if (!quorum_device->name) {
- free(quorum_device);
- quorum_device = NULL;
+ log_printf(LOG_ERR, "unable to register quorum device: not enough memory
");
+ free_quorum_device();
return -ENOMEM;
}

- strcpy(quorum_device->name, name);
quorum_device->state = NODESTATE_DEAD;
gettimeofday(&quorum_device->join_time, NULL);

@@ -1132,34 +1175,63 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
log_printf(LOG_INFO, "quorum device re-registered
");
}

- /* Update votes even if it existed before */
- oldvotes = quorum_device->votes;
- quorum_device->votes = votes;
+ quorum_device_update_votes(votes);

- /* If it is a member and votes decreased, recalculate quorum */
- if (quorum_device->state == NODESTATE_MEMBER &&
- oldvotes != votes) {
- recalculate_quorum(1, 0);
+ return 0;
+}
+
+static int do_cmd_unregister_quorum_device(char *cmdbuf, int *retlen)
+{
+ if (!quorum_device) {
+ log_printf(LOG_DEBUG, "memb: failed to unregister a non existing quorum device
");
+ return -EINVAL;
}

- return 0;
+ if (quorum_device->state == NODESTATE_MEMBER) {
+ log_printf(LOG_DEBUG, "memb: failed to unregister: quorum device still active.
");
+ return -EBUSY;
+ }
+
+ free_quorum_device();
+
+ log_printf(LOG_INFO, "quorum device unregistered
");
+ return 0;
}

-static int do_cmd_unregister_quorum_device(char *cmdbuf, int *retlen)
+static int do_cmd_update_quorum_device(char *cmdbuf, int *retlen)
{
- if (!quorum_device)
- return -EINVAL;
+ int votes, ret = 0;
+ char *name = cmdbuf+sizeof(int);

- if (quorum_device->state == NODESTATE_MEMBER)
- return -EBUSY;
+ if (!quorum_device) {
+ log_printf(LOG_DEBUG, "memb: failed to update a non-existing quorum device
");
+ return -EINVAL;
+ }

- free(quorum_device->name);
- free(quorum_device);
+ memcpy(&votes, cmdbuf, sizeof(int));

- quorum_device = NULL;
+ /* allow name change of the quorum device */
+ if (quorum_device && strcmp(name, quorum_device->name)) {
+ char *newname = NULL;
+ char *oldname = NULL;

- log_printf(LOG_INFO, "quorum device unregistered
");
- return 0;
+ log_printf(LOG_DEBUG, "memb: old name: %s new name: %s
", quorum_device->name, name);
+ newname = strdup(name);
+ if (!newname) {
+ log_printf(LOG_ERR, "memb: unable to update quorum device name: out of memory
");
+ ret = -ENOMEM;
+ goto out;
+ }
+ log_printf(LOG_INFO, "quorum device name changed to %s
", name);
+ oldname = quorum_device->name;
+ quorum_device->name = newname;
+ free(oldname);
+ }
+
+out:
+ quorum_device_update_votes(votes);
+
+ return ret;
}

static int reload_config(int new_version, int should_broadcast)
@@ -1560,6 +1632,10 @@ int process_command(struct connection *con, int cmd, char *cmdbuf,
err = do_cmd_unregister_quorum_device(cmdbuf, retlen);
break;

+ case CMAN_CMD_UPDATE_QUORUMDEV:
+ err = do_cmd_update_quorum_device(cmdbuf, retlen);
+ break;
+
case CMAN_CMD_POLL_QUORUMDEV:
err = do_cmd_poll_quorum_device(cmdbuf, retlen);
break;
diff --git a/cman/lib/libcman.c b/cman/lib/libcman.c
index daaad07..a89c731 100644
--- a/cman/lib/libcman.c
+++ b/cman/lib/libcman.c
@@ -1002,14 +1002,15 @@ int cman_replyto_shutdown(cman_handle_t handle, int yesno)
return 0;
}

-
-int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
+static int cman_set_quorum_device(cman_handle_t handle,
+ int ops,
+ char *name, int votes)
{
struct cman_handle *h = (struct cman_handle *)handle;
char buf[strlen(name)+1 + sizeof(int)];
VALIDATE_HANDLE(h);

- if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN)
+ if ((!name) || (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN) || (votes < 0))
{
errno = EINVAL;
return -1;
@@ -1017,7 +1018,12 @@ int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)

memcpy(buf, &votes, sizeof(int));
strcpy(buf+sizeof(int), name);
- return info_call(h, CMAN_CMD_REG_QUORUMDEV, buf, strlen(name)+1+sizeof(int), NULL, 0);
+ return info_call(h, ops, buf, strlen(name)+1+sizeof(int), NULL, 0);
+}
+
+int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
+{
+ return cman_set_quorum_device(handle, CMAN_CMD_REG_QUORUMDEV, name, votes);
}

int cman_unregister_quorum_device(cman_handle_t handle)
@@ -1053,6 +1059,11 @@ int cman_get_quorum_device(cman_handle_t handle, struct cman_qdev_info *info)
return ret;
}

+int cman_update_quorum_device(cman_handle_t handle, char *name, int votes)
+{
+ return cman_set_quorum_device(handle, CMAN_CMD_UPDATE_QUORUMDEV, name, votes);
+}
+
int cman_get_fenceinfo(cman_handle_t handle, int nodeid, uint64_t *time, int *fenced, char *agent)
{
struct cman_handle *h = (struct cman_handle *)handle;
diff --git a/cman/lib/libcman.h b/cman/lib/libcman.h
index feb10a2..9f97875 100644
--- a/cman/lib/libcman.h
+++ b/cman/lib/libcman.h
@@ -420,6 +420,9 @@ int cman_barrier_delete(cman_handle_t handle, const char *name);
/*
* Add your own quorum device here, needs an admin socket
*
+ * register_quorum and update_quorum arguments are mandatory.
+ * name has to be a valid null-terminated string and votes >= 0.
+ *
* After creating a quorum device you will need to call 'poll_quorum_device'
* at least once every (default) 10 seconds (this can be changed in CCS)
* otherwise it will time-out and the cluster will lose its vote.
@@ -428,6 +431,7 @@ int cman_register_quorum_device(cman_handle_t handle, char *name, int votes);
int cman_unregister_quorum_device(cman_handle_t handle);
int cman_poll_quorum_device(cman_handle_t handle, int isavailable);
int cman_get_quorum_device(cman_handle_t handle, struct cman_qdev_info *info);
+int cman_update_quorum_device(cman_handle_t handle, char *name, int votes);

/*
* Sets the dirty bit inside cman. This indicates that the node has
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index c1598fa..effc8f2 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -690,6 +690,17 @@ register_device(qd_ctx *ctx)
ctx->qc_votes : 0);
}

+static int
+update_device(qd_ctx *ctx)
+{
+ return cman_update_quorum_device(
+ ctx->qc_cman_admin,
+ (ctx->qc_flags&RF_CMAN_LABEL) ?
+ ctx->qc_cman_label : ctx->qc_device,
+ (!(ctx->qc_flags & RF_MASTER_WINS) ||
+ ctx->qc_status == S_MASTER) ?
+ ctx->qc_votes : 0);
+}

static int
adjust_votes(qd_ctx *ctx)
@@ -697,7 +708,7 @@ adjust_votes(qd_ctx *ctx)
if (!(ctx->qc_flags & RF_MASTER_WINS))
return 0;

- return register_device(ctx);
+ return update_device(ctx);
}


@@ -1457,7 +1468,7 @@ get_dynamic_config_data(qd_ctx *ctx, int ccsfd)
"Quorum device removed from the configuration."
" Shutting down.
");
ctx->qc_votes = 0;
- register_device(ctx);
+ update_device(ctx);
_running = 0;
return -1;
}
@@ -1623,6 +1634,10 @@ get_dynamic_config_data(qd_ctx *ctx, int ccsfd)
*
* This only works after we have already gotten static
* configuration data during initial startup.
+ *
+ * this call cannot be converted to update_device
+ * because it would change the device name in cman while
+ * qdisk has not switched
*/
register_device(ctx);
}
@@ -2119,9 +2134,22 @@ main(int argc, char **argv)

if (!_running)
goto out;
-
+
/* This registers the quorum device */
- register_device(&ctx);
+ ret = register_device(&ctx);
+ if (ret) {
+ if (errno == EBUSY) {
+ logt_print(LOG_NOTICE, "quorum device is already registered, updating
");
+ ret = update_device(&ctx);
+ if (ret) {
+ logt_print(LOG_ERR, "DEBUG: unable to update quorum device info
");
+ goto out;
+ }
+ } else {
+ logt_print(LOG_ERR, "Unable to register quorum device!
");
+ goto out;
+ }
+ }

io_nanny_start(ch_user, ctx.qc_tko * ctx.qc_interval);

--
1.7.4.4
 
Old 09-07-2011, 01:10 PM
"Fabio M. Di Nitto"
 
Default cman: improve cman/qdisk interactions

- libcman/cman: add new quorum API call to update name and votes of a quorum device
- cman: simplify common code to free quorum_device infrastructure and handle quorum recalculation
- cman: do better logging/error reports of the quorum API usage
- cman: use strdup instead of malloc+strcpy (code is more readable)
- libcman: perform better error checking in register_quorum_device/update_quorum_device

- Allow qdisk to update device name in cman using a new libcman quorum API call
- Perform slight better error checking of some update opertaions

Resolves: rhbz#735917

Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
---
cman/daemon/cnxman-socket.h | 1 +
cman/daemon/commands.c | 138 +++++++++++++++++++++++++++++++++----------
cman/lib/libcman.c | 19 +++++-
cman/lib/libcman.h | 4 +
cman/qdisk/main.c | 28 ++++++++-
5 files changed, 153 insertions(+), 37 deletions(-)

diff --git a/cman/daemon/cnxman-socket.h b/cman/daemon/cnxman-socket.h
index e8b7378..d243b40 100644
--- a/cman/daemon/cnxman-socket.h
+++ b/cman/daemon/cnxman-socket.h
@@ -32,6 +32,7 @@
#define CMAN_CMD_REG_QUORUMDEV 0x800000b5
#define CMAN_CMD_UNREG_QUORUMDEV 0x800000b6
#define CMAN_CMD_POLL_QUORUMDEV 0x800000b7
+#define CMAN_CMD_UPDATE_QUORUMDEV 0x800000b8
#define CMAN_CMD_TRY_SHUTDOWN 0x800000bb
#define CMAN_CMD_SHUTDOWN_REPLY 0x000000bc
#define CMAN_CMD_UPDATE_FENCE_INFO 0x800000bd
diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index 2948952..567ff96 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -1080,27 +1080,69 @@ static int do_cmd_try_shutdown(struct connection *con, char *cmdbuf)
return 0;
}

+static void free_quorum_device(void)
+{
+ if (!quorum_device)
+ return;
+
+ if (quorum_device->name)
+ free(quorum_device->name);
+
+ free(quorum_device);
+
+ quorum_device = NULL;
+
+ return;
+}
+
+
+static void quorum_device_update_votes(int votes)
+{
+ int oldvotes;
+
+ /* Update votes even if it existed before */
+ oldvotes = quorum_device->votes;
+ quorum_device->votes = votes;
+
+ /* If it is a member and votes decreased, recalculate quorum */
+ if (quorum_device->state == NODESTATE_MEMBER &&
+ oldvotes != votes) {
+ recalculate_quorum(1, 0);
+ }
+}
+
static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
{
int votes;
- int oldvotes;
char *name = cmdbuf+sizeof(int);

- if (!ais_running)
+ if (!ais_running) {
+ log_printf(LOG_ERR, "unable to register quorum device: corosync is not running
");
return -ENOTCONN;
+ }

- if (!we_are_a_cluster_member)
+ if (!we_are_a_cluster_member) {
+ log_printf(LOG_ERR, "unable to register quorum device: this node is not part of a cluster
");
return -ENOENT;
+ }

- if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN)
+ if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN) {
+ log_printf(LOG_ERR, "unable to register quorum device: name is too long
");
+ /* this should probably return -E2BIG? */
return -EINVAL;
+ }

/* Allow re-registering of a quorum device if the name is the same */
- if (quorum_device && strcmp(name, quorum_device->name))
- return -EBUSY;
+ if (quorum_device && strcmp(name, quorum_device->name)) {
+ log_printf(LOG_ERR, "unable to re-register quorum device: device names do not match
");
+ log_printf(LOG_DEBUG, "memb: old name: %s new name: %s
", quorum_device->name, name);
+ return -EBUSY;
+ }

- if (find_node_by_name(name))
- return -EALREADY;
+ if (find_node_by_name(name)) {
+ log_printf(LOG_ERR, "unable to register quorum device: a node with the same name (%s) already exists
", name);
+ return -EALREADY;
+ }

memcpy(&votes, cmdbuf, sizeof(int));

@@ -1108,18 +1150,19 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
if (!quorum_device)
{
quorum_device = malloc(sizeof(struct cluster_node));
- if (!quorum_device)
+ if (!quorum_device) {
+ log_printf(LOG_ERR, "unable to register quorum device: not enough memory
");
return -ENOMEM;
+ }
memset(quorum_device, 0, sizeof(struct cluster_node));

- quorum_device->name = malloc(strlen(name) + 1);
+ quorum_device->name = strdup(name);
if (!quorum_device->name) {
- free(quorum_device);
- quorum_device = NULL;
+ log_printf(LOG_ERR, "unable to register quorum device: not enough memory
");
+ free_quorum_device();
return -ENOMEM;
}

- strcpy(quorum_device->name, name);
quorum_device->state = NODESTATE_DEAD;
gettimeofday(&quorum_device->join_time, NULL);

@@ -1132,34 +1175,63 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
log_printf(LOG_INFO, "quorum device re-registered
");
}

- /* Update votes even if it existed before */
- oldvotes = quorum_device->votes;
- quorum_device->votes = votes;
+ quorum_device_update_votes(votes);

- /* If it is a member and votes decreased, recalculate quorum */
- if (quorum_device->state == NODESTATE_MEMBER &&
- oldvotes != votes) {
- recalculate_quorum(1, 0);
+ return 0;
+}
+
+static int do_cmd_unregister_quorum_device(char *cmdbuf, int *retlen)
+{
+ if (!quorum_device) {
+ log_printf(LOG_DEBUG, "memb: failed to unregister a non existing quorum device
");
+ return -EINVAL;
}

- return 0;
+ if (quorum_device->state == NODESTATE_MEMBER) {
+ log_printf(LOG_DEBUG, "memb: failed to unregister: quorum device still active.
");
+ return -EBUSY;
+ }
+
+ free_quorum_device();
+
+ log_printf(LOG_INFO, "quorum device unregistered
");
+ return 0;
}

-static int do_cmd_unregister_quorum_device(char *cmdbuf, int *retlen)
+static int do_cmd_update_quorum_device(char *cmdbuf, int *retlen)
{
- if (!quorum_device)
- return -EINVAL;
+ int votes, ret = 0;
+ char *name = cmdbuf+sizeof(int);

- if (quorum_device->state == NODESTATE_MEMBER)
- return -EBUSY;
+ if (!quorum_device) {
+ log_printf(LOG_DEBUG, "memb: failed to update a non-existing quorum device
");
+ return -EINVAL;
+ }

- free(quorum_device->name);
- free(quorum_device);
+ memcpy(&votes, cmdbuf, sizeof(int));

- quorum_device = NULL;
+ /* allow name change of the quorum device */
+ if (quorum_device && strcmp(name, quorum_device->name)) {
+ char *newname = NULL;
+ char *oldname = NULL;

- log_printf(LOG_INFO, "quorum device unregistered
");
- return 0;
+ log_printf(LOG_DEBUG, "memb: old name: %s new name: %s
", quorum_device->name, name);
+ newname = strdup(name);
+ if (!newname) {
+ log_printf(LOG_ERR, "memb: unable to update quorum device name: out of memory
");
+ ret = -ENOMEM;
+ goto out;
+ }
+ log_printf(LOG_INFO, "quorum device name changed to %s
", name);
+ oldname = quorum_device->name;
+ quorum_device->name = newname;
+ free(oldname);
+ }
+
+out:
+ quorum_device_update_votes(votes);
+
+ return ret;
}

static int reload_config(int new_version, int should_broadcast)
@@ -1560,6 +1632,10 @@ int process_command(struct connection *con, int cmd, char *cmdbuf,
err = do_cmd_unregister_quorum_device(cmdbuf, retlen);
break;

+ case CMAN_CMD_UPDATE_QUORUMDEV:
+ err = do_cmd_update_quorum_device(cmdbuf, retlen);
+ break;
+
case CMAN_CMD_POLL_QUORUMDEV:
err = do_cmd_poll_quorum_device(cmdbuf, retlen);
break;
diff --git a/cman/lib/libcman.c b/cman/lib/libcman.c
index daaad07..a89c731 100644
--- a/cman/lib/libcman.c
+++ b/cman/lib/libcman.c
@@ -1002,14 +1002,15 @@ int cman_replyto_shutdown(cman_handle_t handle, int yesno)
return 0;
}

-
-int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
+static int cman_set_quorum_device(cman_handle_t handle,
+ int ops,
+ char *name, int votes)
{
struct cman_handle *h = (struct cman_handle *)handle;
char buf[strlen(name)+1 + sizeof(int)];
VALIDATE_HANDLE(h);

- if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN)
+ if ((!name) || (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN) || (votes < 0))
{
errno = EINVAL;
return -1;
@@ -1017,7 +1018,12 @@ int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)

memcpy(buf, &votes, sizeof(int));
strcpy(buf+sizeof(int), name);
- return info_call(h, CMAN_CMD_REG_QUORUMDEV, buf, strlen(name)+1+sizeof(int), NULL, 0);
+ return info_call(h, ops, buf, strlen(name)+1+sizeof(int), NULL, 0);
+}
+
+int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
+{
+ return cman_set_quorum_device(handle, CMAN_CMD_REG_QUORUMDEV, name, votes);
}

int cman_unregister_quorum_device(cman_handle_t handle)
@@ -1053,6 +1059,11 @@ int cman_get_quorum_device(cman_handle_t handle, struct cman_qdev_info *info)
return ret;
}

+int cman_update_quorum_device(cman_handle_t handle, char *name, int votes)
+{
+ return cman_set_quorum_device(handle, CMAN_CMD_UPDATE_QUORUMDEV, name, votes);
+}
+
int cman_get_fenceinfo(cman_handle_t handle, int nodeid, uint64_t *time, int *fenced, char *agent)
{
struct cman_handle *h = (struct cman_handle *)handle;
diff --git a/cman/lib/libcman.h b/cman/lib/libcman.h
index feb10a2..9f97875 100644
--- a/cman/lib/libcman.h
+++ b/cman/lib/libcman.h
@@ -420,6 +420,9 @@ int cman_barrier_delete(cman_handle_t handle, const char *name);
/*
* Add your own quorum device here, needs an admin socket
*
+ * register_quorum and update_quorum arguments are mandatory.
+ * name has to be a valid null-terminated string and votes >= 0.
+ *
* After creating a quorum device you will need to call 'poll_quorum_device'
* at least once every (default) 10 seconds (this can be changed in CCS)
* otherwise it will time-out and the cluster will lose its vote.
@@ -428,6 +431,7 @@ int cman_register_quorum_device(cman_handle_t handle, char *name, int votes);
int cman_unregister_quorum_device(cman_handle_t handle);
int cman_poll_quorum_device(cman_handle_t handle, int isavailable);
int cman_get_quorum_device(cman_handle_t handle, struct cman_qdev_info *info);
+int cman_update_quorum_device(cman_handle_t handle, char *name, int votes);

/*
* Sets the dirty bit inside cman. This indicates that the node has
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index c1598fa..2f0c2ca 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -690,6 +690,17 @@ register_device(qd_ctx *ctx)
ctx->qc_votes : 0);
}

+static int
+update_device(qd_ctx *ctx)
+{
+ return cman_update_quorum_device(
+ ctx->qc_cman_admin,
+ (ctx->qc_flags&RF_CMAN_LABEL) ?
+ ctx->qc_cman_label : ctx->qc_device,
+ (!(ctx->qc_flags & RF_MASTER_WINS) ||
+ ctx->qc_status == S_MASTER) ?
+ ctx->qc_votes : 0);
+}

static int
adjust_votes(qd_ctx *ctx)
@@ -2119,9 +2130,22 @@ main(int argc, char **argv)

if (!_running)
goto out;
-
+
/* This registers the quorum device */
- register_device(&ctx);
+ ret = register_device(&ctx);
+ if (ret) {
+ if (errno == EBUSY) {
+ logt_print(LOG_NOTICE, "quorum device is already registered, updating
");
+ ret = update_device(&ctx);
+ if (ret) {
+ logt_print(LOG_ERR, "DEBUG: unable to update quorum device info
");
+ goto out;
+ }
+ } else {
+ logt_print(LOG_ERR, "Unable to register quorum device!
");
+ goto out;
+ }
+ }

io_nanny_start(ch_user, ctx.qc_tko * ctx.qc_interval);

--
1.7.4.4
 
Old 09-07-2011, 06:07 PM
Lon Hohberger
 
Default cman: improve cman/qdisk interactions

Hi,

Good design -- couple of things...

On Wed, Sep 07, 2011 at 03:10:25PM +0200, Fabio M. Di Nitto wrote:
> - cman: use strdup instead of malloc+strcpy (code is more readable)

* Not really a necessary change, but ok.

> - libcman: perform better error checking in register_quorum_device/update_quorum_device
>
> - Allow qdisk to update device name in cman using a new libcman quorum API call
> - Perform slight better error checking of some update opertaions
>
> Resolves: rhbz#735917

* All in all, this seems like a lot of patch for a very tiny problem or
set of problems in a very narrow use case (renaming quorum device).


> + if (quorum_device->name)
> + free(quorum_device->name);
> +
> + free(quorum_device);
> +
> + quorum_device = NULL;
> +
> + return;

* return is implied in functions returning void (no big deal; stylistic
stuff)

> +static void quorum_device_update_votes(int votes)
> +{
> + int oldvotes;
> +
> + /* Update votes even if it existed before */
> + oldvotes = quorum_device->votes;
> + quorum_device->votes = votes;
> +
> + /* If it is a member and votes decreased, recalculate quorum */
> + if (quorum_device->state == NODESTATE_MEMBER &&
> + oldvotes != votes) {
> + recalculate_quorum(1, 0);
> + }
> +}

* Code does not match comments; 'oldvotes != votes' is not a check
for decrease, and recalculate_quorum(1, 0) allows both increases and
decreases to votes. Is the comment wrong or the code? (I think the
comment is wrong...)


> + if (errno == EBUSY) {
> + logt_print(LOG_NOTICE, "quorum device is already registered, updating
");
> + ret = update_device(&ctx);
> + if (ret) {
> + logt_print(LOG_ERR, "DEBUG: unable to update quorum device info
");
> + goto out;
> + }

* Is this a debug or an error message? An error message with DEBUG: in
it is confusing; can we have one or the other?

--
Lon Hohberger - Red Hat, Inc.
 
Old 09-08-2011, 06:39 AM
"Fabio M. Di Nitto"
 
Default cman: improve cman/qdisk interactions

On 9/7/2011 8:07 PM, Lon Hohberger wrote:
> Hi,
>
> Good design -- couple of things...
>
> On Wed, Sep 07, 2011 at 03:10:25PM +0200, Fabio M. Di Nitto wrote:
>> - cman: use strdup instead of malloc+strcpy (code is more readable)
>
> * Not really a necessary change, but ok.

Yeah I agree, I just didnīt like malloc + strcpy and lack of memset to
guarantee 0 byte end string.

>
>> - libcman: perform better error checking in register_quorum_device/update_quorum_device
>>
>> - Allow qdisk to update device name in cman using a new libcman quorum API call
>> - Perform slight better error checking of some update opertaions
>>
>> Resolves: rhbz#735917
>
> * All in all, this seems like a lot of patch for a very tiny problem or
> set of problems in a very narrow use case (renaming quorum device).

It looks big only because I turn a lot of code in register_quorum_device
shared with update_quorum_device

So far there are 2 problems clearly identified. One is purely cosmetic
(renaming the device and show it correctly), the other might be less
cosmetic, where, after renaming a device, qdiskd cannot update votes in
cman anylonger. This is because the vote updates are pushed via
register_quorum, that after a rename, will always return -EBUSY.

I agree that both are corner cases for most users, but itīs also easy to
fix.

>
>
>> + if (quorum_device->name)
>> + free(quorum_device->name);
>> +
>> + free(quorum_device);
>> +
>> + quorum_device = NULL;
>> +
>> + return;
>
> * return is implied in functions returning void (no big deal; stylistic
> stuff)

Yes, I like it explicit but either way I can drop it.

>
>> +static void quorum_device_update_votes(int votes)
>> +{
>> + int oldvotes;
>> +
>> + /* Update votes even if it existed before */
>> + oldvotes = quorum_device->votes;
>> + quorum_device->votes = votes;
>> +
>> + /* If it is a member and votes decreased, recalculate quorum */
>> + if (quorum_device->state == NODESTATE_MEMBER &&
>> + oldvotes != votes) {
>> + recalculate_quorum(1, 0);
>> + }
>> +}
>
> * Code does not match comments; 'oldvotes != votes' is not a check
> for decrease, and recalculate_quorum(1, 0) allows both increases and
> decreases to votes. Is the comment wrong or the code? (I think the
> comment is wrong...)

I believe the comment is incorrect. That code has been copied pristine
from register_quorum and shared with update_quorum.

>
>
>> + if (errno == EBUSY) {
>> + logt_print(LOG_NOTICE, "quorum device is already registered, updating
");
>> + ret = update_device(&ctx);
>> + if (ret) {
>> + logt_print(LOG_ERR, "DEBUG: unable to update quorum device info
");
>> + goto out;
>> + }
>
> * Is this a debug or an error message? An error message with DEBUG: in
> it is confusing; can we have one or the other?
>

Plain oversight... it should be an error message since we are aborting
startup.

Fabio
 
Old 09-08-2011, 06:59 AM
"Fabio M. Di Nitto"
 
Default cman: improve cman/qdisk interactions

- libcman/cman: add new quorum API call to update name and votes of a quorum device
- cman: simplify common code to free quorum_device infrastructure and handle quorum recalculation
- cman: do better logging/error reports of the quorum API usage
- cman: use strdup instead of malloc+strcpy (code is more readable)
- libcman: perform better error checking in register_quorum_device/update_quorum_device

- Allow qdisk to update device name in cman using a new libcman quorum API call
- Perform slight better error checking of some update opertaions

Resolves: rhbz#735917

Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
---
cman/daemon/cnxman-socket.h | 1 +
cman/daemon/commands.c | 135 +++++++++++++++++++++++++++++++++----------
cman/lib/libcman.c | 19 +++++-
cman/lib/libcman.h | 4 +
cman/qdisk/main.c | 28 ++++++++-
5 files changed, 150 insertions(+), 37 deletions(-)

diff --git a/cman/daemon/cnxman-socket.h b/cman/daemon/cnxman-socket.h
index e8b7378..d243b40 100644
--- a/cman/daemon/cnxman-socket.h
+++ b/cman/daemon/cnxman-socket.h
@@ -32,6 +32,7 @@
#define CMAN_CMD_REG_QUORUMDEV 0x800000b5
#define CMAN_CMD_UNREG_QUORUMDEV 0x800000b6
#define CMAN_CMD_POLL_QUORUMDEV 0x800000b7
+#define CMAN_CMD_UPDATE_QUORUMDEV 0x800000b8
#define CMAN_CMD_TRY_SHUTDOWN 0x800000bb
#define CMAN_CMD_SHUTDOWN_REPLY 0x000000bc
#define CMAN_CMD_UPDATE_FENCE_INFO 0x800000bd
diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index 2948952..02fe88d 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -1080,27 +1080,66 @@ static int do_cmd_try_shutdown(struct connection *con, char *cmdbuf)
return 0;
}

+static void free_quorum_device(void)
+{
+ if (!quorum_device)
+ return;
+
+ if (quorum_device->name)
+ free(quorum_device->name);
+
+ free(quorum_device);
+
+ quorum_device = NULL;
+}
+
+static void quorum_device_update_votes(int votes)
+{
+ int oldvotes;
+
+ /* Update votes even if it existed before */
+ oldvotes = quorum_device->votes;
+ quorum_device->votes = votes;
+
+ /* If it is a member and votes changed, recalculate quorum */
+ if (quorum_device->state == NODESTATE_MEMBER &&
+ oldvotes != votes) {
+ recalculate_quorum(1, 0);
+ }
+}
+
static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
{
int votes;
- int oldvotes;
char *name = cmdbuf+sizeof(int);

- if (!ais_running)
+ if (!ais_running) {
+ log_printf(LOG_ERR, "unable to register quorum device: corosync is not running
");
return -ENOTCONN;
+ }

- if (!we_are_a_cluster_member)
+ if (!we_are_a_cluster_member) {
+ log_printf(LOG_ERR, "unable to register quorum device: this node is not part of a cluster
");
return -ENOENT;
+ }

- if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN)
+ if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN) {
+ log_printf(LOG_ERR, "unable to register quorum device: name is too long
");
+ /* this should probably return -E2BIG? */
return -EINVAL;
+ }

/* Allow re-registering of a quorum device if the name is the same */
- if (quorum_device && strcmp(name, quorum_device->name))
- return -EBUSY;
+ if (quorum_device && strcmp(name, quorum_device->name)) {
+ log_printf(LOG_ERR, "unable to re-register quorum device: device names do not match
");
+ log_printf(LOG_DEBUG, "memb: old name: %s new name: %s
", quorum_device->name, name);
+ return -EBUSY;
+ }

- if (find_node_by_name(name))
- return -EALREADY;
+ if (find_node_by_name(name)) {
+ log_printf(LOG_ERR, "unable to register quorum device: a node with the same name (%s) already exists
", name);
+ return -EALREADY;
+ }

memcpy(&votes, cmdbuf, sizeof(int));

@@ -1108,18 +1147,19 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
if (!quorum_device)
{
quorum_device = malloc(sizeof(struct cluster_node));
- if (!quorum_device)
+ if (!quorum_device) {
+ log_printf(LOG_ERR, "unable to register quorum device: not enough memory
");
return -ENOMEM;
+ }
memset(quorum_device, 0, sizeof(struct cluster_node));

- quorum_device->name = malloc(strlen(name) + 1);
+ quorum_device->name = strdup(name);
if (!quorum_device->name) {
- free(quorum_device);
- quorum_device = NULL;
+ log_printf(LOG_ERR, "unable to register quorum device: not enough memory
");
+ free_quorum_device();
return -ENOMEM;
}

- strcpy(quorum_device->name, name);
quorum_device->state = NODESTATE_DEAD;
gettimeofday(&quorum_device->join_time, NULL);

@@ -1132,34 +1172,63 @@ static int do_cmd_register_quorum_device(char *cmdbuf, int *retlen)
log_printf(LOG_INFO, "quorum device re-registered
");
}

- /* Update votes even if it existed before */
- oldvotes = quorum_device->votes;
- quorum_device->votes = votes;
+ quorum_device_update_votes(votes);

- /* If it is a member and votes decreased, recalculate quorum */
- if (quorum_device->state == NODESTATE_MEMBER &&
- oldvotes != votes) {
- recalculate_quorum(1, 0);
+ return 0;
+}
+
+static int do_cmd_unregister_quorum_device(char *cmdbuf, int *retlen)
+{
+ if (!quorum_device) {
+ log_printf(LOG_DEBUG, "memb: failed to unregister a non existing quorum device
");
+ return -EINVAL;
+ }
+
+ if (quorum_device->state == NODESTATE_MEMBER) {
+ log_printf(LOG_DEBUG, "memb: failed to unregister: quorum device still active.
");
+ return -EBUSY;
}

- return 0;
+ free_quorum_device();
+
+ log_printf(LOG_INFO, "quorum device unregistered
");
+ return 0;
}

-static int do_cmd_unregister_quorum_device(char *cmdbuf, int *retlen)
+static int do_cmd_update_quorum_device(char *cmdbuf, int *retlen)
{
- if (!quorum_device)
- return -EINVAL;
+ int votes, ret = 0;
+ char *name = cmdbuf+sizeof(int);

- if (quorum_device->state == NODESTATE_MEMBER)
- return -EBUSY;
+ if (!quorum_device) {
+ log_printf(LOG_DEBUG, "memb: failed to update a non-existing quorum device
");
+ return -EINVAL;
+ }

- free(quorum_device->name);
- free(quorum_device);
+ memcpy(&votes, cmdbuf, sizeof(int));

- quorum_device = NULL;
+ /* allow name change of the quorum device */
+ if (quorum_device && strcmp(name, quorum_device->name)) {
+ char *newname = NULL;
+ char *oldname = NULL;

- log_printf(LOG_INFO, "quorum device unregistered
");
- return 0;
+ log_printf(LOG_DEBUG, "memb: old name: %s new name: %s
", quorum_device->name, name);
+ newname = strdup(name);
+ if (!newname) {
+ log_printf(LOG_ERR, "memb: unable to update quorum device name: out of memory
");
+ ret = -ENOMEM;
+ goto out;
+ }
+ log_printf(LOG_INFO, "quorum device name changed to %s
", name);
+ oldname = quorum_device->name;
+ quorum_device->name = newname;
+ free(oldname);
+ }
+
+out:
+ quorum_device_update_votes(votes);
+
+ return ret;
}

static int reload_config(int new_version, int should_broadcast)
@@ -1560,6 +1629,10 @@ int process_command(struct connection *con, int cmd, char *cmdbuf,
err = do_cmd_unregister_quorum_device(cmdbuf, retlen);
break;

+ case CMAN_CMD_UPDATE_QUORUMDEV:
+ err = do_cmd_update_quorum_device(cmdbuf, retlen);
+ break;
+
case CMAN_CMD_POLL_QUORUMDEV:
err = do_cmd_poll_quorum_device(cmdbuf, retlen);
break;
diff --git a/cman/lib/libcman.c b/cman/lib/libcman.c
index daaad07..a89c731 100644
--- a/cman/lib/libcman.c
+++ b/cman/lib/libcman.c
@@ -1002,14 +1002,15 @@ int cman_replyto_shutdown(cman_handle_t handle, int yesno)
return 0;
}

-
-int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
+static int cman_set_quorum_device(cman_handle_t handle,
+ int ops,
+ char *name, int votes)
{
struct cman_handle *h = (struct cman_handle *)handle;
char buf[strlen(name)+1 + sizeof(int)];
VALIDATE_HANDLE(h);

- if (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN)
+ if ((!name) || (strlen(name) > MAX_CLUSTER_MEMBER_NAME_LEN) || (votes < 0))
{
errno = EINVAL;
return -1;
@@ -1017,7 +1018,12 @@ int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)

memcpy(buf, &votes, sizeof(int));
strcpy(buf+sizeof(int), name);
- return info_call(h, CMAN_CMD_REG_QUORUMDEV, buf, strlen(name)+1+sizeof(int), NULL, 0);
+ return info_call(h, ops, buf, strlen(name)+1+sizeof(int), NULL, 0);
+}
+
+int cman_register_quorum_device(cman_handle_t handle, char *name, int votes)
+{
+ return cman_set_quorum_device(handle, CMAN_CMD_REG_QUORUMDEV, name, votes);
}

int cman_unregister_quorum_device(cman_handle_t handle)
@@ -1053,6 +1059,11 @@ int cman_get_quorum_device(cman_handle_t handle, struct cman_qdev_info *info)
return ret;
}

+int cman_update_quorum_device(cman_handle_t handle, char *name, int votes)
+{
+ return cman_set_quorum_device(handle, CMAN_CMD_UPDATE_QUORUMDEV, name, votes);
+}
+
int cman_get_fenceinfo(cman_handle_t handle, int nodeid, uint64_t *time, int *fenced, char *agent)
{
struct cman_handle *h = (struct cman_handle *)handle;
diff --git a/cman/lib/libcman.h b/cman/lib/libcman.h
index feb10a2..9f97875 100644
--- a/cman/lib/libcman.h
+++ b/cman/lib/libcman.h
@@ -420,6 +420,9 @@ int cman_barrier_delete(cman_handle_t handle, const char *name);
/*
* Add your own quorum device here, needs an admin socket
*
+ * register_quorum and update_quorum arguments are mandatory.
+ * name has to be a valid null-terminated string and votes >= 0.
+ *
* After creating a quorum device you will need to call 'poll_quorum_device'
* at least once every (default) 10 seconds (this can be changed in CCS)
* otherwise it will time-out and the cluster will lose its vote.
@@ -428,6 +431,7 @@ int cman_register_quorum_device(cman_handle_t handle, char *name, int votes);
int cman_unregister_quorum_device(cman_handle_t handle);
int cman_poll_quorum_device(cman_handle_t handle, int isavailable);
int cman_get_quorum_device(cman_handle_t handle, struct cman_qdev_info *info);
+int cman_update_quorum_device(cman_handle_t handle, char *name, int votes);

/*
* Sets the dirty bit inside cman. This indicates that the node has
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index c1598fa..d613d84 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -690,6 +690,17 @@ register_device(qd_ctx *ctx)
ctx->qc_votes : 0);
}

+static int
+update_device(qd_ctx *ctx)
+{
+ return cman_update_quorum_device(
+ ctx->qc_cman_admin,
+ (ctx->qc_flags&RF_CMAN_LABEL) ?
+ ctx->qc_cman_label : ctx->qc_device,
+ (!(ctx->qc_flags & RF_MASTER_WINS) ||
+ ctx->qc_status == S_MASTER) ?
+ ctx->qc_votes : 0);
+}

static int
adjust_votes(qd_ctx *ctx)
@@ -2119,9 +2130,22 @@ main(int argc, char **argv)

if (!_running)
goto out;
-
+
/* This registers the quorum device */
- register_device(&ctx);
+ ret = register_device(&ctx);
+ if (ret) {
+ if (errno == EBUSY) {
+ logt_print(LOG_NOTICE, "quorum device is already registered, updating
");
+ ret = update_device(&ctx);
+ if (ret) {
+ logt_print(LOG_ERR, "Unable to update quorum device info!
");
+ goto out;
+ }
+ } else {
+ logt_print(LOG_ERR, "Unable to register quorum device!
");
+ goto out;
+ }
+ }

io_nanny_start(ch_user, ctx.qc_tko * ctx.qc_interval);

--
1.7.4.4
 
Old 09-08-2011, 07:02 AM
"Fabio M. Di Nitto"
 
Default cman: improve cman/qdisk interactions

On 9/8/2011 8:59 AM, Fabio M. Di Nitto wrote:
> - libcman/cman: add new quorum API call to update name and votes of a quorum device
> - cman: simplify common code to free quorum_device infrastructure and handle quorum recalculation
> - cman: do better logging/error reports of the quorum API usage
> - cman: use strdup instead of malloc+strcpy (code is more readable)
> - libcman: perform better error checking in register_quorum_device/update_quorum_device
>
> - Allow qdisk to update device name in cman using a new libcman quorum API call
> - Perform slight better error checking of some update opertaions
>
> Resolves: rhbz#735917
>
> Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>

This last version addresses the few comments you had in the other patch.

Fabio
 
Old 09-08-2011, 07:45 AM
Christine Caulfield
 
Default cman: improve cman/qdisk interactions

On 07/09/11 19:07, Lon Hohberger wrote:

Hi,

Good design -- couple of things...

On Wed, Sep 07, 2011 at 03:10:25PM +0200, Fabio M. Di Nitto wrote:

- cman: use strdup instead of malloc+strcpy (code is more readable)


* Not really a necessary change, but ok.


- libcman: perform better error checking in register_quorum_device/update_quorum_device

- Allow qdisk to update device name in cman using a new libcman quorum API call
- Perform slight better error checking of some update opertaions

Resolves: rhbz#735917


* All in all, this seems like a lot of patch for a very tiny problem or
set of problems in a very narrow use case (renaming quorum device).





I've chatted with Fabio about this. It's my opinion that this is
actually the least intrusive way of fixing the problem. The other ways
involve overloading existing API calls and adding state, which add not
much less code and also lot more in the way of potential confusions and
problems.


Chrissie
 
Old 09-08-2011, 02:04 PM
Lon Hohberger
 
Default cman: improve cman/qdisk interactions

On Thu, Sep 08, 2011 at 08:59:22AM +0200, Fabio M. Di Nitto wrote:
> - libcman/cman: add new quorum API call to update name and votes of a quorum device
> - cman: simplify common code to free quorum_device infrastructure and handle quorum recalculation
> - cman: do better logging/error reports of the quorum API usage
> - cman: use strdup instead of malloc+strcpy (code is more readable)
> - libcman: perform better error checking in register_quorum_device/update_quorum_device
>
> - Allow qdisk to update device name in cman using a new libcman quorum API call
> - Perform slight better error checking of some update opertaions

Thanks, looks good.

-- Lon
--
Lon Hohberger - Red Hat, Inc.
 

Thread Tools




All times are GMT. The time now is 02:02 PM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright Đ2007 - 2008, www.linux-archive.org