Linux Archive

Linux Archive (http://www.linux-archive.org/)
-   Cluster Development (http://www.linux-archive.org/cluster-development/)
-   -   rgmanager: Fix for deadlock (http://www.linux-archive.org/cluster-development/707938-rgmanager-fix-deadlock.html)

Ryan McCabe 09-27-2012 07:18 PM

rgmanager: Fix for deadlock
 
This patch fixes a deadlock in rgmanager that could occur when a node
starts rgmanager while a service is recovering.

Resolves: rhbz#861157

Signed-off-by: Ryan McCabe <rmccabe@redhat.com>
---
rgmanager/src/daemons/rg_state.c | 1 +
rgmanager/src/daemons/rg_thread.c | 19 ++++++++++++++++++-
rgmanager/src/daemons/service_op.c | 1 +
3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index 8c5af5b..80e8667 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -1963,6 +1963,7 @@ retry:
/* Deliberate */
case RG_EDEPEND:
case RG_EFAIL:
+ case RG_EDEADLCK:
/* Uh oh - we failed to relocate to this node.
ensure that we tell the next node to start it from
the 'recovering' state. */
diff --git a/rgmanager/src/daemons/rg_thread.c b/rgmanager/src/daemons/rg_thread.c
index 72b5f96..5e551c3 100644
--- a/rgmanager/src/daemons/rg_thread.c
+++ b/rgmanager/src/daemons/rg_thread.c
@@ -9,6 +9,8 @@
#include <rg_queue.h>
#include <assert.h>
#include <members.h>
+#include <liblogthread.h>
+

/**
* Resource thread list entry.
@@ -735,13 +737,28 @@ rt_enqueue_request(const char *resgroupname, int request,
ret = 0;
break;
}
- fprintf(stderr, "Failed to queue request: Would block\n");
/* EWOULDBLOCK */
pthread_mutex_unlock(resgroup->rt_queue_mutex);
pthread_mutex_unlock(&reslist_mutex);
+ logt_print(LOG_DEBUG,
+ "Failed to queue %d request for %s: Would block\n",
+ request, resgroupname);
return ret;
}

+ if (resgroup->rt_request == RG_START &&
+ (request == RG_START_REMOTE || request == RG_START_RECOVER)) {
+ send_ret(response_ctx, resgroup->rt_name, RG_EDEADLCK,
+ request, 0);
+ msg_free_ctx(response_ctx);
+ pthread_mutex_unlock(resgroup->rt_queue_mutex);
+ pthread_mutex_unlock(&reslist_mutex);
+ logt_print(LOG_DEBUG,
+ "Failed to queue %d request for %s: Would block\n",
+ request, resgroupname);
+ return -1;
+ }
+
ret = rq_queue_request(resgroup->rt_queue, resgroup->rt_name,
request, 0, 0, response_ctx, 0, target,
arg0, arg1);
diff --git a/rgmanager/src/daemons/service_op.c b/rgmanager/src/daemons/service_op.c
index f094129..4b74427 100644
--- a/rgmanager/src/daemons/service_op.c
+++ b/rgmanager/src/daemons/service_op.c
@@ -62,6 +62,7 @@ service_op_start(char *svcName,
++dep;
continue;
case RG_EFAIL:
+ case RG_EDEADLCK:
++fail;
continue;
case RG_EABORT:
--
1.7.11.4

Lon Hohberger 09-28-2012 07:08 PM

rgmanager: Fix for deadlock
 
On 09/27/2012 03:18 PM, Ryan McCabe wrote:
> This patch fixes a deadlock in rgmanager that could occur when a node
> starts rgmanager while a service is recovering.


This basically prevents cases (which, judging by the comments, other
places in the code also try to avoid) where RG_START_REMOTE is passed
while an RG_START is already being processed for the same service on the
same node and one or more starts have failed.

ACK.

-- Lon


>
> Resolves: rhbz#861157
>
> Signed-off-by: Ryan McCabe <rmccabe@redhat.com>
> ---
> rgmanager/src/daemons/rg_state.c | 1 +
> rgmanager/src/daemons/rg_thread.c | 19 ++++++++++++++++++-
> rgmanager/src/daemons/service_op.c | 1 +
> 3 files changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
> index 8c5af5b..80e8667 100644
> --- a/rgmanager/src/daemons/rg_state.c
> +++ b/rgmanager/src/daemons/rg_state.c
> @@ -1963,6 +1963,7 @@ retry:
> /* Deliberate */
> case RG_EDEPEND:
> case RG_EFAIL:
> + case RG_EDEADLCK:
> /* Uh oh - we failed to relocate to this node.
> ensure that we tell the next node to start it from
> the 'recovering' state. */
> diff --git a/rgmanager/src/daemons/rg_thread.c b/rgmanager/src/daemons/rg_thread.c
> index 72b5f96..5e551c3 100644
> --- a/rgmanager/src/daemons/rg_thread.c
> +++ b/rgmanager/src/daemons/rg_thread.c
> @@ -9,6 +9,8 @@
> #include <rg_queue.h>
> #include <assert.h>
> #include <members.h>
> +#include <liblogthread.h>
> +
>
> /**
> * Resource thread list entry.
> @@ -735,13 +737,28 @@ rt_enqueue_request(const char *resgroupname, int request,
> ret = 0;
> break;
> }
> - fprintf(stderr, "Failed to queue request: Would block\n");
> /* EWOULDBLOCK */
> pthread_mutex_unlock(resgroup->rt_queue_mutex);
> pthread_mutex_unlock(&reslist_mutex);
> + logt_print(LOG_DEBUG,
> + "Failed to queue %d request for %s: Would block\n",
> + request, resgroupname);
> return ret;
> }
>
> + if (resgroup->rt_request == RG_START &&
> + (request == RG_START_REMOTE || request == RG_START_RECOVER)) {
> + send_ret(response_ctx, resgroup->rt_name, RG_EDEADLCK,
> + request, 0);
> + msg_free_ctx(response_ctx);
> + pthread_mutex_unlock(resgroup->rt_queue_mutex);
> + pthread_mutex_unlock(&reslist_mutex);
> + logt_print(LOG_DEBUG,
> + "Failed to queue %d request for %s: Would block\n",
> + request, resgroupname);
> + return -1;
> + }
> +
> ret = rq_queue_request(resgroup->rt_queue, resgroup->rt_name,
> request, 0, 0, response_ctx, 0, target,
> arg0, arg1);
> diff --git a/rgmanager/src/daemons/service_op.c b/rgmanager/src/daemons/service_op.c
> index f094129..4b74427 100644
> --- a/rgmanager/src/daemons/service_op.c
> +++ b/rgmanager/src/daemons/service_op.c
> @@ -62,6 +62,7 @@ service_op_start(char *svcName,
> ++dep;
> continue;
> case RG_EFAIL:
> + case RG_EDEADLCK:
> ++fail;
> continue;
> case RG_EABORT:
>

Ryan McCabe 10-01-2012 03:29 PM

rgmanager: Fix for deadlock
 
This patch fixes a deadlock in rgmanager that could occur when a node
starts rgmanager while a service is recovering.

Resolves: rhbz#834459

Signed-off-by: Ryan McCabe <rmccabe@redhat.com>
---
rgmanager/src/daemons/rg_state.c | 1 +
rgmanager/src/daemons/rg_thread.c | 17 ++++++++++++++++-
rgmanager/src/daemons/service_op.c | 1 +
3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index 9000f1b..b447cb3 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -2009,6 +2009,7 @@ retry:
/* Deliberate */
case RG_EDEPEND:
case RG_EFAIL:
+ case RG_EDEADLCK:
/* Uh oh - we failed to relocate to this node.
ensure that we tell the next node to start it from
the 'recovering' state. */
diff --git a/rgmanager/src/daemons/rg_thread.c b/rgmanager/src/daemons/rg_thread.c
index 769ca1e..60bf0c6 100644
--- a/rgmanager/src/daemons/rg_thread.c
+++ b/rgmanager/src/daemons/rg_thread.c
@@ -23,6 +23,7 @@
#include <rg_queue.h>
#include <assert.h>
#include <members.h>
+#include <clulog.h>

/**
* Resource thread list entry.
@@ -749,13 +750,27 @@ rt_enqueue_request(const char *resgroupname, int request,
msg_free_ctx(response_ctx);
break;
}
- fprintf(stderr, "Failed to queue request: Would block\n");
+ pthread_mutex_unlock(resgroup->rt_queue_mutex);
+ pthread_mutex_unlock(&reslist_mutex);
+ clulog(LOG_DEBUG, "Failed to queue request: Would block\n");
/* EWOULDBLOCK */
+ return -1;
+ }
+
+ if (resgroup->rt_request == RG_START &&
+ (request == RG_START_REMOTE || request == RG_START_RECOVER)) {
+ send_ret(response_ctx, resgroup->rt_name, RG_EDEADLCK,
+ request, 0);
+ msg_free_ctx(response_ctx);
pthread_mutex_unlock(resgroup->rt_queue_mutex);
pthread_mutex_unlock(&reslist_mutex);
+ clulog(LOG_DEBUG,
+ "Failed to queue %d request for %s: Would block\n",
+ request, resgroupname);
return -1;
}

+
ret = rq_queue_request(resgroup->rt_queue, resgroup->rt_name,
request, 0, 0, response_ctx, 0, target,
arg0, arg1);
diff --git a/rgmanager/src/daemons/service_op.c b/rgmanager/src/daemons/service_op.c
index 2ea59f9..bf0e1fc 100644
--- a/rgmanager/src/daemons/service_op.c
+++ b/rgmanager/src/daemons/service_op.c
@@ -82,6 +82,7 @@ service_op_start(char *svcName,
++dep;
continue;
case RG_EFAIL:
+ case RG_EDEADLCK:
++fail;
continue;
case RG_EABORT:
--
1.7.11.4

Lon Hohberger 10-02-2012 03:31 PM

rgmanager: Fix for deadlock
 
On 10/01/2012 11:29 AM, Ryan McCabe wrote:
> This patch fixes a deadlock in rgmanager that could occur when a node
> starts rgmanager while a service is recovering.
>
> Resolves: rhbz#834459
>
> Signed-off-by: Ryan McCabe <rmccabe@redhat.com>

That looks like a RHEL5 port of the same patch.

Ack.

-- Lon


All times are GMT. The time now is 02:41 AM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.