FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive linux email lists and to make them easily accessible for linux users/developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > Redhat > Cluster Development

 
 
LinkBack Thread Tools
 
Old 04-07-2011, 08:55 PM
Lon Hohberger
 
Default rhel5 rgmanager: Pause during exit if we stopped services

A difference between rgmanager 1.9.x and later versions is that
they rely on openais/corosync for messaging. This messaging is
quite reliable and has proved useful.

However, one drawback is that if you very quickly stop rgmanager
and corosync/cman, the other nodes in the cluster can not restart
services because message traffic is interrupted for the duration
of the token timeout.

There is no simple solution to this problem. Rgmanager could
(in theory) find new placements for services prior to stopping,
but this is a large amount of design work; it was never designed
to run policies in the exit path.

A far simpler idea is to simply give the other nodes time to
restart services.

NOTE: This solution does not and can not work with central
processing mode.

Resolves: rhbz#619468

Signed-off-by: Lon Hohberger <lhh@redhat.com>
---
rgmanager/include/event.h | 1 +
rgmanager/include/resgroup.h | 2 +-
rgmanager/src/daemons/groups.c | 15 +++++++++++++--
rgmanager/src/daemons/main.c | 14 ++++++++++++--
rgmanager/src/daemons/rg_event.c | 7 +++++++
5 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/rgmanager/include/event.h b/rgmanager/include/event.h
index 7e628d8..e63dffd 100644
--- a/rgmanager/include/event.h
+++ b/rgmanager/include/event.h
@@ -136,6 +136,7 @@ int slang_process_event(event_table_t *event_table, event_t *ev);

/* For distributed events. */
void set_transition_throttling(int nsecs);
+int get_transition_throttling(void);

/* Simplified service start. */
int service_op_start(char *svcName, int *target_list, int target_list_len,
diff --git a/rgmanager/include/resgroup.h b/rgmanager/include/resgroup.h
index 793ad3b..4be4dbc 100644
--- a/rgmanager/include/resgroup.h
+++ b/rgmanager/include/resgroup.h
@@ -160,7 +160,7 @@ void send_ret(msgctx_t *ctx, char *name, int ret, int req, int newowner);

/* do this op on all resource groups. The handler for the request
will sort out whether or not it's a valid request given the state */
-void rg_doall(int request, int block, char *debugfmt);
+int rg_doall(int request, int block, const char *debugfmt);
void do_status_checks(void); /* Queue status checks for locally running
services */

diff --git a/rgmanager/src/daemons/groups.c b/rgmanager/src/daemons/groups.c
index ecb7b85..b546421 100644
--- a/rgmanager/src/daemons/groups.c
+++ b/rgmanager/src/daemons/groups.c
@@ -1292,12 +1292,21 @@ svc_exists(char *svcname)
}


-void
-rg_doall(int request, int block, char *debugfmt)
+/*
+ * Perform an operation on all resource groups.
+ *
+ * Returns the number of requests queued. This value is
+ * only used during shutdown, where we queue RG_STOP_EXITING
+ * only for services we have running locally as an optimization.
+ */
+int
+rg_doall(int request, int block,
+ const char *debugfmt)
{
resource_node_t *curr;
rg_state_t svcblk;
char rg[64];
+ int queued = 0;

pthread_rwlock_rdlock(&resource_lock);
list_do(&_tree, curr) {
@@ -1322,6 +1331,7 @@ rg_doall(int request, int block, char *debugfmt)

rt_enqueue_request(rg, request, NULL, 0,
0, 0, 0);
+ ++queued;
} while (!list_done(&_tree, curr));

pthread_rwlock_unlock(&resource_lock);
@@ -1331,6 +1341,7 @@ rg_doall(int request, int block, char *debugfmt)
other rgmanagers to complete. */
if (block)
rg_wait_threads();
+ return queued;
}


diff --git a/rgmanager/src/daemons/main.c b/rgmanager/src/daemons/main.c
index aa78cef..1c7f746 100644
--- a/rgmanager/src/daemons/main.c
+++ b/rgmanager/src/daemons/main.c
@@ -72,6 +72,7 @@ static int signalled = 0;
static int port = RG_PORT;
static char *rgmanager_lsname = "rgmanager"; /* XXX default */
static int status_poll_interval = DEFAULT_CHECK_INTERVAL;
+static int stops_queued = 0;

int next_node_id(cluster_member_list_t *membership, int me);

@@ -1041,7 +1042,7 @@ void *
shutdown_thread(void __attribute__ ((unused)) *arg)
{
rg_lockall(L_SYS|L_SHUTDOWN);
- rg_doall(RG_STOP_EXITING, 1, NULL);
+ stops_queued = rg_doall(RG_STOP_EXITING, 1, NULL);
running = 0;

pthread_exit(NULL);
@@ -1219,8 +1220,17 @@ out_cleanup:
clu_lock_finished(rgmanager_lsname);

out:
- clulog(LOG_NOTICE, "Shutdown complete, exiting\n");
+ clulog(LOG_DEBUG, "Stopped %d services\n", stops_queued);
+ clulog(LOG_NOTICE, "Disconnecting from CMAN\n");
cman_finish(clu);
+
+ if (stops_queued && !central_events_enabled()) {
+ clulog(LOG_DEBUG, "Pausing to allow services to "
+ "start on other node(s)\n");
+ sleep(get_transition_throttling() * 3);
+ }
+
+ clulog(LOG_NOTICE, "Exiting\n");

/*malloc_dump_table(); */ /* Only works if alloc.c us used */
/*malloc_stats();*/
diff --git a/rgmanager/src/daemons/rg_event.c b/rgmanager/src/daemons/rg_event.c
index 82c20c0..606d41b 100644
--- a/rgmanager/src/daemons/rg_event.c
+++ b/rgmanager/src/daemons/rg_event.c
@@ -69,6 +69,13 @@ set_transition_throttling(int nsecs)
}


+int
+get_transition_throttling(void)
+{
+ return transition_throttling;
+}
+
+
void
set_central_events(int flag)
{
--
1.7.3.4
 
Old 04-08-2011, 02:46 AM
"Fabio M. Di Nitto"
 
Default rhel5 rgmanager: Pause during exit if we stopped services

ACK

On 04/07/2011 10:55 PM, Lon Hohberger wrote:
> A difference between rgmanager 1.9.x and later versions is that
> they rely on openais/corosync for messaging. This messaging is
> quite reliable and has proved useful.
>
> However, one drawback is that if you very quickly stop rgmanager
> and corosync/cman, the other nodes in the cluster can not restart
> services because message traffic is interrupted for the duration
> of the token timeout.
>
> There is no simple solution to this problem. Rgmanager could
> (in theory) find new placements for services prior to stopping,
> but this is a large amount of design work; it was never designed
> to run policies in the exit path.
>
> A far simpler idea is to simply give the other nodes time to
> restart services.
>
> NOTE: This solution does not and can not work with central
> processing mode.
>
> Resolves: rhbz#619468
>
> Signed-off-by: Lon Hohberger <lhh@redhat.com>
> ---
> rgmanager/include/event.h | 1 +
> rgmanager/include/resgroup.h | 2 +-
> rgmanager/src/daemons/groups.c | 15 +++++++++++++--
> rgmanager/src/daemons/main.c | 14 ++++++++++++--
> rgmanager/src/daemons/rg_event.c | 7 +++++++
> 5 files changed, 34 insertions(+), 5 deletions(-)
>
> diff --git a/rgmanager/include/event.h b/rgmanager/include/event.h
> index 7e628d8..e63dffd 100644
> --- a/rgmanager/include/event.h
> +++ b/rgmanager/include/event.h
> @@ -136,6 +136,7 @@ int slang_process_event(event_table_t *event_table, event_t *ev);
>
> /* For distributed events. */
> void set_transition_throttling(int nsecs);
> +int get_transition_throttling(void);
>
> /* Simplified service start. */
> int service_op_start(char *svcName, int *target_list, int target_list_len,
> diff --git a/rgmanager/include/resgroup.h b/rgmanager/include/resgroup.h
> index 793ad3b..4be4dbc 100644
> --- a/rgmanager/include/resgroup.h
> +++ b/rgmanager/include/resgroup.h
> @@ -160,7 +160,7 @@ void send_ret(msgctx_t *ctx, char *name, int ret, int req, int newowner);
>
> /* do this op on all resource groups. The handler for the request
> will sort out whether or not it's a valid request given the state */
> -void rg_doall(int request, int block, char *debugfmt);
> +int rg_doall(int request, int block, const char *debugfmt);
> void do_status_checks(void); /* Queue status checks for locally running
> services */
>
> diff --git a/rgmanager/src/daemons/groups.c b/rgmanager/src/daemons/groups.c
> index ecb7b85..b546421 100644
> --- a/rgmanager/src/daemons/groups.c
> +++ b/rgmanager/src/daemons/groups.c
> @@ -1292,12 +1292,21 @@ svc_exists(char *svcname)
> }
>
>
> -void
> -rg_doall(int request, int block, char *debugfmt)
> +/*
> + * Perform an operation on all resource groups.
> + *
> + * Returns the number of requests queued. This value is
> + * only used during shutdown, where we queue RG_STOP_EXITING
> + * only for services we have running locally as an optimization.
> + */
> +int
> +rg_doall(int request, int block,
> + const char *debugfmt)
> {
> resource_node_t *curr;
> rg_state_t svcblk;
> char rg[64];
> + int queued = 0;
>
> pthread_rwlock_rdlock(&resource_lock);
> list_do(&_tree, curr) {
> @@ -1322,6 +1331,7 @@ rg_doall(int request, int block, char *debugfmt)
>
> rt_enqueue_request(rg, request, NULL, 0,
> 0, 0, 0);
> + ++queued;
> } while (!list_done(&_tree, curr));
>
> pthread_rwlock_unlock(&resource_lock);
> @@ -1331,6 +1341,7 @@ rg_doall(int request, int block, char *debugfmt)
> other rgmanagers to complete. */
> if (block)
> rg_wait_threads();
> + return queued;
> }
>
>
> diff --git a/rgmanager/src/daemons/main.c b/rgmanager/src/daemons/main.c
> index aa78cef..1c7f746 100644
> --- a/rgmanager/src/daemons/main.c
> +++ b/rgmanager/src/daemons/main.c
> @@ -72,6 +72,7 @@ static int signalled = 0;
> static int port = RG_PORT;
> static char *rgmanager_lsname = "rgmanager"; /* XXX default */
> static int status_poll_interval = DEFAULT_CHECK_INTERVAL;
> +static int stops_queued = 0;
>
> int next_node_id(cluster_member_list_t *membership, int me);
>
> @@ -1041,7 +1042,7 @@ void *
> shutdown_thread(void __attribute__ ((unused)) *arg)
> {
> rg_lockall(L_SYS|L_SHUTDOWN);
> - rg_doall(RG_STOP_EXITING, 1, NULL);
> + stops_queued = rg_doall(RG_STOP_EXITING, 1, NULL);
> running = 0;
>
> pthread_exit(NULL);
> @@ -1219,8 +1220,17 @@ out_cleanup:
> clu_lock_finished(rgmanager_lsname);
>
> out:
> - clulog(LOG_NOTICE, "Shutdown complete, exiting\n");
> + clulog(LOG_DEBUG, "Stopped %d services\n", stops_queued);
> + clulog(LOG_NOTICE, "Disconnecting from CMAN\n");
> cman_finish(clu);
> +
> + if (stops_queued && !central_events_enabled()) {
> + clulog(LOG_DEBUG, "Pausing to allow services to "
> + "start on other node(s)\n");
> + sleep(get_transition_throttling() * 3);
> + }
> +
> + clulog(LOG_NOTICE, "Exiting\n");
>
> /*malloc_dump_table(); */ /* Only works if alloc.c us used */
> /*malloc_stats();*/
> diff --git a/rgmanager/src/daemons/rg_event.c b/rgmanager/src/daemons/rg_event.c
> index 82c20c0..606d41b 100644
> --- a/rgmanager/src/daemons/rg_event.c
> +++ b/rgmanager/src/daemons/rg_event.c
> @@ -69,6 +69,13 @@ set_transition_throttling(int nsecs)
> }
>
>
> +int
> +get_transition_throttling(void)
> +{
> + return transition_throttling;
> +}
> +
> +
> void
> set_central_events(int flag)
> {
 

Thread Tools




All times are GMT. The time now is 07:53 AM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org