These new callbacks notify the dlm user about lock recovery.
GFS2, and possibly others, need to be aware of when the dlm
will be doing lock recovery for a failed lockspace member.
In the past, this coordination has been done between dlm and
file system daemons in userspace, which then direct their
kernel counterparts. These callbacks allow the same
coordination directly, and more simply.
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 6cf72fc..e7e327d 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/dlmconstants.h>
#include <net/ipv6.h>
#include <net/sock.h>
struct dlm_comm {
struct config_item item;
+ int seq;
int nodeid;
int local;
int addr_count;
@@ -309,6 +340,7 @@ struct dlm_node {
int nodeid;
int weight;
int new;
+ int comm_seq; /* copy of cm->seq when nd->nodeid is set */
};
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 3099d0d..9f5e366 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -14,6 +14,13 @@
#ifndef __CONFIG_DOT_H__
#define __CONFIG_DOT_H__
+struct dlm_config_node {
+ int nodeid;
+ int weight;
+ int new;
+ uint32_t comm_seq;
+};
+
#define DLM_MAX_ADDR_COUNT 3
struct dlm_config_info {
@@ -29,15 +36,17 @@ struct dlm_config_info {
int ci_timewarn_cs;
int ci_waitwarn_us;
int ci_new_rsb_count;
+ int ci_recover_callbacks;
+ char ci_cluster_name[DLM_LOCKSPACE_LEN];
};
extern struct dlm_config_info dlm_config;
int dlm_config_init(void);
void dlm_config_exit(void);
-int dlm_node_weight(char *lsname, int nodeid);
-int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
- int **new_out, int *new_count_out);
+int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
+ int *count_out);
+int dlm_comm_seq(int nodeid, uint32_t *seq);
int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
int dlm_our_nodeid(void);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index f4d132c..cba3859 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -119,28 +119,18 @@ struct dlm_member {
int weight;
int slot;
int slot_prev;
+ int comm_seq;
uint32_t generation;
};
/*
- * low nodeid saves array of these in ls_slots
- */
-
-struct dlm_slot {
- int nodeid;
- int slot;
-};
-
-/*
* Save and manage recovery state for a lockspace.
*/
struct dlm_recover {
struct list_head list;
- int *nodeids; /* nodeids of all members */
- int node_count;
- int *new; /* nodeids of new members */
- int new_count;
+ struct dlm_config_node *nodes;
+ int nodes_count;
uint64_t seq;
};
+ struct dlm_lockspace_ops ls_ops;
+
int ls_namelen;
char ls_name[1];
};
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 1441f04..088ce1c 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -386,12 +386,14 @@ static void threads_stop(void)
dlm_lowcomms_stop();
}
-static int new_lockspace(const char *name, int namelen, void **lockspace,
- uint32_t flags, int lvblen)
+static int new_lockspace(const char *name, const char *cluster, uint32_t flags,
+ int lvblen, struct dlm_lockspace_ops *ops,
+ dlm_lockspace_t **lockspace)
{
struct dlm_ls *ls;
int i, size, error;
int do_unreg = 0;
+ int namelen = strlen(name);
if (namelen > DLM_LOCKSPACE_LEN)
return -EINVAL;
@@ -403,8 +405,23 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
return -EINVAL;
+ if (ops)
+ memcpy(&ls->ls_ops, ops, sizeof(struct dlm_lockspace_ops));
+
if (flags & DLM_LSFL_TIMEWARN)
set_bit(LSFL_TIMEWARN, &ls->ls_flags);
@@ -619,8 +639,9 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
return error;
}
-int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
- uint32_t flags, int lvblen)
+int dlm_new_lockspace(const char *name, const char *cluster, uint32_t flags,
+ int lvblen, struct dlm_lockspace_ops *ops,
+ dlm_lockspace_t **lockspace)
{
int error = 0;
@@ -630,7 +651,7 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
if (error)
goto out;
- error = new_lockspace(name, namelen, lockspace, flags, lvblen);
+ error = new_lockspace(name, cluster, flags, lvblen, ops, lockspace);
if (!error)
ls_count++;
if (error > 0)
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index fbbcda9..d2d5750 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
/************************************************** ****************************
************************************************** *****************************
**
-** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -27,7 +27,7 @@ int dlm_slots_version(struct dlm_header *h)
}
+static void dlm_lsop_recover_prep(struct dlm_ls *ls)
+{
+ if (!ls->ls_ops.recover_prep)
+ return;
+ ls->ls_ops.recover_prep(ls->ls_ops.cb_arg);
+}
+
+static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb)
+{
+ struct dlm_slot slot;
+ uint32_t seq;
+ int error;
+
+ if (!ls->ls_ops.recover_slot)
+ return;
+
+ /* if there is no comms connection with this node
+ or the present comms connection is newer
+ than the one when this member was added, then
+ we consider the node to have failed (versus
+ being removed due to dlm_release_lockspace) */
+
+ error = dlm_comm_seq(memb->nodeid, &seq);
+
+ if (!error && seq == memb->comm_seq)
+ return;
+
+ slot.nodeid = memb->nodeid;
+ slot.slot = memb->slot;
+
+ ls->ls_ops.recover_slot(ls->ls_ops.cb_arg, &slot);
+}
+
+void dlm_lsop_recover_done(struct dlm_ls *ls)
+{
+ struct dlm_member *memb;
+ struct dlm_slot *slots;
+ int i, num;
+
+ if (!ls->ls_ops.recover_done)
+ return;
+
+ num = ls->ls_num_nodes;
+
+ slots = kzalloc(num * sizeof(struct dlm_slot), GFP_KERNEL);
+ if (!slots)
+ return;
+
+ i = 0;
+ list_for_each_entry(memb, &ls->ls_nodes, list) {
+ if (i == num) {
+ log_error(ls, "dlm_lsop_recover_done bad num %d", num);
+ goto out;
+ }
+ slots[i].nodeid = memb->nodeid;
+ slots[i].slot = memb->slot;
+ i++;
+ }
+
+ ls->ls_ops.recover_done(ls->ls_ops.cb_arg, slots, num,
+ ls->ls_slot, ls->ls_generation);
+ out:
+ kfree(slots);
+}
+
+static struct dlm_config_node *find_config_node(struct dlm_recover *rv,
+ int nodeid)
+{
+ int i;
+
+ for (i = 0; i < rv->nodes_count; i++) {
+ if (rv->nodes[i].nodeid == nodeid)
+ return &rv->nodes[i];
+ }
+ return NULL;
+}
+
int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
{
struct dlm_member *memb, *safe;
- int i, error, found, pos = 0, neg = 0, low = -1;
+ struct dlm_config_node *node;
+ int i, error, neg = 0, low = -1;
/* previously removed members that we've not finished removing need to
count as a negative change so the "neg" recovery steps will happen */
@@ -466,46 +536,32 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
/* move departed members from ls_nodes to ls_nodes_gone */
list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
- found = 0;
- for (i = 0; i < rv->node_count; i++) {
- if (memb->nodeid == rv->nodeids[i]) {
- found = 1;
- break;
- }
- }
+ node = find_config_node(rv, memb->nodeid);
+ if (node && !node->new)
+ continue;
- if (!found) {
- neg++;
- dlm_remove_member(ls, memb);
+ if (!node) {
log_debug(ls, "remove member %d", memb->nodeid);
+ } else {
+ /* removed and re-added */
+ log_debug(ls, "remove member %d comm_seq %u %u",
+ memb->nodeid, memb->comm_seq, node->comm_seq);
}
- }
-
- /* Add an entry to ls_nodes_gone for members that were removed and
- then added again, so that previous state for these nodes will be
- cleared during recovery. */
- for (i = 0; i < rv->new_count; i++) {
- if (!dlm_is_member(ls, rv->new[i]))
- continue;
- log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
-
- memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
- if (!memb)
- return -ENOMEM;
- memb->nodeid = rv->new[i];
- list_add_tail(&memb->list, &ls->ls_nodes_gone);
neg++;
+ list_move(&memb->list, &ls->ls_nodes_gone);
+ ls->ls_num_nodes--;
+ dlm_lsop_recover_slot(ls, memb);
}
/* add new members to ls_nodes */
- for (i = 0; i < rv->node_count; i++) {
- if (dlm_is_member(ls, rv->nodeids[i]))
+ for (i = 0; i < rv->nodes_count; i++) {
+ node = &rv->nodes[i];
+ if (dlm_is_member(ls, node->nodeid))
continue;
- dlm_add_member(ls, rv->nodeids[i]);
- pos++;
- log_debug(ls, "add member %d", rv->nodeids[i]);
+ dlm_add_member(ls, node);
+ log_debug(ls, "add member %d", node->nodeid);
}
diff --git a/fs/dlm/member.h b/fs/dlm/member.h
index 7e87e8a..3deb706 100644
--- a/fs/dlm/member.h
+++ b/fs/dlm/member.h
@@ -1,7 +1,7 @@
/************************************************** ****************************
************************************************** *****************************
**
-** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -27,6 +27,7 @@ void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_slots_copy_in(struct dlm_ls *ls);
int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size,
struct dlm_slot **slots_out, uint32_t *gen_out);
+void dlm_lsop_recover_done(struct dlm_ls *ls);
#endif /* __MEMBER_DOT_H__ */
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 5a9e1a4..3780caf 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -227,11 +227,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
- rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name),
- &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN);
+ rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
+ NULL, &fsdlm);
if (rc) {
ocfs2_live_connection_drop(control);
goto out;
diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index d4e02f5..27d96b3 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -74,15 +74,70 @@ struct dlm_lksb {
#ifdef __KERNEL__
+struct dlm_slot {
+ int nodeid; /* 1 to MAX_INT */
+ int slot; /* 1 to MAX_INT */
+};
+
+/*
+ * recover_prep: called before the dlm begins lock recovery.
+ * Notfies lockspace user that locks from failed members will be granted.
+ * recover_slot: called after recover_prep and before recover_done.
+ * Identifies a failed lockspace member.
+ * recover_done: called after the dlm completes lock recovery.
+ * Identifies lockspace members and lockspace generation number.
+ */
+
+struct dlm_lockspace_ops {
+ void *cb_arg;
+ void (*recover_prep) (void *cb_arg);
+ void (*recover_slot) (void *cb_arg, struct dlm_slot *slot);
+ void (*recover_done) (void *cb_arg, struct dlm_slot *slots,
+ int num_slots, int our_slot, uint32_t generation);
+};
+
/*
* dlm_new_lockspace
*
- * Starts a lockspace with the given name. If the named lockspace exists in
- * the cluster, the calling node joins it.
+ * Create/join a lockspace.
+ *
+ * name: lockspace name, null terminated, up to DLM_LOCKSPACE_LEN (not
+ * including terminating null).
+ *
+ * cluster: cluster name, null terminated, up to DLM_LOCKSPACE_LEN (not
+ * including terminating null). Optional. When cluster is null, it
+ * is not used. When set, dlm_new_lockspace() returns -EBADR if cluster
+ * is not equal to the dlm cluster name.
+ *
+ * flags:
+ * DLM_LSFL_NODIR
+ * The dlm should not use a resource directory, but statically assign
+ * resource mastery to nodes based on the name hash that is otherwise
+ * used to select the directory node. Must be the same on all nodes.
+ * DLM_LSFL_TIMEWARN
+ * The dlm should emit netlink messages if locks have been waiting
+ * for a configurable amount of time. (Unused.)
+ * DLM_LSFL_FS
+ * The lockspace user is in the kernel (i.e. filesystem). Enables
+ * direct bast/cast callbacks.
+ * DLM_LSFL_NEWEXCL
+ * dlm_new_lockspace() should return -EEXIST if the lockspace exists.
+ *
+ * lvblen: length of lvb in bytes. Must be multiple of 8.
+ * dlm_new_lockspace() returns an error if this does not match
+ * what other nodes are using.
+ *
+ * ops: callbacks that indicate lockspace recovery points so the
+ * caller can coordinate its recovery and know lockspace members.
+ * Optional. When set, dlm_new_lockspace() returns -EOPNOTSUPP if
+ * the dlm does not have recovery_callbacks enabled.
+ *
+ * lockspace: handle for dlm functions
*/
On Fri, 2011-12-16 at 16:03 -0600, David Teigland wrote:
> These new callbacks notify the dlm user about lock recovery.
> GFS2, and possibly others, need to be aware of when the dlm
> will be doing lock recovery for a failed lockspace member.
>
> In the past, this coordination has been done between dlm and
> file system daemons in userspace, which then direct their
> kernel counterparts. These callbacks allow the same
> coordination directly, and more simply.
>
> Signed-off-by: David Teigland <teigland@redhat.com>
> ---
> fs/dlm/config.c | 130 ++++++++++++++++++--------------
> fs/dlm/config.h | 17 +++-
> fs/dlm/dlm_internal.h | 20 ++----
> fs/dlm/lockspace.c | 37 +++++++--
> fs/dlm/member.c | 197 +++++++++++++++++++++++++++++++------------------
> fs/dlm/member.h | 3 +-
> fs/dlm/recoverd.c | 10 +-
> fs/dlm/user.c | 4 +-
> fs/gfs2/lock_dlm.c | 4 +-
> fs/ocfs2/stack_user.c | 4 +-
> include/linux/dlm.h | 65 +++++++++++++++-
> 11 files changed, 319 insertions(+), 172 deletions(-)
>
[snip]
> diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
> index f4d132c..cba3859 100644
> --- a/fs/dlm/dlm_internal.h
> +++ b/fs/dlm/dlm_internal.h
> @@ -2,7 +2,7 @@
> ************************************************** *****************************
> **
> ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
> -** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
> +** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
> **
> ** This copyrighted material is made available to anyone wishing to use,
> ** modify, copy, or redistribute it subject to the terms and conditions
> @@ -119,28 +119,18 @@ struct dlm_member {
> int weight;
> int slot;
> int slot_prev;
> + int comm_seq;
> uint32_t generation;
> };
>
> /*
> - * low nodeid saves array of these in ls_slots
> - */
> -
> -struct dlm_slot {
> - int nodeid;
> - int slot;
> -};
> -
> -/*
> * Save and manage recovery state for a lockspace.
> */
>
> struct dlm_recover {
> struct list_head list;
> - int *nodeids; /* nodeids of all members */
> - int node_count;
> - int *new; /* nodeids of new members */
> - int new_count;
> + struct dlm_config_node *nodes;
> + int nodes_count;
> uint64_t seq;
> };
>
> @@ -584,6 +574,8 @@ struct dlm_ls {
> struct list_head ls_root_list; /* root resources */
> struct rw_semaphore ls_root_sem; /* protect root_list */
>
> + struct dlm_lockspace_ops ls_ops;
^^^^^^^^^^ I'd suggest just keeping a pointer to
this, see below.
> +
> int ls_namelen;
> char ls_name[1];
> };
> diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
> index 1441f04..088ce1c 100644
> --- a/fs/dlm/lockspace.c
> +++ b/fs/dlm/lockspace.c
> @@ -2,7 +2,7 @@
> ************************************************** *****************************
> **
> ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
> -** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
> +** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
> **
> ** This copyrighted material is made available to anyone wishing to use,
> ** modify, copy, or redistribute it subject to the terms and conditions
> @@ -386,12 +386,14 @@ static void threads_stop(void)
> dlm_lowcomms_stop();
> }
>
> -static int new_lockspace(const char *name, int namelen, void **lockspace,
> - uint32_t flags, int lvblen)
> +static int new_lockspace(const char *name, const char *cluster, uint32_t flags,
> + int lvblen, struct dlm_lockspace_ops *ops,
^^^^ this should be const
> + dlm_lockspace_t **lockspace)
> {
> struct dlm_ls *ls;
> int i, size, error;
> int do_unreg = 0;
> + int namelen = strlen(name);
>
> if (namelen > DLM_LOCKSPACE_LEN)
> return -EINVAL;
> @@ -403,8 +405,23 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
> return -EINVAL;
>
> if (!dlm_user_daemon_available()) {
> - module_put(THIS_MODULE);
> - return -EUNATCH;
> + log_print("dlm user daemon not available");
> + error = -EUNATCH;
> + goto out;
> + }
> +
> + if (ops && !dlm_config.ci_recover_callbacks) {
> + log_print("dlm recover callbacks not enabled");
> + error = -EOPNOTSUPP;
> + goto out;
> + }
> +
> + if (cluster && strncmp(cluster, dlm_config.ci_cluster_name,
> + DLM_LOCKSPACE_LEN)) {
> + log_print("dlm cluster name %s mismatch %s",
> + dlm_config.ci_cluster_name, cluster);
> + error = -EBADR;
> + goto out;
> }
>
> error = 0;
> @@ -442,6 +459,9 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
> ls->ls_flags = 0;
> ls->ls_scan_time = jiffies;
>
> + if (ops)
> + memcpy(&ls->ls_ops, ops, sizeof(struct dlm_lockspace_ops));
> +
Why not just keep a pointer to the ops? There is no need to copy them.
Also - since the ops are specific to a user of the lockspace, this
implies that it is no longer possible to have multiple openers of the
same lockspace on the same node. I think that needs documenting
somewhere at least. The other possibility would be to introduce a
structure to represent a "user of a lockspace" I suppose...
> if (flags & DLM_LSFL_TIMEWARN)
> set_bit(LSFL_TIMEWARN, &ls->ls_flags);
>
> @@ -619,8 +639,9 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
> return error;
> }
>
> -int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
> - uint32_t flags, int lvblen)
> +int dlm_new_lockspace(const char *name, const char *cluster, uint32_t flags,
> + int lvblen, struct dlm_lockspace_ops *ops,
^^^^ likewise this can also be const
> diff --git a/include/linux/dlm.h b/include/linux/dlm.h
> index d4e02f5..27d96b3 100644
> --- a/include/linux/dlm.h
> +++ b/include/linux/dlm.h
> @@ -2,7 +2,7 @@
> ************************************************** *****************************
> **
> ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
> -** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
> +** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
> **
> ** This copyrighted material is made available to anyone wishing to use,
> ** modify, copy, or redistribute it subject to the terms and conditions
> @@ -74,15 +74,70 @@ struct dlm_lksb {
>
> #ifdef __KERNEL__
>
> +struct dlm_slot {
> + int nodeid; /* 1 to MAX_INT */
> + int slot; /* 1 to MAX_INT */
> +};
> +
> +/*
> + * recover_prep: called before the dlm begins lock recovery.
> + * Notfies lockspace user that locks from failed members will be granted.
> + * recover_slot: called after recover_prep and before recover_done.
> + * Identifies a failed lockspace member.
> + * recover_done: called after the dlm completes lock recovery.
> + * Identifies lockspace members and lockspace generation number.
> + */
> +
> +struct dlm_lockspace_ops {
> + void *cb_arg;
> + void (*recover_prep) (void *cb_arg);
> + void (*recover_slot) (void *cb_arg, struct dlm_slot *slot);
> + void (*recover_done) (void *cb_arg, struct dlm_slot *slots,
> + int num_slots, int our_slot, uint32_t generation);
> +};
> +
Please don't mix the callback arg and the functions in the same
structure. The functions will be identical for all filesystems, where as
the callback arg will be unique to each filesystem, so it would be
better to just add an extra cb_arg to the new lockspace function.
Otherwise it looks good to me,
Steve.
12-19-2011, 03:43 PM
Bob Peterson
dlm: add recovery callbacks
----- Original Message -----
| These new callbacks notify the dlm user about lock recovery.
| GFS2, and possibly others, need to be aware of when the dlm
| will be doing lock recovery for a failed lockspace member.
|
| In the past, this coordination has been done between dlm and
| file system daemons in userspace, which then direct their
| kernel counterparts. These callbacks allow the same
| coordination directly, and more simply.
|
| Signed-off-by: David Teigland <teigland@redhat.com>
| ---
Hi,
ACK,
Regards,
Bob Peterson
Red Hat File Systems
12-19-2011, 04:59 PM
David Teigland
dlm: add recovery callbacks
On Mon, Dec 19, 2011 at 12:36:57PM +0000, Steven Whitehouse wrote:
> > + struct dlm_lockspace_ops ls_ops;
> ^^^^^^^^^^ I'd suggest just keeping a pointer to
> this, see below.
> > +static int new_lockspace(const char *name, const char *cluster, uint32_t flags,
> > + int lvblen, struct dlm_lockspace_ops *ops,
> ^^^^ this should be const
> > + if (ops)
> > + memcpy(&ls->ls_ops, ops, sizeof(struct dlm_lockspace_ops));
> > +
> Why not just keep a pointer to the ops? There is no need to copy them.
>
> Also - since the ops are specific to a user of the lockspace, this
> implies that it is no longer possible to have multiple openers of the
> same lockspace on the same node. I think that needs documenting
> somewhere at least. The other possibility would be to introduce a
> structure to represent a "user of a lockspace" I suppose...
> > +int dlm_new_lockspace(const char *name, const char *cluster, uint32_t flags,
> > + int lvblen, struct dlm_lockspace_ops *ops,
> ^^^^ likewise this can also be const
> Please don't mix the callback arg and the functions in the same
> structure. The functions will be identical for all filesystems, where as
> the callback arg will be unique to each filesystem, so it would be
> better to just add an extra cb_arg to the new lockspace function.
I'll change all those, thanks
01-05-2012, 03:46 PM
David Teigland
dlm: add recovery callbacks
These new callbacks notify the dlm user about lock recovery.
GFS2, and possibly others, need to be aware of when the dlm
will be doing lock recovery for a failed lockspace member.
In the past, this coordination has been done between dlm and
file system daemons in userspace, which then direct their
kernel counterparts. These callbacks allow the same
coordination directly, and more simply.
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 6cf72fc..e7e327d 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/dlmconstants.h>
#include <net/ipv6.h>
#include <net/sock.h>
struct dlm_comm {
struct config_item item;
+ int seq;
int nodeid;
int local;
int addr_count;
@@ -309,6 +340,7 @@ struct dlm_node {
int nodeid;
int weight;
int new;
+ int comm_seq; /* copy of cm->seq when nd->nodeid is set */
};
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 3099d0d..9f5e366 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -14,6 +14,13 @@
#ifndef __CONFIG_DOT_H__
#define __CONFIG_DOT_H__
+struct dlm_config_node {
+ int nodeid;
+ int weight;
+ int new;
+ uint32_t comm_seq;
+};
+
#define DLM_MAX_ADDR_COUNT 3
struct dlm_config_info {
@@ -29,15 +36,17 @@ struct dlm_config_info {
int ci_timewarn_cs;
int ci_waitwarn_us;
int ci_new_rsb_count;
+ int ci_recover_callbacks;
+ char ci_cluster_name[DLM_LOCKSPACE_LEN];
};
extern struct dlm_config_info dlm_config;
int dlm_config_init(void);
void dlm_config_exit(void);
-int dlm_node_weight(char *lsname, int nodeid);
-int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
- int **new_out, int *new_count_out);
+int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
+ int *count_out);
+int dlm_comm_seq(int nodeid, uint32_t *seq);
int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
int dlm_our_nodeid(void);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index f4d132c..3a564d1 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -119,28 +119,18 @@ struct dlm_member {
int weight;
int slot;
int slot_prev;
+ int comm_seq;
uint32_t generation;
};
/*
- * low nodeid saves array of these in ls_slots
- */
-
-struct dlm_slot {
- int nodeid;
- int slot;
-};
-
-/*
* Save and manage recovery state for a lockspace.
*/
struct dlm_recover {
struct list_head list;
- int *nodeids; /* nodeids of all members */
- int node_count;
- int *new; /* nodeids of new members */
- int new_count;
+ struct dlm_config_node *nodes;
+ int nodes_count;
uint64_t seq;
};
+ const struct dlm_lockspace_ops *ls_ops;
+ void *ls_ops_arg;
+
int ls_namelen;
char ls_name[1];
};
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 1441f04..a1ea25f 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -386,12 +386,15 @@ static void threads_stop(void)
dlm_lowcomms_stop();
}
-static int new_lockspace(const char *name, int namelen, void **lockspace,
- uint32_t flags, int lvblen)
+static int new_lockspace(const char *name, const char *cluster,
+ uint32_t flags, int lvblen,
+ const struct dlm_lockspace_ops *ops, void *ops_arg,
+ int *ops_result, dlm_lockspace_t **lockspace)
{
struct dlm_ls *ls;
int i, size, error;
int do_unreg = 0;
+ int namelen = strlen(name);
if (namelen > DLM_LOCKSPACE_LEN)
return -EINVAL;
@@ -403,8 +406,24 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
return -EINVAL;
@@ -619,8 +643,10 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
return error;
}
-int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
- uint32_t flags, int lvblen)
+int dlm_new_lockspace(const char *name, const char *cluster,
+ uint32_t flags, int lvblen,
+ const struct dlm_lockspace_ops *ops, void *ops_arg,
+ int *ops_result, dlm_lockspace_t **lockspace)
{
int error = 0;
@@ -630,7 +656,8 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
if (error)
goto out;
- error = new_lockspace(name, namelen, lockspace, flags, lvblen);
+ error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
+ ops_result, lockspace);
if (!error)
ls_count++;
if (error > 0)
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index eebc52a..862640a 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
/************************************************** ****************************
************************************************** *****************************
**
-** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -27,7 +27,7 @@ int dlm_slots_version(struct dlm_header *h)
}
+static void dlm_lsop_recover_prep(struct dlm_ls *ls)
+{
+ if (!ls->ls_ops || !ls->ls_ops->recover_prep)
+ return;
+ ls->ls_ops->recover_prep(ls->ls_ops_arg);
+}
+
+static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb)
+{
+ struct dlm_slot slot;
+ uint32_t seq;
+ int error;
+
+ if (!ls->ls_ops || !ls->ls_ops->recover_slot)
+ return;
+
+ /* if there is no comms connection with this node
+ or the present comms connection is newer
+ than the one when this member was added, then
+ we consider the node to have failed (versus
+ being removed due to dlm_release_lockspace) */
+
+ error = dlm_comm_seq(memb->nodeid, &seq);
+
+ if (!error && seq == memb->comm_seq)
+ return;
+
+ slot.nodeid = memb->nodeid;
+ slot.slot = memb->slot;
+
+ ls->ls_ops->recover_slot(ls->ls_ops_arg, &slot);
+}
+
+void dlm_lsop_recover_done(struct dlm_ls *ls)
+{
+ struct dlm_member *memb;
+ struct dlm_slot *slots;
+ int i, num;
+
+ if (!ls->ls_ops || !ls->ls_ops->recover_done)
+ return;
+
+ num = ls->ls_num_nodes;
+
+ slots = kzalloc(num * sizeof(struct dlm_slot), GFP_KERNEL);
+ if (!slots)
+ return;
+
+ i = 0;
+ list_for_each_entry(memb, &ls->ls_nodes, list) {
+ if (i == num) {
+ log_error(ls, "dlm_lsop_recover_done bad num %d", num);
+ goto out;
+ }
+ slots[i].nodeid = memb->nodeid;
+ slots[i].slot = memb->slot;
+ i++;
+ }
+
+ ls->ls_ops->recover_done(ls->ls_ops_arg, slots, num,
+ ls->ls_slot, ls->ls_generation);
+ out:
+ kfree(slots);
+}
+
+static struct dlm_config_node *find_config_node(struct dlm_recover *rv,
+ int nodeid)
+{
+ int i;
+
+ for (i = 0; i < rv->nodes_count; i++) {
+ if (rv->nodes[i].nodeid == nodeid)
+ return &rv->nodes[i];
+ }
+ return NULL;
+}
+
int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
{
struct dlm_member *memb, *safe;
- int i, error, found, pos = 0, neg = 0, low = -1;
+ struct dlm_config_node *node;
+ int i, error, neg = 0, low = -1;
/* previously removed members that we've not finished removing need to
count as a negative change so the "neg" recovery steps will happen */
@@ -476,46 +546,32 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
/* move departed members from ls_nodes to ls_nodes_gone */
list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
- found = 0;
- for (i = 0; i < rv->node_count; i++) {
- if (memb->nodeid == rv->nodeids[i]) {
- found = 1;
- break;
- }
- }
+ node = find_config_node(rv, memb->nodeid);
+ if (node && !node->new)
+ continue;
- if (!found) {
- neg++;
- dlm_remove_member(ls, memb);
+ if (!node) {
log_debug(ls, "remove member %d", memb->nodeid);
+ } else {
+ /* removed and re-added */
+ log_debug(ls, "remove member %d comm_seq %u %u",
+ memb->nodeid, memb->comm_seq, node->comm_seq);
}
- }
-
- /* Add an entry to ls_nodes_gone for members that were removed and
- then added again, so that previous state for these nodes will be
- cleared during recovery. */
- for (i = 0; i < rv->new_count; i++) {
- if (!dlm_is_member(ls, rv->new[i]))
- continue;
- log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
-
- memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
- if (!memb)
- return -ENOMEM;
- memb->nodeid = rv->new[i];
- list_add_tail(&memb->list, &ls->ls_nodes_gone);
neg++;
+ list_move(&memb->list, &ls->ls_nodes_gone);
+ ls->ls_num_nodes--;
+ dlm_lsop_recover_slot(ls, memb);
}
/* add new members to ls_nodes */
- for (i = 0; i < rv->node_count; i++) {
- if (dlm_is_member(ls, rv->nodeids[i]))
+ for (i = 0; i < rv->nodes_count; i++) {
+ node = &rv->nodes[i];
+ if (dlm_is_member(ls, node->nodeid))
continue;
- dlm_add_member(ls, rv->nodeids[i]);
- pos++;
- log_debug(ls, "add member %d", rv->nodeids[i]);
+ dlm_add_member(ls, node);
+ log_debug(ls, "add member %d", node->nodeid);
}
diff --git a/fs/dlm/member.h b/fs/dlm/member.h
index 7e87e8a..3deb706 100644
--- a/fs/dlm/member.h
+++ b/fs/dlm/member.h
@@ -1,7 +1,7 @@
/************************************************** ****************************
************************************************** *****************************
**
-** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -27,6 +27,7 @@ void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_slots_copy_in(struct dlm_ls *ls);
int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size,
struct dlm_slot **slots_out, uint32_t *gen_out);
+void dlm_lsop_recover_done(struct dlm_ls *ls);
#endif /* __MEMBER_DOT_H__ */
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 5a9e1a4..3780caf 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -227,11 +227,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
- rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name),
- &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN);
+ rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
+ NULL, NULL, NULL, &fsdlm);
if (rc) {
ocfs2_live_connection_drop(control);
goto out;
diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index d4e02f5..6c7f6e9 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -2,7 +2,7 @@
************************************************** *****************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -74,15 +74,76 @@ struct dlm_lksb {
#ifdef __KERNEL__
+struct dlm_slot {
+ int nodeid; /* 1 to MAX_INT */
+ int slot; /* 1 to MAX_INT */
+};
+
+/*
+ * recover_prep: called before the dlm begins lock recovery.
+ * Notfies lockspace user that locks from failed members will be granted.
+ * recover_slot: called after recover_prep and before recover_done.
+ * Identifies a failed lockspace member.
+ * recover_done: called after the dlm completes lock recovery.
+ * Identifies lockspace members and lockspace generation number.
+ */
+
+struct dlm_lockspace_ops {
+ void (*recover_prep) (void *ops_arg);
+ void (*recover_slot) (void *ops_arg, struct dlm_slot *slot);
+ void (*recover_done) (void *ops_arg, struct dlm_slot *slots,
+ int num_slots, int our_slot, uint32_t generation);
+};
+
/*
* dlm_new_lockspace
*
- * Starts a lockspace with the given name. If the named lockspace exists in
- * the cluster, the calling node joins it.
+ * Create/join a lockspace.
+ *
+ * name: lockspace name, null terminated, up to DLM_LOCKSPACE_LEN (not
+ * including terminating null).
+ *
+ * cluster: cluster name, null terminated, up to DLM_LOCKSPACE_LEN (not
+ * including terminating null). Optional. When cluster is null, it
+ * is not used. When set, dlm_new_lockspace() returns -EBADR if cluster
+ * is not equal to the dlm cluster name.
+ *
+ * flags:
+ * DLM_LSFL_NODIR
+ * The dlm should not use a resource directory, but statically assign
+ * resource mastery to nodes based on the name hash that is otherwise
+ * used to select the directory node. Must be the same on all nodes.
+ * DLM_LSFL_TIMEWARN
+ * The dlm should emit netlink messages if locks have been waiting
+ * for a configurable amount of time. (Unused.)
+ * DLM_LSFL_FS
+ * The lockspace user is in the kernel (i.e. filesystem). Enables
+ * direct bast/cast callbacks.
+ * DLM_LSFL_NEWEXCL
+ * dlm_new_lockspace() should return -EEXIST if the lockspace exists.
+ *
+ * lvblen: length of lvb in bytes. Must be multiple of 8.
+ * dlm_new_lockspace() returns an error if this does not match
+ * what other nodes are using.
+ *
+ * ops: callbacks that indicate lockspace recovery points so the
+ * caller can coordinate its recovery and know lockspace members.
+ * This is only used by the initial dlm_new_lockspace() call.
+ * Optional.
+ *
+ * ops_arg: arg for ops callbacks.
+ *
+ * ops_result: tells caller if the ops callbacks (if provided) will
+ * be used or not. 0: will be used, -EXXX will not be used.
+ * -EOPNOTSUPP: the dlm does not have recovery_callbacks enabled.
+ *
+ * lockspace: handle for dlm functions
*/
-int dlm_new_lockspace(const char *name, int namelen,
- dlm_lockspace_t **lockspace, uint32_t flags, int lvblen);
+int dlm_new_lockspace(const char *name, const char *cluster,
+ uint32_t flags, int lvblen,
+ const struct dlm_lockspace_ops *ops, void *ops_arg,
+ int *ops_result, dlm_lockspace_t **lockspace);