[PATCH 13/13] GFS: lock_dlm module

The lock_dlm module uses the DLM in linux/drivers/dlm/ for inter-node
locking.

Signed-off-by: Ken Preslan <ken@preslan.org>
Signed-off-by: David Teigland <teigland@redhat.com>

---

 fs/gfs2/locking/dlm/Makefile   |    3 
 fs/gfs2/locking/dlm/lock.c     |  533 +++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/locking/dlm/lock_dlm.h |  200 +++++++++++++++
 fs/gfs2/locking/dlm/main.c     |   62 ++++
 fs/gfs2/locking/dlm/mount.c    |  218 ++++++++++++++++
 fs/gfs2/locking/dlm/plock.c    |  274 +++++++++++++++++++++
 fs/gfs2/locking/dlm/sysfs.c    |  283 +++++++++++++++++++++
 fs/gfs2/locking/dlm/thread.c   |  355 +++++++++++++++++++++++++++
 include/linux/lock_dlm_plock.h |   40 +++
 9 files changed, 1968 insertions(+)

diff -urpN a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
--- a/fs/gfs2/locking/dlm/Makefile	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/Makefile	2005-09-01 17:48:48.143749048 +0800
@@ -0,0 +1,3 @@
+obj-$(CONFIG_GFS2_FS) += lock_dlm.o
+lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o
+
diff -urpN a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
--- a/fs/gfs2/locking/dlm/lock.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/lock.c	2005-09-01 17:48:48.139749656 +0800
@@ -0,0 +1,533 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+
+static char junk_lvb[GDLM_LVB_SIZE];	/* static lvb shared by hold_null NL locks */
+
+/* Mark lp inactive, append it to the complete list and wake thread_wait. */
+static void queue_complete(struct gdlm_lock *lp)
+{
+	struct gdlm_ls *lockspace = lp->ls;
+
+	clear_bit(LFL_ACTIVE, &lp->flags);
+	spin_lock(&lockspace->async_lock);
+	list_add_tail(&lp->clist, &lockspace->complete);
+	spin_unlock(&lockspace->async_lock);
+	wake_up(&lockspace->thread_wait);
+}
+
+static inline void gdlm_ast(void *astarg)	/* completion ast given to dlm_lock */
+{
+	queue_complete(astarg);	/* astarg is the gdlm_lock passed to dlm_lock */
+}
+
+static inline void gdlm_bast(void *astarg, int mode)
+{
+	struct gdlm_lock *lp = astarg;
+	struct gdlm_ls *ls = lp->ls;
+	/* a zero mode is only reported; nothing is queued for it */
+	if (!mode) {
+		printk("lock_dlm: bast mode zero %x,%"PRIx64"\n",
+			lp->lockname.ln_type, lp->lockname.ln_number);
+		return;
+	}
+	/* queue lp on the blocking list once, keeping the highest mode seen */
+	spin_lock(&ls->async_lock);
+	if (!lp->bast_mode) {
+		list_add_tail(&lp->blist, &ls->blocking);
+		lp->bast_mode = mode;
+	} else if (lp->bast_mode < mode)
+		lp->bast_mode = mode;
+	spin_unlock(&ls->async_lock);
+	wake_up(&ls->thread_wait);
+}
+
+void gdlm_queue_delayed(struct gdlm_lock *lp)
+{
+	spinlock_t *lock = &lp->ls->async_lock;
+	/* park lp on the delayed list; gdlm_submit_delayed() moves it on */
+	spin_lock(lock);
+	list_add_tail(&lp->delay_list, &lp->ls->delayed);
+	spin_unlock(lock);
+}
+
+/* convert gfs lock-state to dlm lock-mode */
+
+static int16_t make_mode(int16_t lmstate)
+{
+	/* each gfs lock state maps to exactly one dlm mode */
+	if (lmstate == LM_ST_UNLOCKED)
+		return DLM_LOCK_NL;
+	if (lmstate == LM_ST_EXCLUSIVE)
+		return DLM_LOCK_EX;
+	if (lmstate == LM_ST_DEFERRED)
+		return DLM_LOCK_CW;
+	if (lmstate == LM_ST_SHARED)
+		return DLM_LOCK_PR;
+	/* unknown state: the assertion below ends in panic() */
+	GDLM_ASSERT(0, printk("unknown LM state %d\n", lmstate););
+	/* not reached */
+}
+
+/* convert dlm lock-mode to gfs lock-state */
+
+int16_t gdlm_make_lmstate(int16_t dlmmode)
+{
+	/* DLM_LOCK_IV and DLM_LOCK_NL both map to the unlocked gfs state */
+	if (dlmmode == DLM_LOCK_IV || dlmmode == DLM_LOCK_NL)
+		return LM_ST_UNLOCKED;
+	if (dlmmode == DLM_LOCK_EX)
+		return LM_ST_EXCLUSIVE;
+	if (dlmmode == DLM_LOCK_CW)
+		return LM_ST_DEFERRED;
+	if (dlmmode == DLM_LOCK_PR)
+		return LM_ST_SHARED;
+
+	/* unknown mode: the assertion below ends in panic() */
+	GDLM_ASSERT(0, printk("unknown DLM mode %d\n", dlmmode););
+	/* not reached */
+}
+
+/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
+   DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
+
+static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
+{
+	int16_t cur = make_mode(cur_state);	/* gfs's view, as a dlm mode */
+	if (lp->cur != DLM_LOCK_IV)	/* IV would never equal make_mode()'s NL */
+		GDLM_ASSERT(lp->cur == cur, printk("%d, %d\n", lp->cur, cur););
+}
+
+static inline unsigned int make_flags(struct gdlm_lock *lp,
+				      unsigned int gfs_flags,
+				      int16_t cur, int16_t req)
+{
+	unsigned int lkf = 0;
+	/* translate gfs LM_FLAG_ request flags into dlm DLM_LKF_ flags */
+	if (gfs_flags & LM_FLAG_TRY)
+		lkf |= DLM_LKF_NOQUEUE;
+
+	if (gfs_flags & LM_FLAG_TRY_1CB) {
+		lkf |= DLM_LKF_NOQUEUE;
+		lkf |= DLM_LKF_NOQUEUEBAST;
+	}
+
+	if (gfs_flags & LM_FLAG_PRIORITY) {
+		lkf |= DLM_LKF_NOORDER;
+		lkf |= DLM_LKF_HEADQUE;
+	}
+	/* LM_FLAG_ANY: a PR request names CW as its alternate and vice versa */
+	if (gfs_flags & LM_FLAG_ANY) {
+		if (req == DLM_LOCK_PR)
+			lkf |= DLM_LKF_ALTCW;
+		else if (req == DLM_LOCK_CW)
+			lkf |= DLM_LKF_ALTPR;
+	}
+	/* a nonzero sb_lkid makes this a conversion (presumably: lock exists) */
+	if (lp->lksb.sb_lkid != 0) {
+		lkf |= DLM_LKF_CONVERT;
+
+		/* Conversion deadlock avoidance by DLM */
+
+		if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
+		    !(lkf & DLM_LKF_NOQUEUE) &&
+		    cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
+			lkf |= DLM_LKF_CONVDEADLK;
+	}
+	/* an attached lvb is flagged on every request */
+	if (lp->lvb)
+		lkf |= DLM_LKF_VALBLK;
+
+	return lkf;
+}
+
+/* make_strname - convert GFS lock numbers to a string (not NUL terminated) */
+static inline void make_strname(struct lm_lockname *lockname,
+				struct gdlm_strname *str)
+{
+	char buf[GDLM_STRNAME_BYTES + 1];	/* sprintf also writes a NUL */
+	sprintf(buf, "%8x%16"PRIx64, lockname->ln_type, lockname->ln_number);
+	memcpy(str->name, buf, GDLM_STRNAME_BYTES);
+	str->namelen = GDLM_STRNAME_BYTES;
+}
+
+/* Allocate and initialise a gdlm_lock for @name and link it into
+   ls->all_locks.  Returns 0 and stores the lock in *lpp, or -ENOMEM. */
+int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
+		   struct gdlm_lock **lpp)
+{
+	struct gdlm_lock *lp;
+
+	/* zeroed allocation: lvb, hold_null, flags and bast_mode start at 0 */
+	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
+	if (!lp)
+		return -ENOMEM;
+
+	lp->lockname = *name;
+	lp->ls = ls;
+	lp->cur = DLM_LOCK_IV;
+	init_completion(&lp->ast_wait);
+	INIT_LIST_HEAD(&lp->clist);
+	INIT_LIST_HEAD(&lp->blist);
+	INIT_LIST_HEAD(&lp->delay_list);
+
+	spin_lock(&ls->async_lock);
+	list_add(&lp->all_list, &ls->all_locks);
+	ls->all_locks_count++;
+	spin_unlock(&ls->async_lock);
+
+	*lpp = lp;
+	return 0;
+}
+
+/* Unlink lp from every list it is on and free the structure (not its lvb). */
+void gdlm_delete_lp(struct gdlm_lock *lp)
+{
+	struct gdlm_ls *lockspace = lp->ls;
+	spin_lock(&lockspace->async_lock);
+	if (!list_empty(&lp->clist))
+		list_del_init(&lp->clist);
+	if (!list_empty(&lp->blist))
+		list_del_init(&lp->blist);
+	if (!list_empty(&lp->delay_list))
+		list_del_init(&lp->delay_list);
+	GDLM_ASSERT(!list_empty(&lp->all_list),);
+	list_del_init(&lp->all_list);
+	lockspace->all_locks_count--;
+	spin_unlock(&lockspace->async_lock);
+
+	kfree(lp);
+}
+
+int gdlm_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+		  lm_lock_t **lockp)
+{
+	struct gdlm_lock *lp = NULL;	/* stays NULL if creation fails */
+	int error;
+	/* lm_get_lock op: create a lock for @name; returns 0 or -ENOMEM */
+	error = gdlm_create_lp((struct gdlm_ls *) lockspace, name, &lp);
+	/* *lockp gets NULL on failure, never an uninitialised pointer */
+	*lockp = (lm_lock_t *) lp;
+	return error;
+}
+
+void gdlm_put_lock(lm_lock_t *lock)	/* lm_put_lock op: destroy the lock */
+{
+	gdlm_delete_lp((struct gdlm_lock *) lock);
+}
+
+void gdlm_do_lock(struct gdlm_lock *lp, struct dlm_range *range)
+{
+	struct gdlm_ls *ls = lp->ls;
+	struct gdlm_strname str;
+	int error, bast = 1;
+
+	/*
+	 * When recovery is in progress, delay lock requests for submission
+	 * once recovery is done.  Requests for recovery (NOEXP) and unlocks
+	 * can pass.
+	 */
+
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
+	    !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
+		gdlm_queue_delayed(lp);
+		return;
+	}
+
+	/*
+	 * Submit the actual lock request.
+	 */
+
+	if (test_bit(LFL_NOBAST, &lp->flags))
+		bast = 0;	/* pass no blocking callback to dlm_lock */
+
+	make_strname(&lp->lockname, &str);
+	/* ACTIVE stays set until queue_complete() clears it */
+	set_bit(LFL_ACTIVE, &lp->flags);
+
+	log_debug("lk %x,%"PRIx64" id %x %d,%d %x", lp->lockname.ln_type,
+		  lp->lockname.ln_number, lp->lksb.sb_lkid,
+		  lp->cur, lp->req, lp->lkf);
+
+	error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
+			 str.name, str.namelen, 0, gdlm_ast, (void *) lp,
+			 bast ? gdlm_bast : NULL, range);
+	/* a refused NOQUEUE request is completed locally with -EAGAIN */
+	if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
+		lp->lksb.sb_status = -EAGAIN;
+		queue_complete(lp);
+		error = 0;
+	}
+	/* any other submission failure is fatal */
+	GDLM_ASSERT(!error,
+		   printk("%s: num=%x,%"PRIx64" err=%d cur=%d req=%d lkf=%x\n",
+			  ls->fsname, lp->lockname.ln_type,
+			  lp->lockname.ln_number, error, lp->cur, lp->req,
+			  lp->lkf););
+}
+
+void gdlm_do_unlock(struct gdlm_lock *lp)
+{
+	unsigned int lkf = 0;
+	int error;
+	/* flag the request as an outstanding dlm unlock */
+	set_bit(LFL_DLM_UNLOCK, &lp->flags);
+	set_bit(LFL_ACTIVE, &lp->flags);
+	/* pass VALBLK with the unlock when an lvb is attached */
+	if (lp->lvb)
+		lkf = DLM_LKF_VALBLK;
+
+	log_debug("un %x,%"PRIx64" %x %d %x", lp->lockname.ln_type,
+		  lp->lockname.ln_number, lp->lksb.sb_lkid, lp->cur, lkf);
+
+	error = dlm_unlock(lp->ls->dlm_lockspace, lp->lksb.sb_lkid, lkf,
+			   NULL, lp);
+	/* failure to submit the unlock is fatal */
+	GDLM_ASSERT(!error,
+		   printk("%s: error=%d num=%x,%"PRIx64" lkf=%x flags=%lx\n",
+			  lp->ls->fsname, error, lp->lockname.ln_type,
+			  lp->lockname.ln_number, lkf, lp->flags););
+}
+
+unsigned int gdlm_lock(lm_lock_t *lock, unsigned int cur_state,
+		       unsigned int req_state, unsigned int flags)
+{
+	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+	/* a fresh request re-enables gdlm_cancel() for this lock */
+	clear_bit(LFL_DLM_CANCEL, &lp->flags);
+	if (flags & LM_FLAG_NOEXP)
+		set_bit(LFL_NOBLOCK, &lp->flags);	/* may pass DFL_BLOCK_LOCKS */
+	/* asserts that gfs and lock_dlm agree on the current mode */
+	check_cur_state(lp, cur_state);
+	lp->req = make_mode(req_state);
+	lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
+	/* submit (or delay) the request; always reported as asynchronous */
+	gdlm_do_lock(lp, NULL);
+	return LM_OUT_ASYNC;
+}
+
+unsigned int gdlm_unlock(lm_lock_t *lock, unsigned int cur_state)
+{
+	struct gdlm_lock *gl = (struct gdlm_lock *) lock;
+	clear_bit(LFL_DLM_CANCEL, &gl->flags);
+	if (gl->cur != DLM_LOCK_IV) {
+		gdlm_do_unlock(gl);
+		return LM_OUT_ASYNC;	/* unlock submitted to the dlm */
+	}
+	return 0;	/* cur is DLM_LOCK_IV: no unlock is sent */
+}
+
+void gdlm_cancel(lm_lock_t *lock)
+{
+	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+	struct gdlm_ls *ls = lp->ls;
+	int error, delay_list = 0;
+	/* a dlm cancel has already been issued for this request */
+	if (test_bit(LFL_DLM_CANCEL, &lp->flags))
+		return;
+
+	log_info("gdlm_cancel %x,%"PRIx64" flags %lx",
+		 lp->lockname.ln_type, lp->lockname.ln_number, lp->flags);
+	/* case 1: the request is still parked on the delayed list */
+	spin_lock(&ls->async_lock);
+	if (!list_empty(&lp->delay_list)) {
+		list_del_init(&lp->delay_list);
+		delay_list = 1;
+	}
+	spin_unlock(&ls->async_lock);
+
+	if (delay_list) {
+		set_bit(LFL_CANCEL, &lp->flags);	/* completed locally */
+		set_bit(LFL_ACTIVE, &lp->flags);
+		queue_complete(lp);
+		return;
+	}
+	/* case 2: nothing outstanding, or it is an unlock: skip the cancel */
+	if (!test_bit(LFL_ACTIVE, &lp->flags) ||
+	    test_bit(LFL_DLM_UNLOCK, &lp->flags))	{
+		log_info("gdlm_cancel skip %x,%"PRIx64" flags %lx",
+		 	 lp->lockname.ln_type, lp->lockname.ln_number,
+			 lp->flags);
+		return;
+	}
+
+	/* the lock is blocked in the dlm */
+
+	set_bit(LFL_DLM_CANCEL, &lp->flags);
+	set_bit(LFL_ACTIVE, &lp->flags);
+
+	error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
+			   NULL, lp);
+
+	log_info("gdlm_cancel rv %d %x,%"PRIx64" flags %lx", error,
+		 lp->lockname.ln_type, lp->lockname.ln_number, lp->flags);
+	/* on -EBUSY drop the flag again so a later cancel is not skipped */
+	if (error == -EBUSY)
+		clear_bit(LFL_DLM_CANCEL, &lp->flags);
+}
+
+/* Allocate a zeroed lvb and attach it to both lp->lvb and the lksb.
+   Returns 0 on success or -ENOMEM. */
+int gdlm_add_lvb(struct gdlm_lock *lp)
+{
+	char *lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL);
+
+	if (!lvb)
+		return -ENOMEM;
+
+	/* lp->lvb and the lksb share the same buffer */
+	lp->lksb.sb_lvbptr = lvb;
+	lp->lvb = lvb;
+	return 0;
+}
+
+void gdlm_del_lvb(struct gdlm_lock *lp)	/* undo gdlm_add_lvb() */
+{
+	kfree(lp->lvb);
+	lp->lvb = NULL;			/* make_flags() keys VALBLK off this */
+	lp->lksb.sb_lvbptr = NULL;
+}
+
+/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
+   the completion) because gfs won't call hold_lvb() during a callback (from
+   the context of a lock_dlm thread). */
+
+static int hold_null_lock(struct gdlm_lock *lp)
+{
+	struct gdlm_lock *lpn = NULL;
+	int error;
+	/* returns 0 with the NL lock in lp->hold_null, or the failure code */
+	if (lp->hold_null) {
+		printk("lock_dlm: lvb already held\n");
+		return 0;
+	}
+
+	error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
+	if (error)
+		goto out;
+	/* the NL lock uses the static junk_lvb rather than lp's own lvb */
+	lpn->lksb.sb_lvbptr = junk_lvb;
+	lpn->lvb = junk_lvb;
+
+	lpn->req = DLM_LOCK_NL;
+	lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
+	set_bit(LFL_NOBAST, &lpn->flags);
+	set_bit(LFL_INLOCK, &lpn->flags);
+
+	init_completion(&lpn->ast_wait);
+	gdlm_do_lock(lpn, NULL);
+	wait_for_completion(&lpn->ast_wait);
+	error = lpn->lksb.sb_status;	/* status of the NL request, not of lp */
+	if (error) {
+		printk("lock_dlm: hold_null_lock dlm error %d\n", error);
+		gdlm_delete_lp(lpn);
+		lpn = NULL;
+	}
+ out:
+	lp->hold_null = lpn;
+	return error;
+}
+
+/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
+   the completion) because gfs may call unhold_lvb() during a callback (from
+   the context of a lock_dlm thread) which could cause a deadlock since the
+   other lock_dlm thread could be engaged in recovery. */
+
+static void unhold_null_lock(struct gdlm_lock *lp)
+{
+	struct gdlm_lock *lpn = lp->hold_null;
+
+	GDLM_ASSERT(lpn,);
+	lpn->lksb.sb_lvbptr = NULL;	/* drop junk_lvb: unlock goes without VALBLK */
+	lpn->lvb = NULL;
+	set_bit(LFL_UNLOCK_DELETE, &lpn->flags);	/* presumably freed on completion */
+	gdlm_do_unlock(lpn);
+	lp->hold_null = NULL;
+}
+
+/* Acquire a NL lock because gfs requires the value block to remain
+   intact on the resource while the lvb is "held" even if it's holding no locks
+   on the resource. */
+
+int gdlm_hold_lvb(lm_lock_t *lock, char **lvbp)
+{
+	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+	int error;
+
+	error = gdlm_add_lvb(lp);
+	if (error)
+		return error;
+
+	error = hold_null_lock(lp);
+	if (error) {
+		gdlm_del_lvb(lp);	/* *lvbp is left untouched on failure */
+		return error;
+	}
+	*lvbp = lp->lvb;	/* hand out the lvb only once the NL lock is held */
+	return 0;
+}
+
+void gdlm_unhold_lvb(lm_lock_t *lock, char *lvb)	/* @lvb is not used here */
+{
+	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+
+	unhold_null_lock(lp);	/* submits the unlock of the NL hold lock */
+	gdlm_del_lvb(lp);	/* frees the buffer from gdlm_add_lvb() */
+}
+
+void gdlm_sync_lvb(lm_lock_t *lock, char *lvb)
+{
+	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+	/* only done while the lock is held in EX */
+	if (lp->cur != DLM_LOCK_EX)
+		return;
+
+	init_completion(&lp->ast_wait);
+	set_bit(LFL_SYNC_LVB, &lp->flags);
+	/* re-request the mode already held (EX -> EX) */
+	lp->req = DLM_LOCK_EX;
+	lp->lkf = make_flags(lp, 0, lp->cur, lp->req);
+	/* block until ast_wait is completed (done outside this function) */
+	gdlm_do_lock(lp, NULL);
+	wait_for_completion(&lp->ast_wait);
+}
+
+void gdlm_submit_delayed(struct gdlm_ls *ls)
+{
+	struct gdlm_lock *cur, *next;
+
+	/* hand every delayed request to the submit list, preserving order */
+	spin_lock(&ls->async_lock);
+	list_for_each_entry_safe(cur, next, &ls->delayed, delay_list)
+		list_move_tail(&cur->delay_list, &ls->submit);
+	spin_unlock(&ls->async_lock);
+
+	wake_up(&ls->thread_wait);
+}
+
+/* Free every lock still on ls->all_locks; returns how many were freed. */
+int gdlm_release_all_locks(struct gdlm_ls *ls)
+{
+	struct gdlm_lock *lp, *next;
+	int freed = 0;
+
+	spin_lock(&ls->async_lock);
+	list_for_each_entry_safe(lp, next, &ls->all_locks, all_list) {
+		list_del_init(&lp->all_list);
+		/* junk_lvb is static storage and must not be kfree'd */
+		if (lp->lvb != junk_lvb)
+			kfree(lp->lvb);
+		kfree(lp);
+		freed++;
+	}
+	spin_unlock(&ls->async_lock);
+	return freed;
+}
+
diff -urpN a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
--- a/fs/gfs2/locking/dlm/lock_dlm.h	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/lock_dlm.h	2005-09-01 17:48:48.147748440 +0800
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef LOCK_DLM_DOT_H
+#define LOCK_DLM_DOT_H
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/socket.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/kobject.h>
+#include <linux/fcntl.h>
+#include <linux/wait.h>
+#include <net/sock.h>
+
+#include <linux/dlm.h>
+#include "../harness/lm_interface.h"
+
+/*
+ * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
+ * prefix of lock_dlm_ gets awkward.  Externally, GFS refers to this module
+ * as "lock_dlm".
+ */
+
+#define GDLM_STRNAME_BYTES	24	/* 8 hex digits of type + 16 of number */
+#define GDLM_LVB_SIZE		32	/* bytes per lock value block */
+#define GDLM_DROP_COUNT		50000	/* initial value of gdlm_drop_count */
+#define GDLM_DROP_PERIOD	60	/* initial value of gdlm_drop_period */
+
+/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
+   We sprintf these numbers into a 24 byte string of hex values to make them
+   human-readable (to make debugging simpler.) */
+
+struct gdlm_strname {
+	unsigned char		name[GDLM_STRNAME_BYTES];	/* hex text given to dlm_lock() */
+	unsigned short		namelen;	/* length passed along with name */
+};
+
+#define DFL_BLOCK_LOCKS		0	/* gdlm_do_lock() delays new requests */
+#define DFL_JOIN_DONE		1	/* gdlm_mount() waits for this */
+#define DFL_LEAVE_DONE		2	/* gdlm_unmount() waits for this */
+#define DFL_TERMINATE		3	/* makes gdlm_mount() fail */
+#define DFL_SPECTATOR		4
+#define DFL_WITHDRAW		5	/* gdlm_withdraw() waits for this */
+
+struct gdlm_ls {
+	uint32_t		id;		/* sent as fsid in plock requests */
+	int			jid;		/* returned to gfs as ls_jid */
+	int			first;		/* returned to gfs as ls_first */
+	int			first_done;	/* set by gdlm_others_may_mount() */
+	unsigned long		flags;		/* DFL_ bits */
+	struct kobject		kobj;
+	char			clustername[128];	/* table_name before ':' */
+	char			fsname[128];		/* table_name after ':' */
+	int			fsflags;	/* flags argument of lm_mount */
+	dlm_lockspace_t		*dlm_lockspace;
+	lm_callback_t		fscb;		/* callback given at mount */
+	lm_fsdata_t		*fsdata;	/* fsdata given at mount */
+	int			recover_jid;
+	int			recover_done;	/* jid from gdlm_recovery_done() */
+	spinlock_t		async_lock;	/* protects the lists below */
+	struct list_head	complete;	/* locks with a finished request */
+	struct list_head	blocking;	/* locks with a pending bast */
+	struct list_head	delayed;	/* held back by DFL_BLOCK_LOCKS */
+	struct list_head	submit;		/* delayed requests to resubmit */
+	struct list_head	all_locks;	/* every gdlm_lock of this fs */
+	uint32_t		all_locks_count;
+	wait_queue_head_t	wait_control;	/* woken by gdlm_mounted_store() */
+	struct task_struct	*thread1;
+	struct task_struct	*thread2;
+	wait_queue_head_t	thread_wait;	/* woken when work is queued */
+	unsigned long		drop_time;	/* jiffies at init_gdlm() */
+	int			drop_locks_count;	/* from gdlm_drop_count */
+	int			drop_locks_period;	/* from gdlm_drop_period */
+};
+
+#define LFL_NOBLOCK		0	/* request may pass DFL_BLOCK_LOCKS */
+#define LFL_NOCACHE		1
+#define LFL_DLM_UNLOCK		2	/* set by gdlm_do_unlock() */
+#define LFL_DLM_CANCEL		3	/* cancel handed to dlm_unlock() */
+#define LFL_SYNC_LVB		4	/* set by gdlm_sync_lvb() */
+#define LFL_FORCE_PROMOTE	5	/* suppresses DLM_LKF_CONVDEADLK */
+#define LFL_REREQUEST		6
+#define LFL_ACTIVE		7	/* cleared by queue_complete() */
+#define LFL_INLOCK		8	/* set on hold_null NL locks */
+#define LFL_CANCEL		9	/* cancelled while on the delayed list */
+#define LFL_NOBAST		10	/* no bast callback is requested */
+#define LFL_HEADQUE		11
+#define LFL_UNLOCK_DELETE	12	/* set by unhold_null_lock() */
+
+struct gdlm_lock {
+	struct gdlm_ls		*ls;		/* owning lockspace */
+	struct lm_lockname	lockname;	/* gfs type + number */
+	char			*lvb;		/* NULL, kmalloc'd, or junk_lvb */
+	struct dlm_lksb		lksb;		/* status block given to dlm_lock */
+
+	int16_t			cur;		/* dlm mode, DLM_LOCK_IV at creation */
+	int16_t			req;		/* dlm mode being requested */
+	int16_t			prev_req;
+	uint32_t		lkf;		/* dlm flags DLM_LKF_ */
+	unsigned long		flags;		/* lock_dlm flags LFL_ */
+
+	int			bast_mode;	/* protected by async_lock */
+	struct completion	ast_wait;	/* waited on by synchronous callers */
+
+	struct list_head	clist;		/* complete */
+	struct list_head	blist;		/* blocking */
+	struct list_head	delay_list;	/* delayed */
+	struct list_head	all_list;	/* all locks for the fs */
+	struct gdlm_lock	*hold_null;	/* NL lock for hold_lvb */
+};
+
+#if (BITS_PER_LONG == 64)
+#define PRIx64 "lx"
+#else
+#define PRIx64 "Lx"
+#endif
+
+#define GDLM_ASSERT(x, do) /* if !(x): log, run `do`, then BUG() and panic() */ \
+{ /* NOTE: a bare block, not do { } while (0); beware before an else */ \
+  if (!(x)) \
+  { \
+    printk("\nlock_dlm:  Assertion failed on line %d of file %s\n" \
+           "lock_dlm:  assertion:  \"%s\"\n" \
+           "lock_dlm:  time = %lu\n", \
+           __LINE__, __FILE__, #x, jiffies); \
+    {do} /* caller-supplied extra statements, may be empty */ \
+    printk("\n"); \
+    BUG(); \
+    panic("lock_dlm:  Record message above and reboot.\n"); \
+  } \
+}
+
+#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)	/* adds prefix and newline */
+#define log_info(fmt, arg...)  log_print(KERN_INFO , fmt , ## arg)
+#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
+#ifdef LOCK_DLM_LOG_DEBUG	/* debug logging is compiled in only on request */
+#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
+#else
+#define log_debug(fmt, arg...)	/* expands to nothing */
+#endif
+
+/* sysfs.c */
+
+int gdlm_sysfs_init(void);
+void gdlm_sysfs_exit(void);
+int gdlm_kobject_setup(struct gdlm_ls *);
+void gdlm_kobject_release(struct gdlm_ls *);
+
+/* thread.c */
+
+int gdlm_init_threads(struct gdlm_ls *);
+void gdlm_release_threads(struct gdlm_ls *);
+
+/* lock.c */
+
+int16_t gdlm_make_lmstate(int16_t);
+void gdlm_queue_delayed(struct gdlm_lock *);
+void gdlm_submit_delayed(struct gdlm_ls *);
+int gdlm_release_all_locks(struct gdlm_ls *);
+int gdlm_create_lp(struct gdlm_ls *, struct lm_lockname *, struct gdlm_lock **);
+void gdlm_delete_lp(struct gdlm_lock *);
+int gdlm_add_lvb(struct gdlm_lock *);
+void gdlm_del_lvb(struct gdlm_lock *);
+void gdlm_do_lock(struct gdlm_lock *, struct dlm_range *);
+void gdlm_do_unlock(struct gdlm_lock *);
+
+int gdlm_get_lock(lm_lockspace_t *, struct lm_lockname *, lm_lock_t **);
+void gdlm_put_lock(lm_lock_t *);
+unsigned int gdlm_lock(lm_lock_t *, unsigned int, unsigned int, unsigned int);
+unsigned int gdlm_unlock(lm_lock_t *, unsigned int);
+void gdlm_cancel(lm_lock_t *);
+int gdlm_hold_lvb(lm_lock_t *, char **);
+void gdlm_unhold_lvb(lm_lock_t *, char *);
+void gdlm_sync_lvb(lm_lock_t *, char *);
+
+/* plock.c */
+
+int gdlm_plock_init(void);
+void gdlm_plock_exit(void);
+int gdlm_plock(lm_lockspace_t *, struct lm_lockname *, struct file *, int,
+		struct file_lock *);
+int gdlm_plock_get(lm_lockspace_t *, struct lm_lockname *, struct file *,
+		struct file_lock *);
+int gdlm_punlock(lm_lockspace_t *, struct lm_lockname *, struct file *,
+		struct file_lock *);
+#endif
+
diff -urpN a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
--- a/fs/gfs2/locking/dlm/main.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/main.c	2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/init.h>
+
+#include "lock_dlm.h"
+
+extern int gdlm_drop_count;
+extern int gdlm_drop_period;
+
+extern struct lm_lockops gdlm_ops;
+
+/* Module init: on failure, undo the completed steps in reverse order. */
+int __init init_lock_dlm(void)
+{
+	int error;
+
+	error = lm_register_proto(&gdlm_ops);
+	if (error) {
+		printk("lock_dlm:  can't register protocol: %d\n", error);
+		return error;
+	}
+
+	error = gdlm_sysfs_init();
+	if (error)
+		goto fail_proto;
+
+	error = gdlm_plock_init();
+	if (error)
+		goto fail_sysfs;
+	gdlm_drop_count = GDLM_DROP_COUNT;
+	gdlm_drop_period = GDLM_DROP_PERIOD;
+	printk("Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
+	return 0;
+
+ fail_sysfs:
+	gdlm_sysfs_exit();
+ fail_proto:
+	lm_unregister_proto(&gdlm_ops);
+	return error;
+}
+
+void __exit exit_lock_dlm(void)	/* tear down in reverse order of init */
+{
+	gdlm_plock_exit();
+	gdlm_sysfs_exit();
+	lm_unregister_proto(&gdlm_ops);
+}
+
+module_init(init_lock_dlm);
+module_exit(exit_lock_dlm);
+
+MODULE_DESCRIPTION("GFS DLM Locking Module");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
diff -urpN a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
--- a/fs/gfs2/locking/dlm/mount.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/mount.c	2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+
+int gdlm_drop_count;
+int gdlm_drop_period;
+struct lm_lockops gdlm_ops;
+
+
+static struct gdlm_ls *init_gdlm(lm_callback_t cb, lm_fsdata_t *fsdata,
+				 int flags, char *table_name)
+{
+	struct gdlm_ls *ls;
+	char buf[256], *p;
+	/* returns a new lockspace struct, or NULL on any failure */
+	ls = kmalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
+	if (!ls)
+		return NULL;
+
+	memset(ls, 0, sizeof(struct gdlm_ls));
+
+	ls->drop_locks_count = gdlm_drop_count;
+	ls->drop_locks_period = gdlm_drop_period;
+
+	ls->fscb = cb;
+	ls->fsdata = fsdata;
+	ls->fsflags = flags;
+
+	spin_lock_init(&ls->async_lock);
+
+	INIT_LIST_HEAD(&ls->complete);
+	INIT_LIST_HEAD(&ls->blocking);
+	INIT_LIST_HEAD(&ls->delayed);
+	INIT_LIST_HEAD(&ls->submit);
+	INIT_LIST_HEAD(&ls->all_locks);
+
+	init_waitqueue_head(&ls->thread_wait);
+	init_waitqueue_head(&ls->wait_control);
+	ls->thread1 = NULL;
+	ls->thread2 = NULL;
+	ls->drop_time = jiffies;
+	ls->jid = -1;
+	/* table_name is "<clustername>:<fsname>"; split a bounded copy of it */
+	strncpy(buf, table_name, sizeof(buf));
+	buf[sizeof(buf) - 1] = '\0';
+
+	p = strstr(buf, ":");
+	if (!p) {
+		printk("lock_dlm: invalid table_name \"%s\"\n", table_name);
+		kfree(ls);
+		return NULL;
+	}
+	*p = '\0';
+	p++;
+	/* copy at most size-1 bytes: ls is zeroed, so the last byte stays NUL */
+	strncpy(ls->clustername, buf, sizeof(ls->clustername) - 1);
+	strncpy(ls->fsname, p, sizeof(ls->fsname) - 1);
+
+	return ls;
+}
+
+static int gdlm_mount(char *table_name, char *host_data,
+			lm_callback_t cb, lm_fsdata_t *fsdata,
+			unsigned int min_lvb_size, int flags,
+			struct lm_lockstruct *lockstruct)
+{
+	struct gdlm_ls *ls;
+	int error = -ENOMEM;
+	/* NOTE(review): an oversized min_lvb_size is reported as -ENOMEM */
+	if (min_lvb_size > GDLM_LVB_SIZE)
+		goto out;
+
+	ls = init_gdlm(cb, fsdata, flags, table_name);
+	if (!ls)
+		goto out;
+
+	error = gdlm_init_threads(ls);
+	if (error)
+		goto out_free;
+	/* the dlm lockspace is named after the fs part of table_name */
+	error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
+				  &ls->dlm_lockspace, 0, GDLM_LVB_SIZE);
+	if (error) {
+		printk("lock_dlm: dlm_new_lockspace error %d\n", error);
+		goto out_thread;
+	}
+
+	error = gdlm_kobject_setup(ls);
+	if (error)
+		goto out_dlm;
+	kobject_uevent(&ls->kobj, KOBJ_MOUNT, NULL);
+
+	/* Now we depend on userspace to notice the new mount,
+	   join the appropriate group, and do a write to our sysfs
+	   "mounted" or "terminate" file.  Before the start, userspace
+	   must set "jid" and "first". */
+
+	error = wait_event_interruptible(ls->wait_control,
+			test_bit(DFL_JOIN_DONE, &ls->flags));
+	if (error)
+		goto out_sysfs;
+	/* gdlm_mounted_store(-1) sets TERMINATE together with JOIN_DONE */
+	if (test_bit(DFL_TERMINATE, &ls->flags)) {
+		error = -ERESTARTSYS;
+		goto out_sysfs;
+	}
+	/* success: report the lockspace and its parameters back to gfs */
+	lockstruct->ls_jid = ls->jid;
+	lockstruct->ls_first = ls->first;
+	lockstruct->ls_lockspace = ls;
+	lockstruct->ls_ops = &gdlm_ops;
+	lockstruct->ls_flags = 0;
+	lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
+	return 0;
+	/* error unwind: each label undoes one more setup step */
+ out_sysfs:
+	gdlm_kobject_release(ls);
+ out_dlm:
+	dlm_release_lockspace(ls->dlm_lockspace, 2);
+ out_thread:
+	gdlm_release_threads(ls);
+ out_free:
+	kfree(ls);
+ out:
+	return error;
+}
+
+static void gdlm_unmount(lm_lockspace_t *lockspace)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+	int rv;
+
+	log_debug("unmount flags %lx", ls->flags);
+	/* gdlm_withdraw() already released the lockspace, threads and locks */
+	if (test_bit(DFL_WITHDRAW, &ls->flags)) {
+		gdlm_kobject_release(ls);
+		goto out;
+	}
+
+	kobject_uevent(&ls->kobj, KOBJ_UMOUNT, NULL);
+	/* NOTE(review): an interrupted wait is not detected here */
+	wait_event_interruptible(ls->wait_control,
+				 test_bit(DFL_LEAVE_DONE, &ls->flags));
+
+	gdlm_kobject_release(ls);
+	dlm_release_lockspace(ls->dlm_lockspace, 2);
+	gdlm_release_threads(ls);
+	rv = gdlm_release_all_locks(ls);	/* count of locks gfs never put */
+	if (rv)
+		log_info("lm_dlm_unmount: %d stray locks freed", rv);
+ out:
+	kfree(ls);
+}
+
+static void gdlm_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
+                               unsigned int message)	/* @message is not used */
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+	ls->recover_done = jid;		/* record the jid, then notify userspace */
+	kobject_uevent(&ls->kobj, KOBJ_CHANGE, NULL);
+}
+
+static void gdlm_others_may_mount(lm_lockspace_t *lockspace)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+	ls->first_done = 1;		/* record it, then notify userspace */
+	kobject_uevent(&ls->kobj, KOBJ_CHANGE, NULL);
+}
+
+static void gdlm_withdraw(lm_lockspace_t *lockspace)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+
+	/* userspace suspends locking on all other members */
+
+	kobject_uevent(&ls->kobj, KOBJ_OFFLINE, NULL);
+	/* NOTE(review): an interrupted wait is not detected here */
+	wait_event_interruptible(ls->wait_control,
+				 test_bit(DFL_WITHDRAW, &ls->flags));
+	/* drop all locking state; ls and its kobject stay for gdlm_unmount() */
+	dlm_release_lockspace(ls->dlm_lockspace, 2);
+	gdlm_release_threads(ls);
+	gdlm_release_all_locks(ls);
+
+	kobject_uevent(&ls->kobj, KOBJ_UMOUNT, NULL);
+
+	/* userspace leaves the mount group, we don't need to wait for
+	   that to complete */
+}
+
+struct lm_lockops gdlm_ops = {	/* registered by init_lock_dlm() */
+	.lm_proto_name = "lock_dlm",
+	.lm_mount = gdlm_mount,
+	.lm_others_may_mount = gdlm_others_may_mount,
+	.lm_unmount = gdlm_unmount,
+	.lm_withdraw = gdlm_withdraw,
+	.lm_get_lock = gdlm_get_lock,
+	.lm_put_lock = gdlm_put_lock,
+	.lm_lock = gdlm_lock,
+	.lm_unlock = gdlm_unlock,
+	.lm_plock = gdlm_plock,
+	.lm_punlock = gdlm_punlock,
+	.lm_plock_get = gdlm_plock_get,	/* always returns -ENOSYS */
+	.lm_cancel = gdlm_cancel,
+	.lm_hold_lvb = gdlm_hold_lvb,
+	.lm_unhold_lvb = gdlm_unhold_lvb,
+	.lm_sync_lvb = gdlm_sync_lvb,
+	.lm_recovery_done = gdlm_recovery_done,
+	.lm_owner = THIS_MODULE,
+};
+
diff -urpN a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
--- a/fs/gfs2/locking/dlm/plock.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/plock.c	2005-09-01 17:48:48.148748288 +0800
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+#include <linux/lock_dlm_plock.h>
+
+#include <linux/miscdevice.h>
+
+static spinlock_t ops_lock;		/* protects send_list and recv_list */
+static struct list_head send_list;	/* ops not yet read by dev_read() */
+static struct list_head recv_list;	/* ops read, awaiting dev_write() */
+static wait_queue_head_t send_wq;	/* woken when an op is queued */
+static wait_queue_head_t recv_wq;	/* woken when an op is done */
+
+struct plock_op {
+	struct list_head list;		/* on send_list or recv_list */
+	int done;			/* set to 1 by dev_write() */
+	struct gdlm_plock_info info;	/* request out, result back in */
+};
+
+static inline void set_version(struct gdlm_plock_info *info)	/* stamp kernel's version */
+{
+	info->version[0] = GDLM_PLOCK_VERSION_MAJOR;
+	info->version[1] = GDLM_PLOCK_VERSION_MINOR;
+	info->version[2] = GDLM_PLOCK_VERSION_PATCH;
+}
+
+static int check_version(struct gdlm_plock_info *info)
+{
+	/* majors must match; the user minor may not exceed the kernel's */
+	if (info->version[0] == GDLM_PLOCK_VERSION_MAJOR &&
+	    info->version[1] <= GDLM_PLOCK_VERSION_MINOR)
+		return 0;
+	log_error("plock device version mismatch: "
+		  "kernel (%u.%u.%u), user (%u.%u.%u)",
+		  GDLM_PLOCK_VERSION_MAJOR,
+		  GDLM_PLOCK_VERSION_MINOR,
+		  GDLM_PLOCK_VERSION_PATCH,
+		  info->version[0],
+		  info->version[1],
+		  info->version[2]);
+	return -EINVAL;
+}
+
+int gdlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+	       struct file *file, int cmd, struct file_lock *fl)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+	struct plock_op *op;
+	int rv;
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op)
+		return -ENOMEM;
+
+	log_debug("en plock %x,%"PRIx64"", name->ln_type, name->ln_number);
+	/* NOTE(review): if fl_owner is a pointer, the cast keeps 32 bits only */
+	set_version(&op->info);
+	op->info.optype		= GDLM_PLOCK_OP_LOCK;
+	op->info.pid		= (uint32_t) fl->fl_owner;
+	op->info.ex		= (fl->fl_type == F_WRLCK);
+	op->info.wait		= IS_SETLKW(cmd);
+	op->info.fsid		= ls->id;
+	op->info.number		= name->ln_number;
+	op->info.start		= fl->fl_start;
+	op->info.end		= fl->fl_end;
+	/* queue the request for whoever reads the plock misc device */
+	INIT_LIST_HEAD(&op->list);
+	spin_lock(&ops_lock);
+	list_add_tail(&op->list, &send_list);
+	spin_unlock(&ops_lock);
+	wake_up(&send_wq);
+	/* sleep (uninterruptibly) until dev_write() marks the op done */
+	wait_event(recv_wq, (op->done != 0));
+	/* dev_write() unlinks op before setting done, so this is unexpected */
+	spin_lock(&ops_lock);
+	if (!list_empty(&op->list)) {
+		printk("plock op on list\n");
+		list_del(&op->list);
+	}
+	spin_unlock(&ops_lock);
+
+	log_debug("ex plock done %d rv %d", op->done, op->info.rv);
+
+	rv = op->info.rv;
+	/* rv == 0: also take the lock locally through the vfs */
+	if (!rv) {
+		if (posix_lock_file_wait(file, fl) < 0)
+			log_error("gdlm_plock: vfs lock error %x,%"PRIx64"",
+				  name->ln_type, name->ln_number);
+	}
+
+	kfree(op);
+	return rv;
+}
+
+int gdlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+		 struct file *file, struct file_lock *fl)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+	struct plock_op *op;
+	int rv;
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op)
+		return -ENOMEM;
+
+	log_debug("en punlock %x,%"PRIx64"", name->ln_type, name->ln_number);
+	/* the local vfs call comes first; a failure is only logged */
+	if (posix_lock_file_wait(file, fl) < 0)
+		log_error("gdlm_punlock: vfs unlock error %x,%"PRIx64"",
+			  name->ln_type, name->ln_number);
+	/* NOTE(review): if fl_owner is a pointer, the cast keeps 32 bits only */
+	set_version(&op->info);
+	op->info.optype		= GDLM_PLOCK_OP_UNLOCK;
+	op->info.pid		= (uint32_t) fl->fl_owner;
+	op->info.fsid		= ls->id;
+	op->info.number		= name->ln_number;
+	op->info.start		= fl->fl_start;
+	op->info.end		= fl->fl_end;
+	/* queue the request for whoever reads the plock misc device */
+	INIT_LIST_HEAD(&op->list);
+	spin_lock(&ops_lock);
+	list_add_tail(&op->list, &send_list);
+	spin_unlock(&ops_lock);
+	wake_up(&send_wq);
+	/* sleep (uninterruptibly) until dev_write() marks the op done */
+	wait_event(recv_wq, (op->done != 0));
+	/* dev_write() unlinks op before setting done, so this is unexpected */
+	spin_lock(&ops_lock);
+	if (!list_empty(&op->list)) {
+		printk("plock op on list\n");
+		list_del(&op->list);
+	}
+	spin_unlock(&ops_lock);
+
+	log_debug("ex punlock done %d rv %d", op->done, op->info.rv);
+
+	rv = op->info.rv;
+
+	kfree(op);
+	return rv;
+}
+
+int gdlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
+		   struct file *file, struct file_lock *fl)
+{
+	return -ENOSYS;	/* not implemented: every argument is ignored */
+}
+
+/* a read copies out one plock request from the send list */
+static ssize_t dev_read(struct file *file, char __user *u, size_t count,
+			loff_t *ppos)
+{
+	struct gdlm_plock_info info;
+	struct plock_op *op = NULL;
+	/* the caller's buffer must hold one whole gdlm_plock_info */
+	if (count < sizeof(info))
+		return -EINVAL;
+	/* take the oldest pending op, park it on recv_list, snapshot its info */
+	spin_lock(&ops_lock);
+	if (!list_empty(&send_list)) {
+		op = list_entry(send_list.next, struct plock_op, list);
+		list_move(&op->list, &recv_list);
+		memcpy(&info, &op->info, sizeof(info));
+	}
+	spin_unlock(&ops_lock);
+	/* nothing queued: this read never blocks */
+	if (!op)
+		return -EAGAIN;
+
+	log_debug("send %"PRIx64" op %d ex %d wait %d", info.number,
+		  info.optype, info.ex, info.wait);
+	/* NOTE(review): on -EFAULT the op has already moved to recv_list */
+	if (copy_to_user(u, &info, sizeof(info)))
+		return -EFAULT;
+	return sizeof(info);
+}
+
+/* a write copies in one plock result that should match a plock_op
+   on the recv list */
+static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
+			 loff_t *ppos)
+{
+	struct gdlm_plock_info info;
+	struct plock_op *op;
+	int found = 0;
+	/* exactly one gdlm_plock_info per write */
+	if (count != sizeof(info))
+		return -EINVAL;
+
+	if (copy_from_user(&info, u, sizeof(info)))
+		return -EFAULT;
+
+	if (check_version(&info))
+		return -EINVAL;
+
+	log_debug("recv %"PRIx64" op %d ex %d wait %d", info.number,
+		  info.optype, info.ex, info.wait);
+	/* complete the first op on recv_list with the same fsid and number */
+	spin_lock(&ops_lock);
+	list_for_each_entry(op, &recv_list, list) {
+		if (op->info.fsid == info.fsid &&
+		    op->info.number == info.number) {
+			list_del_init(&op->list);
+			found = 1;
+			op->done = 1;
+			memcpy(&op->info, &info, sizeof(info));
+			break;
+		}
+	}
+	spin_unlock(&ops_lock);
+	/* an unmatched result is logged, but the write still succeeds */
+	if (found)
+		wake_up(&recv_wq);
+	else
+		printk("gdlm dev_write no op %x %"PRIx64"\n", info.fsid,
+			info.number);
+	return count;
+}
+
+static unsigned int dev_poll(struct file *file, poll_table *wait)
+{
+	unsigned int mask = 0;
+
+	poll_wait(file, &send_wq, wait);
+	/* readable whenever a request is waiting on send_list */
+	spin_lock(&ops_lock);
+	if (!list_empty(&send_list))
+		mask = POLLIN | POLLRDNORM;
+	spin_unlock(&ops_lock);
+	return mask;
+}
+
+static struct file_operations dev_fops = {
+	.read    = dev_read,
+	.write   = dev_write,
+	.poll    = dev_poll,
+	.owner   = THIS_MODULE
+};
+
+static struct miscdevice plock_dev_misc = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = GDLM_PLOCK_MISC_NAME,
+	.fops = &dev_fops
+};
+
+int gdlm_plock_init(void)
+{
+	int rv;
+	/* set up the op lists and wait queues before the device can be opened */
+	spin_lock_init(&ops_lock);
+	INIT_LIST_HEAD(&send_list);
+	INIT_LIST_HEAD(&recv_list);
+	init_waitqueue_head(&send_wq);
+	init_waitqueue_head(&recv_wq);
+
+	rv = misc_register(&plock_dev_misc);
+	if (rv)
+		printk("gdlm_plock_init: misc_register failed %d\n", rv);
+	return rv;	/* 0, or the misc_register() error */
+}
+
+void gdlm_plock_exit(void)	/* remove the plock misc device */
+{
+	if (misc_deregister(&plock_dev_misc) < 0)
+		printk("gdlm_plock_exit: misc_deregister failed\n");
+}
+
diff -urpN a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
--- a/fs/gfs2/locking/dlm/sysfs.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/sysfs.c	2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/ctype.h>
+#include <linux/stat.h>
+
+#include "lock_dlm.h"
+
+/* sysfs "block" read handler.
+   Writes "1\n" into buf when DFL_BLOCK_LOCKS is set in ls->flags and "0\n"
+   when it is not; the return value is the length reported by sprintf(). */
+static ssize_t gdlm_block_show(struct gdlm_ls *ls, char *buf)
+{
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->flags))
+		return sprintf(buf, "%d\n", 1);
+
+	return sprintf(buf, "%d\n", 0);
+}
+
+/* sysfs "block" write: 1 sets DFL_BLOCK_LOCKS, 0 clears it and then calls
+   gdlm_submit_delayed(); any other parsed value is rejected with -EINVAL. */
+static ssize_t gdlm_block_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	switch ((int) simple_strtol(buf, NULL, 0)) {
+	case 0:
+		clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
+		gdlm_submit_delayed(ls);
+		return len;
+	case 1:
+		set_bit(DFL_BLOCK_LOCKS, &ls->flags);
+		return len;
+	default:
+		return -EINVAL;
+	}
+}
+
+/* sysfs "mounted" read: -1 if DFL_TERMINATE is set, else 0 if DFL_LEAVE_DONE,
+   else 1 if DFL_JOIN_DONE, else -2; the flags are tested in that order. */
+static ssize_t gdlm_mounted_show(struct gdlm_ls *ls, char *buf)
+{
+	int state;
+
+	if (test_bit(DFL_TERMINATE, &ls->flags))
+		state = -1;
+	else if (test_bit(DFL_LEAVE_DONE, &ls->flags))
+		state = 0;
+	else
+		state = test_bit(DFL_JOIN_DONE, &ls->flags) ? 1 : -2;
+	return sprintf(buf, "%d\n", state);
+}
+
+/* sysfs "mounted" write: 1 sets DFL_JOIN_DONE, 0 sets DFL_LEAVE_DONE and -1
+   sets DFL_TERMINATE plus both done flags; anything else gives -EINVAL.
+   Waiters on ls->wait_control are woken in every case, errors included. */
+static ssize_t gdlm_mounted_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	int val = simple_strtol(buf, NULL, 0);
+	ssize_t ret = len;
+
+	if (val == -1)
+		set_bit(DFL_TERMINATE, &ls->flags);
+	if (val == 1 || val == -1)
+		set_bit(DFL_JOIN_DONE, &ls->flags);
+	if (val == 0 || val == -1)
+		set_bit(DFL_LEAVE_DONE, &ls->flags);
+	if (val < -1 || val > 1)
+		ret = -EINVAL;
+
+	wake_up(&ls->wait_control);
+	return ret;
+}
+
+/* sysfs "withdraw" read: print 1 when DFL_WITHDRAW is set in ls->flags and 0
+   otherwise, newline-terminated; returns the length written. */
+static ssize_t gdlm_withdraw_show(struct gdlm_ls *ls, char *buf)
+{
+	int withdrawn;
+
+	withdrawn = test_bit(DFL_WITHDRAW, &ls->flags) ? 1 : 0;
+
+	return sprintf(buf, "%d\n", withdrawn);
+}
+
+/* sysfs "withdraw" write: only the value 1 is accepted and sets DFL_WITHDRAW;
+   other input returns -EINVAL.  ls->wait_control is woken either way. */
+static ssize_t gdlm_withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	int val = simple_strtol(buf, NULL, 0);
+	ssize_t ret = -EINVAL;
+
+	if (val == 1) {
+		set_bit(DFL_WITHDRAW, &ls->flags);
+		ret = len;
+	}
+	wake_up(&ls->wait_control);
+	return ret;
+}
+
+static ssize_t gdlm_id_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%u\n", ls->id);	/* sysfs "id" read */
+}
+
+static ssize_t gdlm_id_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	ls->id = simple_strtoul(buf, NULL, 0);	/* sysfs "id" write; input is not validated */
+	return len;	/* the whole write is always reported as consumed */
+}
+
+static ssize_t gdlm_jid_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%d\n", ls->jid);	/* sysfs "jid" read */
+}
+
+static ssize_t gdlm_jid_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	ls->jid = simple_strtol(buf, NULL, 0);	/* sysfs "jid" write; input is not validated */
+	return len;	/* the whole write is always reported as consumed */
+}
+
+static ssize_t gdlm_first_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%d\n", ls->first);	/* sysfs "first" read */
+}
+
+static ssize_t gdlm_first_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	ls->first = simple_strtol(buf, NULL, 0);	/* sysfs "first" write; input is not validated */
+	return len;	/* the whole write is always reported as consumed */
+}
+
+static ssize_t gdlm_first_done_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%d\n", ls->first_done);	/* sysfs "first_done" read (no store) */
+}
+
+static ssize_t gdlm_recover_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%d\n", ls->recover_jid);	/* sysfs "recover" read */
+}
+
+static ssize_t gdlm_recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+	ls->recover_jid = simple_strtol(buf, NULL, 0);	/* sysfs "recover" write; not validated */
+	ls->fscb(ls->fsdata, LM_CB_NEED_RECOVERY, &ls->recover_jid);
+	return len;	/* the callback above is given a pointer to ls->recover_jid */
+}
+
+/* sysfs "recover_done" read: prints ls->recover_done as a signed decimal
+   followed by a newline and returns the number of bytes sprintf() wrote. */
+static ssize_t gdlm_recover_done_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%d\n", ls->recover_done);
+}
+
+/* sysfs "cluster" read: prints ls->clustername followed by a newline and
+   returns the number of bytes sprintf() wrote. */
+static ssize_t gdlm_cluster_show(struct gdlm_ls *ls, char *buf)
+{
+	return sprintf(buf, "%s\n", ls->clustername);
+}
+
+/* sysfs "options" read: emits "spectator " when LM_MFLAG_SPECTATOR is set in
+   ls->fsflags and nothing (length 0) otherwise; no newline is appended. */
+static ssize_t gdlm_options_show(struct gdlm_ls *ls, char *buf)
+{
+	if (!(ls->fsflags & LM_MFLAG_SPECTATOR))
+		return 0;
+
+	return sprintf(buf, "spectator ");
+}
+
+struct gdlm_attr {	/* one sysfs file: generic attribute plus typed handlers */
+	struct attribute attr;
+	ssize_t (*show)(struct gdlm_ls *, char *);	/* may be NULL */
+	ssize_t (*store)(struct gdlm_ls *, const char *, size_t);	/* may be NULL */
+};
+
+#define GDLM_ATTR(_name,_mode,_show,_store) \
+static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
+/* One gdlm_attr_<name> object per file; S_IRUGO-only ones pass a NULL store. */
+GDLM_ATTR(block, S_IRUGO | S_IWUSR, gdlm_block_show, gdlm_block_store);
+GDLM_ATTR(mounted, S_IRUGO | S_IWUSR, gdlm_mounted_show, gdlm_mounted_store);
+GDLM_ATTR(withdraw, S_IRUGO | S_IWUSR, gdlm_withdraw_show, gdlm_withdraw_store);
+GDLM_ATTR(id, S_IRUGO | S_IWUSR, gdlm_id_show, gdlm_id_store);
+GDLM_ATTR(jid, S_IRUGO | S_IWUSR, gdlm_jid_show, gdlm_jid_store);
+GDLM_ATTR(first, S_IRUGO | S_IWUSR, gdlm_first_show, gdlm_first_store);
+GDLM_ATTR(first_done, S_IRUGO, gdlm_first_done_show, NULL);
+GDLM_ATTR(recover, S_IRUGO | S_IWUSR, gdlm_recover_show, gdlm_recover_store);
+GDLM_ATTR(recover_done, S_IRUGO, gdlm_recover_done_show, NULL);
+GDLM_ATTR(cluster, S_IRUGO, gdlm_cluster_show, NULL);
+GDLM_ATTR(options, S_IRUGO, gdlm_options_show, NULL);
+
+static struct attribute *gdlm_attrs[] = {	/* used as gdlm_ktype.default_attrs */
+	&gdlm_attr_block.attr,
+	&gdlm_attr_mounted.attr,
+	&gdlm_attr_withdraw.attr,
+	&gdlm_attr_id.attr,
+	&gdlm_attr_jid.attr,
+	&gdlm_attr_first.attr,
+	&gdlm_attr_first_done.attr,
+	&gdlm_attr_recover.attr,
+	&gdlm_attr_recover_done.attr,
+	&gdlm_attr_cluster.attr,
+	&gdlm_attr_options.attr,
+	NULL,	/* end of list */
+};
+
+static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
+			      char *buf)
+{
+	struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
+	struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
+	return a->show ? a->show(ls, buf) : 0;	/* no show handler: empty read */
+}
+
+static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
+	struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
+	return a->store ? a->store(ls, buf, len) : len;	/* no handler: input discarded */
+}
+
+static struct sysfs_ops gdlm_attr_ops = {	/* dispatch to the per-attribute handlers */
+	.show  = gdlm_attr_show,
+	.store = gdlm_attr_store,
+};
+
+static struct kobj_type gdlm_ktype = {	/* assigned to ls->kobj.ktype in gdlm_kobject_setup() */
+	.default_attrs = gdlm_attrs,
+	.sysfs_ops     = &gdlm_attr_ops,
+};
+
+static struct kset gdlm_kset = {	/* assigned to ls->kobj.kset in gdlm_kobject_setup() */
+	.subsys = &kernel_subsys,
+	.kobj   = {.name = "lock_dlm",},
+	.ktype  = &gdlm_ktype,
+};
+
+/* Name ls->kobj after ls->fsname and register it under gdlm_kset with
+   gdlm_ktype.  Returns 0 or the error from naming or from registration. */
+int gdlm_kobject_setup(struct gdlm_ls *ls)
+{
+	int error;
+
+	error = kobject_set_name(&ls->kobj, "%s", ls->fsname);
+	if (error)
+		return error;
+
+	ls->kobj.kset = &gdlm_kset;
+	ls->kobj.ktype = &gdlm_ktype;
+	/* Propagate a registration failure instead of reporting success. */
+	return kobject_register(&ls->kobj);
+}
+
+void gdlm_kobject_release(struct gdlm_ls *ls)
+{
+	kobject_unregister(&ls->kobj);	/* counterpart of gdlm_kobject_setup() */
+}
+
+int gdlm_sysfs_init(void)
+{
+	int error;
+
+	error = kset_register(&gdlm_kset);
+	if (error)
+		printk("lock_dlm: cannot register kset %d\n", error);
+
+	return error;
+}
+
+void gdlm_sysfs_exit(void)
+{
+	kset_unregister(&gdlm_kset);	/* counterpart of gdlm_sysfs_init() */
+}
+
diff -urpN a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
--- a/fs/gfs2/locking/dlm/thread.c	1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/thread.c	2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+
+/* Append lp (via its delay_list link) to the tail of its lockspace's submit
+   list under async_lock, then wake the lock_dlm threads, which re-submit it
+   to DLM as soon as one of them gets to it. */
+static void queue_submit(struct gdlm_lock *lp)
+{
+	struct gdlm_ls *lockspace = lp->ls;
+
+	spin_lock(&lockspace->async_lock);
+	list_add_tail(&lp->delay_list, &lockspace->submit);
+	spin_unlock(&lockspace->async_lock);
+	wake_up(&lockspace->thread_wait);
+}
+
+static void process_submit(struct gdlm_lock *lp)
+{
+	gdlm_do_lock(lp, NULL);	/* called by gdlm_thread for locks taken off ls->submit */
+}
+
+/* Blocking callback: map bast_mode to LM_CB_NEED_{E,D,S} and pass it to GFS. */
+static void process_blocking(struct gdlm_lock *lp, int bast_mode)
+{
+	unsigned int cb;
+
+	switch (gdlm_make_lmstate(bast_mode)) {
+	case LM_ST_EXCLUSIVE:
+		cb = LM_CB_NEED_E;
+		break;
+	case LM_ST_DEFERRED:
+		cb = LM_CB_NEED_D;
+		break;
+	case LM_ST_SHARED:
+		cb = LM_CB_NEED_S;
+		break;
+	default:
+		/* gdlm_thread zeroes lp->bast_mode; print the mode passed in */
+		GDLM_ASSERT(0, printk("unknown bast mode %d\n", bast_mode););
+	}
+	lp->ls->fscb(lp->ls->fsdata, cb, &lp->lockname);
+}
+
+static void process_complete(struct gdlm_lock *lp)
+{
+	struct gdlm_ls *ls = lp->ls;
+	struct lm_async_cb acb;
+	int16_t prev_mode = lp->cur;
+	/* Act on the DLM result in lp->lksb; each case is described below. */
+	memset(&acb, 0, sizeof(acb));
+
+	if (lp->lksb.sb_status == -DLM_ECANCEL) {
+		log_info("complete dlm cancel %x,%"PRIx64" flags %lx",
+		 	 lp->lockname.ln_type, lp->lockname.ln_number,
+			 lp->flags);
+
+		lp->req = lp->cur;
+		acb.lc_ret |= LM_OUT_CANCELED;
+		if (lp->cur == DLM_LOCK_IV)
+			lp->lksb.sb_lkid = 0;
+		goto out;
+	}
+
+	if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
+		if (lp->lksb.sb_status != -DLM_EUNLOCK) {
+			log_info("unlock sb_status %d %x,%"PRIx64" flags %lx",
+				 lp->lksb.sb_status, lp->lockname.ln_type,
+				 lp->lockname.ln_number, lp->flags);
+			return;	/* unexpected status: logged, no callback is made */
+		}
+
+		lp->cur = DLM_LOCK_IV;
+		lp->req = DLM_LOCK_IV;
+		lp->lksb.sb_lkid = 0;
+
+		if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
+			gdlm_delete_lp(lp);
+			return;
+		}
+		goto out;
+	}
+
+	if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
+		memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
+
+	if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
+		if (lp->req == DLM_LOCK_PR)
+			lp->req = DLM_LOCK_CW;
+		else if (lp->req == DLM_LOCK_CW)
+			lp->req = DLM_LOCK_PR;
+	}
+
+	/*
+	 * A canceled lock request.  The lock was just taken off the delayed
+	 * list and was never even submitted to dlm.
+	 */
+
+	if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
+		log_info("complete internal cancel %x,%"PRIx64"",
+		 	 lp->lockname.ln_type, lp->lockname.ln_number);
+		lp->req = lp->cur;
+		acb.lc_ret |= LM_OUT_CANCELED;
+		goto out;
+	}
+
+	/*
+	 * An error occurred.
+	 */
+
+	if (lp->lksb.sb_status) {
+		/* a "normal" error */
+		if ((lp->lksb.sb_status == -EAGAIN) &&
+		    (lp->lkf & DLM_LKF_NOQUEUE)) {
+			lp->req = lp->cur;
+			if (lp->cur == DLM_LOCK_IV)
+				lp->lksb.sb_lkid = 0;
+			goto out;
+		}
+
+		/* this could only happen with cancels I think */
+		log_info("ast sb_status %d %x,%"PRIx64" flags %lx",
+			 lp->lksb.sb_status, lp->lockname.ln_type,
+			 lp->lockname.ln_number, lp->flags);
+		return;	/* any other error is logged and no callback is made */
+	}
+
+	/*
+	 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
+	 */
+
+	if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
+		complete(&lp->ast_wait);
+		return;
+	}
+
+	/*
+	 * A lock has been demoted to NL because it initially completed during
+	 * BLOCK_LOCKS.  Now it must be requested in the originally requested
+	 * mode.
+	 */
+
+	if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
+		GDLM_ASSERT(lp->req == DLM_LOCK_NL,);
+		GDLM_ASSERT(lp->prev_req > DLM_LOCK_NL,);
+
+		lp->cur = DLM_LOCK_NL;
+		lp->req = lp->prev_req;
+		lp->prev_req = DLM_LOCK_IV;
+		lp->lkf &= ~DLM_LKF_CONVDEADLK;
+
+		set_bit(LFL_NOCACHE, &lp->flags);
+
+		if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
+		    !test_bit(LFL_NOBLOCK, &lp->flags))
+			gdlm_queue_delayed(lp);
+		else
+			queue_submit(lp);
+		return;
+	}
+
+	/*
+	 * A request is granted during dlm recovery.  It may be granted
+	 * because the locks of a failed node were cleared.  In that case,
+	 * there may be inconsistent data beneath this lock and we must wait
+	 * for recovery to complete to use it.  When gfs recovery is done this
+	 * granted lock will be converted to NL and then reacquired in this
+	 * granted state.
+	 */
+
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
+	    !test_bit(LFL_NOBLOCK, &lp->flags) &&
+	    lp->req != DLM_LOCK_NL) {
+
+		lp->cur = lp->req;
+		lp->prev_req = lp->req;
+		lp->req = DLM_LOCK_NL;
+		lp->lkf |= DLM_LKF_CONVERT;
+		lp->lkf &= ~DLM_LKF_CONVDEADLK;
+
+		log_debug("rereq %x,%"PRIx64" id %x %d,%d",
+			  lp->lockname.ln_type, lp->lockname.ln_number,
+			  lp->lksb.sb_lkid, lp->cur, lp->req);
+
+		set_bit(LFL_REREQUEST, &lp->flags);
+		queue_submit(lp);
+		return;
+	}
+
+	/*
+	 * DLM demoted the lock to NL before it was granted so GFS must be
+	 * told it cannot cache data for this lock.
+	 */
+
+	if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
+		set_bit(LFL_NOCACHE, &lp->flags);
+
+ out:
+	/*
+	 * This is an internal lock_dlm lock
+	 */
+
+	if (test_bit(LFL_INLOCK, &lp->flags)) {
+		clear_bit(LFL_NOBLOCK, &lp->flags);
+		lp->cur = lp->req;
+		complete(&lp->ast_wait);
+		return;
+	}
+
+	/*
+	 * Normal completion of a lock request.  Tell GFS it now has the lock.
+	 */
+
+	clear_bit(LFL_NOBLOCK, &lp->flags);
+	lp->cur = lp->req;
+
+	acb.lc_name = lp->lockname;
+	acb.lc_ret |= gdlm_make_lmstate(lp->cur);
+
+	if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
+	    (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
+		acb.lc_ret |= LM_OUT_CACHEABLE;
+
+	ls->fscb(ls->fsdata, LM_CB_ASYNC, &acb);
+}
+
+/* Return non-zero when, under async_lock, the complete and submit lists (and
+   the blocking list too if @blocking is set) are all empty. */
+static inline int no_work(struct gdlm_ls *ls, int blocking)
+{
+	int idle;
+
+	spin_lock(&ls->async_lock);
+	idle = list_empty(&ls->complete) && list_empty(&ls->submit) &&
+	       (!blocking || list_empty(&ls->blocking));
+	spin_unlock(&ls->async_lock);
+	return idle;
+}
+
+/* Rate-limited drop check: at most once per drop_locks_period seconds, return
+   1 if all_locks_count has reached drop_locks_count (0 disables the check). */
+static inline int check_drop(struct gdlm_ls *ls)
+{
+	if (!ls->drop_locks_count ||
+	    !time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ))
+		return 0;
+
+	/* The period has elapsed: restart it before evaluating the count. */
+	ls->drop_time = jiffies;
+	return ls->all_locks_count >= ls->drop_locks_count;
+}
+
+/* Main loop shared by both lock_dlm threads: sleep until there is work, then
+   take one lock off the blocking, complete or submit list and process it. */
+static int gdlm_thread(void *data)
+{
+	struct gdlm_ls *ls = (struct gdlm_ls *) data;
+	struct gdlm_lock *lp = NULL;
+	int blist = 0;
+	uint8_t complete, blocking, submit, drop;
+	DECLARE_WAITQUEUE(wait, current);
+
+	/* Only thread1 is allowed to do blocking callbacks since gfs
+	   may wait for a completion callback within a blocking cb.
+	   This test relies on ls->thread1 being set before we first run. */
+	if (current == ls->thread1)
+		blist = 1;
+
+	while (!kthread_should_stop()) {
+		/* Check the stop flag again once TASK_INTERRUPTIBLE is set:
+		   a kthread_stop() wakeup delivered before the state change
+		   would otherwise be lost and the thread would sleep on. */
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&ls->thread_wait, &wait);
+		if (no_work(ls, blist) && !kthread_should_stop())
+			schedule();
+		remove_wait_queue(&ls->thread_wait, &wait);
+		set_current_state(TASK_RUNNING);
+
+		complete = blocking = submit = drop = 0;
+		spin_lock(&ls->async_lock);
+		if (blist && !list_empty(&ls->blocking)) {
+			lp = list_entry(ls->blocking.next, struct gdlm_lock,
+					blist);
+			list_del_init(&lp->blist);
+			blocking = lp->bast_mode;
+			lp->bast_mode = 0;
+		} else if (!list_empty(&ls->complete)) {
+			lp = list_entry(ls->complete.next, struct gdlm_lock,
+					clist);
+			list_del_init(&lp->clist);
+			complete = 1;
+		} else if (!list_empty(&ls->submit)) {
+			lp = list_entry(ls->submit.next, struct gdlm_lock,
+					delay_list);
+			list_del_init(&lp->delay_list);
+			submit = 1;
+		}
+
+		drop = check_drop(ls);
+		spin_unlock(&ls->async_lock);
+
+		if (complete)
+			process_complete(lp);
+		else if (blocking)
+			process_blocking(lp, blocking);
+		else if (submit)
+			process_submit(lp);
+
+		if (drop)
+			ls->fscb(ls->fsdata, LM_CB_DROPLOCKS, NULL);
+		schedule();
+	}
+
+	return 0;
+}
+
+/* Start both lock_dlm threads, publishing each task in ls before waking it
+   (gdlm_thread() tests ls->thread1).  Returns 0 or a negative errno. */
+int gdlm_init_threads(struct gdlm_ls *ls)
+{
+	struct task_struct *p;
+
+	p = kthread_create(gdlm_thread, ls, "lock_dlm1");
+	if (IS_ERR(p)) {
+		log_error("can't start lock_dlm1 thread %ld", PTR_ERR(p));
+		return PTR_ERR(p);
+	}
+	ls->thread1 = p;
+	wake_up_process(p);
+
+	p = kthread_create(gdlm_thread, ls, "lock_dlm2");
+	if (IS_ERR(p)) {
+		log_error("can't start lock_dlm2 thread %ld", PTR_ERR(p));
+		kthread_stop(ls->thread1);
+		return PTR_ERR(p);
+	}
+	ls->thread2 = p;
+	wake_up_process(p);
+	return 0;
+}
+
+void gdlm_release_threads(struct gdlm_ls *ls)
+{
+	kthread_stop(ls->thread1);	/* stops the threads started by gdlm_init_threads() */
+	kthread_stop(ls->thread2);
+}
+
diff -urpN a/include/linux/lock_dlm_plock.h b/include/linux/lock_dlm_plock.h
--- a/include/linux/lock_dlm_plock.h	1970-01-01 07:30:00.000000000 +0730
+++ b/include/linux/lock_dlm_plock.h	2005-09-01 17:48:48.142749200 +0800
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __LOCK_DLM_PLOCK_DOT_H__
+#define __LOCK_DLM_PLOCK_DOT_H__
+
+#define GDLM_PLOCK_MISC_NAME		"lock_dlm_plock"	/* .name of the plock miscdevice */
+
+#define GDLM_PLOCK_VERSION_MAJOR	1	/* NOTE(review): presumably compared with gdlm_plock_info.version[] - confirm */
+#define GDLM_PLOCK_VERSION_MINOR	0
+#define GDLM_PLOCK_VERSION_PATCH	0
+
+enum {	/* NOTE(review): presumably the values of gdlm_plock_info.optype - confirm */
+	GDLM_PLOCK_OP_LOCK = 1,
+	GDLM_PLOCK_OP_UNLOCK,	/* 2 */
+	GDLM_PLOCK_OP_GET,	/* 3 */
+};
+
+struct gdlm_plock_info {	/* dev_write() accepts exactly one of these per write */
+	__u32 version[3];	/* passed to check_version() on write */
+	__u8 optype;
+	__u8 ex;
+	__u8 wait;
+	__u8 pad;
+	__u32 pid;
+	__s32 nodeid;
+	__s32 rv;
+	__u32 fsid;	/* fsid + number select the pending op a written reply answers */
+	__u64 number;
+	__u64 start;
+	__u64 end;
+};
+
+#endif
+