--- diff/drivers/md/dm-log.c	2004-01-05 15:58:39.000000000 +0000
+++ source/drivers/md/dm-log.c	2004-01-07 10:26:43.000000000 +0000
@@ -553,7 +553,6 @@
 	.get_region_size = core_get_region_size,
-	.is_clean = core_is_clean,
 	.in_sync = core_in_sync,
-	.flush = core_flush,
 	.mark_region = core_mark_region,
 	.clear_region = core_clear_region,
 	.get_resync_work = core_get_resync_work,
@@ -568,7 +567,6 @@
 	.get_region_size = core_get_region_size,
-	.is_clean = core_is_clean,
 	.in_sync = core_in_sync,
-	.flush = disk_flush,
 	.mark_region = core_mark_region,
 	.clear_region = core_clear_region,
 	.get_resync_work = core_get_resync_work,
--- diff/drivers/md/dm-log.h	2003-12-29 10:17:03.000000000 +0000
+++ source/drivers/md/dm-log.h	2004-01-07 10:41:11.000000000 +0000
@@ -18,6 +18,11 @@
 	void *context;
 };
 
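+/*
+ * Handed to the log by the mark_region() method below: 'key'
+ * identifies the region, and the list head gives a log
+ * implementation somewhere to queue the entry while a mark is
+ * outstanding.  Clients embed this in their own per-region
+ * structure and use container_of() to map back (see struct
+ * region in dm-raid1.c).
+ */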
+struct region_list {
+	region_t key;
+	struct list_head list;
+};
+
 struct dirty_log_type {
 	struct list_head list;
 	const char *name;
@@ -29,8 +34,17 @@
 	void (*dtr)(struct dirty_log *log);
 
 	/*
+	 * To avoid a proliferation of kernel threads, we provide
+	 * this function, which should be called periodically by
+	 * the client (eg, from its existing daemon thread).
+	 */
+	void (*do_work)(struct dirty_log *log);
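+
+	/*
+	 * Sketch of how a client is expected to drive do_work()
+	 * (hypothetical caller; dm-raid1's do_mirror() is the
+	 * real one):
+	 *
+	 *	if (log->type->do_work)
+	 *		log->type->do_work(log);
+	 */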
+
+	/*
 	 * There are times when we don't want the log to touch
-	 * the disk.
+	 * the disk.  Someone else may touch the log while it is
+	 * suspended, so the resume method should reread the log
+	 * from disk, network, etc.
 	 */
 	int (*suspend)(struct dirty_log *log);
 	int (*resume)(struct dirty_log *log);
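+
+	/*
+	 * Expected call sequence, sketched (the suspending caller
+	 * and the other party are hypothetical):
+	 *
+	 *	log->type->suspend(log);
+	 *	... someone else may update the log here ...
+	 *	log->type->resume(log);		(rereads the state)
+	 */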
@@ -41,12 +55,6 @@
 	 */
 	sector_t (*get_region_size)(struct dirty_log *log);
 
-        /*
-	 * A predicate to say whether a region is clean or not.
-	 * May block.
-	 */
-	int (*is_clean)(struct dirty_log *log, region_t region);
-
 	/*
 	 *  Returns: 0, 1, -EWOULDBLOCK, < 0
 	 *
@@ -61,18 +69,21 @@
 	int (*in_sync)(struct dirty_log *log, region_t region, int can_block);
 
 	/*
-	 * Flush the current log state (eg, to disk).  This
-	 * function may block.
-	 */
-	int (*flush)(struct dirty_log *log);
+	 * Mark an area as dirty.  The log will invoke the callback
+	 * when the operation is complete.  Using a callback also
+	 * allows the log a lot more freedom; for example, 'working
+	 * set' or 'delayed commit' algorithms become trivial to
+	 * implement.  All the log promises is that _eventually_
+	 * the callback will be made.
+	 */
+	void (*mark_region)(struct dirty_log *log, struct region_list *rl,
+			    void (*callback)(int, struct region_list *));
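+
+	/*
+	 * Sketch of a hypothetical client (see rh_inc() and
+	 * rh_mark_callback() in dm-raid1.c for the real thing):
+	 * embed the region_list in a per-region structure and
+	 * recover it in the callback with container_of():
+	 *
+	 *	static void my_callback(int error, struct region_list *rl)
+	 *	{
+	 *		struct my_region *mr =
+	 *			container_of(rl, struct my_region, rl);
+	 *		... the mark is now committed ...
+	 *	}
+	 *
+	 *	log->type->mark_region(log, &mr->rl, my_callback);
+	 */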
 
 	/*
-	 * Mark an area as clean or dirty.  These functions may
-	 * block, though for performance reasons blocking should
-	 * be extremely rare (eg, allocating another chunk of
-	 * memory for some reason).
+	 * Mark an area as clean.  No callback is needed, since we
+	 * don't really care if a clean region is accidentally
+	 * considered dirty after a crash.
 	 */
-	void (*mark_region)(struct dirty_log *log, region_t region);
 	void (*clear_region)(struct dirty_log *log, region_t region);
 
 	/*
--- diff/drivers/md/dm-raid1.c	2004-01-05 14:16:49.000000000 +0000
+++ source/drivers/md/dm-raid1.c	2004-01-07 13:07:40.000000000 +0000
@@ -132,16 +132,25 @@
 
 struct region {
 	struct region_hash *rh;	/* FIXME: can we get rid of this ? */
-	region_t key;
 	int state;
 
+	struct region_list rl;
 	struct list_head hash_list;
-	struct list_head list;
 
 	atomic_t pending;
 	struct bio *delayed_bios;
 };
 
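+/*
+ * The dirty log hands the embedded region_list back in its
+ * mark_region callback, and the region hash links regions onto
+ * its clean/quiesced/recovered lists through rl.list; these
+ * helpers use container_of() to map either pointer back to the
+ * enclosing struct region.
+ */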
+static inline struct region *rl_to_region(struct region_list *rl)
+{
+	return container_of(rl, struct region, rl);
+}
+
+static inline struct region *l_to_region(struct list_head *l)
+{
+	return rl_to_region(container_of(l, struct region_list, list));
+}
+
 /*
  * Conversion fns
  */
@@ -254,7 +263,7 @@
 	struct region *reg;
 
 	list_for_each_entry (reg, rh->buckets + rh_hash(rh, region), hash_list)
-		if (reg->key == region)
+		if (reg->rl.key == region)
 			return reg;
 
 	return NULL;
@@ -262,7 +271,7 @@
 
 static void __rh_insert(struct region_hash *rh, struct region *reg)
 {
-	unsigned int h = rh_hash(rh, reg->key);
+	unsigned int h = rh_hash(rh, reg->rl.key);
 	list_add(&reg->hash_list, rh->buckets + h);
 }
 
@@ -275,9 +284,8 @@
 	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
 		RH_CLEAN : RH_NOSYNC;
 	nreg->rh = rh;
-	nreg->key = region;
-
-	INIT_LIST_HEAD(&nreg->list);
+	nreg->rl.key = region;
+	INIT_LIST_HEAD(&nreg->rl.list);
 
 	atomic_set(&nreg->pending, 0);
 	nreg->delayed_bios = NULL;
@@ -292,7 +300,7 @@
 		__rh_insert(rh, nreg);
 		if (nreg->state == RH_CLEAN) {
 			spin_lock_irq(&rh->region_lock);
-			list_add(&nreg->list, &rh->clean_regions);
+			list_add(&nreg->rl.list, &rh->clean_regions);
 			spin_unlock_irq(&rh->region_lock);
 		}
 		reg = nreg;
@@ -374,8 +382,9 @@
 		list_splice(&rh->clean_regions, &clean);
 		INIT_LIST_HEAD(&rh->clean_regions);
 
-		list_for_each_entry (reg, &clean, list) {
-			rh->log->type->clear_region(rh->log, reg->key);
+		list_for_each (tmp, &clean) {
+			reg = l_to_region(tmp);
+			rh->log->type->clear_region(rh->log, reg->rl.key);
 			list_del(&reg->hash_list);
 		}
 	}
@@ -384,8 +393,10 @@
 		list_splice(&rh->recovered_regions, &recovered);
 		INIT_LIST_HEAD(&rh->recovered_regions);
 
-		list_for_each_entry (reg, &recovered, list)
+		list_for_each (tmp, &recovered) {
+			reg = l_to_region(tmp);
 			list_del(&reg->hash_list);
+		}
 	}
 	spin_unlock(&rh->region_lock);
 	write_unlock_irq(&rh->hash_lock);
@@ -396,20 +407,28 @@
 	 * any more locking.
 	 */
 	list_for_each_safe (tmp, tmp2, &recovered) {
-		reg = list_entry(tmp, struct region, list);
+		reg = l_to_region(tmp);
 
-		rh->log->type->complete_resync_work(rh->log, reg->key, 1);
+		rh->log->type->complete_resync_work(rh->log, reg->rl.key, 1);
 		dispatch_bios(rh->ms, reg->delayed_bios);
 		up(&rh->recovery_count);
 		mempool_free(reg, rh->region_pool);
 	}
 
 	list_for_each_safe (tmp, tmp2, &clean) {
-		reg = list_entry(tmp, struct region, list);
+		reg = l_to_region(tmp);
 		mempool_free(reg, rh->region_pool);
 	}
 }
 
+static void rh_mark_callback(int error, struct region_list *rl)
+{
+	struct region *reg = rl_to_region(rl);
+	struct region_hash *rh = reg->rh;
+
+	/*
+	 * The error code is currently ignored; the region is
+	 * marked dirty whether or not the log committed the mark.
+	 */
+	spin_lock_irq(&rh->region_lock);
+	reg->state = RH_DIRTY;
+	list_del_init(&reg->rl.list);	/* take off the clean list */
+	spin_unlock_irq(&rh->region_lock);
+}
+
 static void rh_inc(struct region_hash *rh, region_t region)
 {
 	struct region *reg;
@@ -417,11 +436,11 @@
 	read_lock(&rh->hash_lock);
 	reg = __rh_find(rh, region);
 	if (reg->state == RH_CLEAN) {
-		rh->log->type->mark_region(rh->log, reg->key);
+		rh->log->type->mark_region(rh->log, &reg->rl, rh_mark_callback);
 
 		spin_lock_irq(&rh->region_lock);
 		reg->state = RH_DIRTY;
-		list_del_init(&reg->list);	/* take off the clean list */
+		list_del_init(&reg->rl.list);	/* take off the clean list */
 		spin_unlock_irq(&rh->region_lock);
 	}
 
@@ -450,10 +469,10 @@
 	if (atomic_dec_and_test(&reg->pending)) {
 		spin_lock_irqsave(&rh->region_lock, flags);
 		if (reg->state == RH_RECOVERING) {
-			list_add_tail(&reg->list, &rh->quiesced_regions);
+			list_add_tail(&reg->rl.list, &rh->quiesced_regions);
 		} else {
 			reg->state = RH_CLEAN;
-			list_add(&reg->list, &rh->clean_regions);
+			list_add(&reg->rl.list, &rh->clean_regions);
 		}
 		spin_unlock_irqrestore(&rh->region_lock, flags);
 		wake = 1;
@@ -492,11 +511,11 @@
 
 	/* Already quiesced ? */
 	if (atomic_read(&reg->pending))
-		list_del_init(&reg->list);
+		list_del_init(&reg->rl.list);
 
 	else {
-		list_del_init(&reg->list);
-		list_add(&reg->list, &rh->quiesced_regions);
+		list_del_init(&reg->rl.list);
+		list_add(&reg->rl.list, &rh->quiesced_regions);
 	}
 	spin_unlock_irq(&rh->region_lock);
 
@@ -521,9 +540,8 @@
 
 	spin_lock_irq(&rh->region_lock);
 	if (!list_empty(&rh->quiesced_regions)) {
-		reg = list_entry(rh->quiesced_regions.next,
-				 struct region, list);
-		list_del_init(&reg->list);	/* remove from the quiesced list */
+		reg = l_to_region(rh->quiesced_regions.next);
+		list_del_init(&reg->rl.list);	/* remove from the quiesced list */
 	}
 	spin_unlock_irq(&rh->region_lock);
 
@@ -536,17 +554,12 @@
 	struct region_hash *rh = reg->rh;
 
 	spin_lock_irq(&rh->region_lock);
-	list_add(&reg->list, &reg->rh->recovered_regions);
+	list_add(&reg->rl.list, &reg->rh->recovered_regions);
 	spin_unlock_irq(&rh->region_lock);
 
 	dm_daemon_wake(&_kmirrord);
 }
 
-static void rh_flush(struct region_hash *rh)
-{
-	rh->log->type->flush(rh->log);
-}
-
 static void rh_delay(struct region_hash *rh, struct bio *bio)
 {
 	struct region *reg;
@@ -657,8 +670,8 @@
 	/* fill in the source */
 	m = ms->mirror + DEFAULT_MIRROR;
 	from.bdev = m->dev->bdev;
-	from.sector = m->offset + region_to_sector(reg->rh, reg->key);
-	if (reg->key == (ms->nr_regions - 1)) {
+	from.sector = m->offset + region_to_sector(reg->rh, reg->rl.key);
+	if (reg->rl.key == (ms->nr_regions - 1)) {
 		/*
 		 * The final region may be smaller than
 		 * region_size.
@@ -676,7 +689,7 @@
 
 		m = ms->mirror + i;
 		dest->bdev = m->dev->bdev;
-		dest->sector = m->offset + region_to_sector(reg->rh, reg->key);
+		dest->sector = m->offset + region_to_sector(reg->rh, reg->rl.key);
 		dest->count = from.count;
 		dest++;
 	}
@@ -880,6 +893,7 @@
 static void do_mirror(struct mirror_set *ms)
 {
 	struct bio_list reads, writes;
+	struct dirty_log *log = ms->rh.log;
 
 	spin_lock(&ms->lock);
 	memcpy(&reads, &ms->reads, sizeof(reads));
@@ -892,6 +906,11 @@
 	do_recovery(ms);
 	do_reads(ms, &reads);
 	do_writes(ms, &writes);
+
+	/* let the log use our thread for a bit */
+	if (log->type->do_work)
+		log->type->do_work(log);
+
 	blk_run_queues();
 }