shithub: lwext4

Download patch

ref: 8281a97813ef19452be1ce6ec1a6c40e483f12f1
parent: 2a5abdbf8c1375f83edde31831cd8b0bb36bc6d8
author: Kaho Ng <[email protected]>
date: Wed Apr 27 15:11:31 EDT 2016

ext4_journal: numorous changes.

 - Do not utilize jbd_block_rec::buf because it can be unreliable that
   jbd_block_rec::buf may be freed.
 - Do not need to flush buffers before a new transaction is going to reference
   them.
 - add some comments to the changes.

--- a/include/ext4_bcache.h
+++ b/include/ext4_bcache.h
@@ -239,6 +239,12 @@
  * @param   buf buffer*/
 void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf);
 
+/**@brief   Invalidate a buffer.
+ * @param   bc block cache descriptor
+ * @param   buf buffer*/
+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
+				struct ext4_buf *buf);
+
 /**@brief   Invalidate a range of buffers.
  * @param   bc block cache descriptor
  * @param   from starting lba
--- a/include/ext4_blockdev.h
+++ b/include/ext4_blockdev.h
@@ -124,6 +124,8 @@
 
 	/**@brief   The filesystem this block device belongs to. */
 	struct ext4_fs *fs;
+
+	void *journal;
 };
 
 /**@brief   Static initialization of the block device.*/
--- a/include/ext4_journal.h
+++ b/include/ext4_journal.h
@@ -47,7 +47,6 @@
 #include "misc/tree.h"
 
 struct jbd_fs {
-	/* If journal block device is used, bdev will be non-null */
 	struct ext4_blockdev *bdev;
 	struct ext4_inode_ref inode_ref;
 	struct jbd_sb sb;
@@ -56,7 +55,7 @@
 };
 
 struct jbd_buf {
-	uint64_t jbd_lba;
+	uint32_t jbd_lba;
 	struct ext4_block block;
 	struct jbd_trans *trans;
 	struct jbd_block_rec *block_rec;
@@ -66,12 +65,11 @@
 
 struct jbd_revoke_rec {
 	ext4_fsblk_t lba;
-	LIST_ENTRY(jbd_revoke_rec) revoke_node;
+	RB_ENTRY(jbd_revoke_rec) revoke_node;
 };
 
 struct jbd_block_rec {
 	ext4_fsblk_t lba;
-	struct ext4_buf *buf;
 	struct jbd_trans *trans;
 	RB_ENTRY(jbd_block_rec) block_rec_node;
 	LIST_ENTRY(jbd_block_rec) tbrec_node;
@@ -91,7 +89,7 @@
 	struct jbd_journal *journal;
 
 	TAILQ_HEAD(jbd_trans_buf, jbd_buf) buf_queue;
-	LIST_HEAD(jbd_revoke_list, jbd_revoke_rec) revoke_list;
+	RB_HEAD(jbd_revoke_tree, jbd_revoke_rec) revoke_root;
 	LIST_HEAD(jbd_trans_block_rec, jbd_block_rec) tbrec_list;
 	TAILQ_ENTRY(jbd_trans) trans_node;
 };
@@ -105,7 +103,6 @@
 
 	uint32_t block_size;
 
-	TAILQ_HEAD(jbd_trans_queue, jbd_trans) trans_queue;
 	TAILQ_HEAD(jbd_cp_queue, jbd_trans) cp_queue;
 	RB_HEAD(jbd_block, jbd_block_rec) block_rec_root;
 
@@ -122,10 +119,8 @@
 int jbd_journal_start(struct jbd_fs *jbd_fs,
 		      struct jbd_journal *journal);
 int jbd_journal_stop(struct jbd_journal *journal);
-struct jbd_trans *jbd_journal_new_trans(struct jbd_journal *journal);
-int jbd_trans_get_access(struct jbd_journal *journal,
-			 struct jbd_trans *trans,
-			 struct ext4_block *block);
+struct jbd_trans *
+jbd_journal_new_trans(struct jbd_journal *journal);
 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
 			      struct ext4_block *block);
 int jbd_trans_revoke_block(struct jbd_trans *trans,
@@ -137,6 +132,10 @@
 			    bool abort);
 int jbd_journal_commit_trans(struct jbd_journal *journal,
 			     struct jbd_trans *trans);
+void
+jbd_journal_purge_cp_trans(struct jbd_journal *journal,
+			   bool flush,
+			   bool once);
 
 #ifdef __cplusplus
 }
--- a/src/ext4_balloc.c
+++ b/src/ext4_balloc.c
@@ -225,7 +225,9 @@
 	}
 	ext4_bcache_invalidate_lba(fs->bdev->bc, baddr, 1);
 	/* Release block group reference */
-	return ext4_fs_put_block_group_ref(&bg_ref);
+	rc = ext4_fs_put_block_group_ref(&bg_ref);
+
+	return rc;
 }
 
 int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
@@ -342,6 +344,7 @@
 	ext4_bcache_invalidate_lba(fs->bdev->bc, first, count);
 	/*All blocks should be released*/
 	ext4_assert(count == 0);
+
 	return rc;
 }
 
--- a/src/ext4_bcache.c
+++ b/src/ext4_bcache.c
@@ -177,6 +177,19 @@
 	bc->ref_blocks--;
 }
 
+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
+				struct ext4_buf *buf)
+{
+	buf->end_write = NULL;
+	buf->end_write_arg = NULL;
+
+	/* Clear both dirty and up-to-date flags. */
+	if (ext4_bcache_test_flag(buf, BC_DIRTY))
+		ext4_bcache_remove_dirty_node(bc, buf);
+
+	ext4_bcache_clear_dirty(buf);
+}
+
 void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,
 				uint64_t from,
 				uint32_t cnt)
@@ -187,13 +200,7 @@
 		if (buf->lba > end)
 			break;
 
-		/* Clear both dirty and up-to-date flags. */
-		if (ext4_bcache_test_flag(buf, BC_DIRTY))
-			ext4_bcache_remove_dirty_node(bc, buf);
-
-		buf->end_write = NULL;
-		buf->end_write_arg = NULL;
-		ext4_bcache_clear_dirty(buf);
+		ext4_bcache_invalidate_buf(bc, buf);
 	}
 }
 
--- a/src/ext4_blockdev.c
+++ b/src/ext4_blockdev.c
@@ -41,6 +41,8 @@
 #include "ext4_debug.h"
 
 #include "ext4_blockdev.h"
+#include "ext4_fs.h"
+#include "ext4_journal.h"
 
 #include <string.h>
 #include <stdlib.h>
@@ -188,6 +190,8 @@
 	if (bdev->bc->dont_shake)
 		return EOK;
 
+	bdev->bc->dont_shake = true;
+
 	while (!RB_EMPTY(&bdev->bc->lru_root) &&
 		ext4_bcache_is_full(bdev->bc)) {
 
@@ -202,6 +206,7 @@
 
 		ext4_bcache_drop_buf(bdev->bc, buf);
 	}
+	bdev->bc->dont_shake = false;
 	return r;
 }
 
--- a/src/ext4_journal.c
+++ b/src/ext4_journal.c
@@ -124,10 +124,22 @@
 	return 0;
 }
 
+static int
+jbd_revoke_rec_cmp(struct jbd_revoke_rec *a, struct jbd_revoke_rec *b)
+{
+	if (a->lba > b->lba)
+		return 1;
+	else if (a->lba < b->lba)
+		return -1;
+	return 0;
+}
+
 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
 		     jbd_revoke_entry_cmp, static inline)
 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
 		     jbd_block_rec_cmp, static inline)
+RB_GENERATE_INTERNAL(jbd_revoke_tree, jbd_revoke_rec, revoke_node,
+		     jbd_revoke_rec_cmp, static inline)
 
 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
 #define jbd_free_revoke_entry(addr) free(addr)
@@ -456,6 +468,9 @@
 		rc = EIO;
 	}
 
+	if (rc == EOK)
+		jbd_fs->bdev = fs->bdev;
+
 	return rc;
 }
 
@@ -499,6 +514,7 @@
 {
 	/* TODO: journal device. */
 	int rc;
+	struct ext4_blockdev *bdev = jbd_fs->bdev;
 	ext4_lblk_t iblock = (ext4_lblk_t)fblock;
 
 	/* Lookup the logical block address of
@@ -508,7 +524,6 @@
 	if (rc != EOK)
 		return rc;
 
-	struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
 	rc = ext4_block_get(bdev, block, fblock);
 
 	/* If succeeded, mark buffer as BC_FLUSH to indicate
@@ -534,6 +549,7 @@
 {
 	/* TODO: journal device. */
 	int rc;
+	struct ext4_blockdev *bdev = jbd_fs->bdev;
 	ext4_lblk_t iblock = (ext4_lblk_t)fblock;
 	rc = jbd_inode_bmap(jbd_fs, iblock,
 			    &fblock);
@@ -540,7 +556,6 @@
 	if (rc != EOK)
 		return rc;
 
-	struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
 	rc = ext4_block_get_noread(bdev, block, fblock);
 	if (rc == EOK)
 		ext4_bcache_set_flag(block->buf, BC_FLUSH);
@@ -555,8 +570,8 @@
 static int jbd_block_set(struct jbd_fs *jbd_fs,
 		  struct ext4_block *block)
 {
-	return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
-			      block);
+	struct ext4_blockdev *bdev = jbd_fs->bdev;
+	return ext4_block_set(bdev, block);
 }
 
 /**@brief  helper functions to calculate
@@ -1206,7 +1221,7 @@
 			   features_incompatible,
 			   features_incompatible);
 		jbd_fs->dirty = true;
-		r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+		r = ext4_sb_write(jbd_fs->bdev,
 				  &jbd_fs->inode_ref.fs->sb);
 	}
 	jbd_destroy_revoke_tree(&info);
@@ -1237,7 +1252,7 @@
 	ext4_set32(&jbd_fs->inode_ref.fs->sb,
 			features_incompatible,
 			features_incompatible);
-	r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+	r = ext4_sb_write(jbd_fs->bdev,
 			&jbd_fs->inode_ref.fs->sb);
 	if (r != EOK)
 		return r;
@@ -1265,12 +1280,16 @@
 		return r;
 	}
 
-	TAILQ_INIT(&journal->trans_queue);
 	TAILQ_INIT(&journal->cp_queue);
 	RB_INIT(&journal->block_rec_root);
 	journal->jbd_fs = jbd_fs;
 	jbd_journal_write_sb(journal);
-	return jbd_write_sb(jbd_fs);
+	r = jbd_write_sb(jbd_fs);
+	if (r != EOK)
+		return r;
+
+	jbd_fs->bdev->journal = journal;
+	return EOK;
 }
 
 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
@@ -1278,6 +1297,8 @@
 			  int res,
 			  void *arg);
 
+/*
+ * This routine is only suitable to committed transactions. */
 static void jbd_journal_flush_trans(struct jbd_trans *trans)
 {
 	struct jbd_buf *jbd_buf, *tmp;
@@ -1288,25 +1309,29 @@
 
 	TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
 			tmp) {
-		struct ext4_buf *buf = jbd_buf->block_rec->buf;
-		/* The buffer in memory is still dirty. */
-		if (buf) {
-			if (jbd_buf->block_rec->trans != trans) {
-				int r;
-				struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
-				ext4_assert(ext4_block_get(fs->bdev,
-							&jbd_block,
-							jbd_buf->jbd_lba) == EOK);
-				memcpy(tmp_data, jbd_block.data,
-						journal->block_size);
-				ext4_block_set(fs->bdev, &jbd_block);
-				r = ext4_blocks_set_direct(fs->bdev, tmp_data,
-						buf->lba, 1);
-				jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
-			} else
-				ext4_block_flush_buf(fs->bdev, buf);
+		struct ext4_buf *buf;
+		struct ext4_block block;
+		/* The buffer is not yet flushed. */
+		buf = ext4_bcache_find_get(fs->bdev->bc, &block,
+					   jbd_buf->block_rec->lba);
+		if (!(buf && ext4_bcache_test_flag(buf, BC_UPTODATE) &&
+		      jbd_buf->block_rec->trans == trans)) {
+			int r;
+			struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
+			ext4_assert(jbd_block_get(journal->jbd_fs,
+						&jbd_block,
+						jbd_buf->jbd_lba) == EOK);
+			memcpy(tmp_data, jbd_block.data,
+					journal->block_size);
+			ext4_block_set(fs->bdev, &jbd_block);
+			r = ext4_blocks_set_direct(fs->bdev, tmp_data,
+					jbd_buf->block_rec->lba, 1);
+			jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
+		} else
+			ext4_block_flush_buf(fs->bdev, buf);
 
-		}
+		if (buf)
+			ext4_block_set(fs->bdev, &block);
 	}
 
 	free(tmp_data);
@@ -1325,7 +1350,7 @@
 	jbd_journal_write_sb(journal);
 }
 
-static void
+void
 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
 			   bool flush,
 			   bool once)
@@ -1398,7 +1423,7 @@
 	ext4_set32(&jbd_fs->inode_ref.fs->sb,
 			features_incompatible,
 			features_incompatible);
-	r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+	r = ext4_sb_write(jbd_fs->bdev,
 			&jbd_fs->inode_ref.fs->sb);
 	if (r != EOK)
 		return r;
@@ -1430,50 +1455,6 @@
 	return start_block;
 }
 
-/**@brief  Allocate a new transaction
- * @param  journal current journal session
- * @return transaction allocated*/
-struct jbd_trans *
-jbd_journal_new_trans(struct jbd_journal *journal)
-{
-	struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
-	if (!trans)
-		return NULL;
-
-	/* We will assign a trans_id to this transaction,
-	 * once it has been committed.*/
-	trans->journal = journal;
-	trans->data_csum = EXT4_CRC32_INIT;
-	trans->error = EOK;
-	TAILQ_INIT(&trans->buf_queue);
-	return trans;
-}
-
-/**@brief  gain access to it before making any modications.
- * @param  journal current journal session
- * @param  trans transaction
- * @param  block descriptor
- * @return standard error code.*/
-int jbd_trans_get_access(struct jbd_journal *journal,
-			 struct jbd_trans *trans,
-			 struct ext4_block *block)
-{
-	int r = EOK;
-	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
-	struct jbd_buf *jbd_buf = block->buf->end_write_arg;
-
-	/* If the buffer has already been modified, we should
-	 * flush dirty data in this buffer to disk.*/
-	if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
-	    block->buf->end_write == jbd_trans_end_write) {
-		ext4_assert(jbd_buf);
-		if (jbd_buf->trans != trans)
-			r = ext4_block_flush_buf(fs->bdev, block->buf);
-
-	}
-	return r;
-}
-
 static struct jbd_block_rec *
 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
 			   ext4_fsblk_t lba)
@@ -1489,25 +1470,24 @@
 
 static void
 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
-			   struct jbd_trans *new_trans,
-			   struct ext4_buf *new_buf)
+			   struct jbd_trans *new_trans)
 {
 	LIST_REMOVE(block_rec, tbrec_node);
-	/* Now this block record belongs to this transaction. */
-	LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
+	if (new_trans) {
+		/* Now this block record belongs to this transaction. */
+		LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
+	}
 	block_rec->trans = new_trans;
-	block_rec->buf = new_buf;
 }
 
 static inline struct jbd_block_rec *
 jbd_trans_insert_block_rec(struct jbd_trans *trans,
-			   ext4_fsblk_t lba,
-			   struct ext4_buf *buf)
+			   ext4_fsblk_t lba)
 {
 	struct jbd_block_rec *block_rec;
 	block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
 	if (block_rec) {
-		jbd_trans_change_ownership(block_rec, trans, buf);
+		jbd_trans_change_ownership(block_rec, trans);
 		return block_rec;
 	}
 	block_rec = calloc(1, sizeof(struct jbd_block_rec));
@@ -1515,7 +1495,6 @@
 		return NULL;
 
 	block_rec->lba = lba;
-	block_rec->buf = buf;
 	block_rec->trans = trans;
 	TAILQ_INIT(&block_rec->dirty_buf_queue);
 	LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
@@ -1523,11 +1502,15 @@
 	return block_rec;
 }
 
+/*
+ * This routine will do the dirty works.
+ */
 static void
 jbd_trans_finish_callback(struct jbd_journal *journal,
 			  const struct jbd_trans *trans,
 			  struct jbd_block_rec *block_rec,
-			  bool abort)
+			  bool abort,
+			  bool revoke)
 {
 	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
 	if (block_rec->trans != trans)
@@ -1539,15 +1522,16 @@
 				&block_rec->dirty_buf_queue,
 				dirty_buf_node,
 				tmp) {
-			/* All we need is a fake ext4_buf. */
-			struct ext4_buf buf;
-
 			jbd_trans_end_write(fs->bdev->bc,
-					&buf,
+					NULL,
 					EOK,
 					jbd_buf);
 		}
 	} else {
+		/*
+		 * We have to roll back data if the block is going to be
+		 * aborted.
+		 */
 		struct jbd_buf *jbd_buf;
 		struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
 				  block = EXT4_BLOCK_ZERO();
@@ -1554,27 +1538,33 @@
 		jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
 				jbd_buf_dirty);
 		if (jbd_buf) {
-			ext4_assert(ext4_block_get(fs->bdev,
-						&jbd_block,
-						jbd_buf->jbd_lba) == EOK);
-			ext4_assert(ext4_block_get_noread(fs->bdev,
-						&block,
-						block_rec->lba) == EOK);
-			memcpy(block.data, jbd_block.data,
-					journal->block_size);
+			if (!revoke) {
+				ext4_assert(ext4_block_get_noread(fs->bdev,
+							&block,
+							block_rec->lba) == EOK);
+				ext4_assert(jbd_block_get(journal->jbd_fs,
+							&jbd_block,
+							jbd_buf->jbd_lba) == EOK);
+				memcpy(block.data, jbd_block.data,
+						journal->block_size);
 
-			jbd_trans_change_ownership(block_rec,
-					jbd_buf->trans, block.buf);
+				jbd_trans_change_ownership(block_rec,
+						jbd_buf->trans);
 
-			block.buf->end_write = jbd_trans_end_write;
-			block.buf->end_write_arg = jbd_buf;
+				block.buf->end_write = jbd_trans_end_write;
+				block.buf->end_write_arg = jbd_buf;
 
-			ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
-			ext4_bcache_set_dirty(block.buf);
+				ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
+				ext4_bcache_set_dirty(block.buf);
 
-			ext4_block_set(fs->bdev, &jbd_block);
-			ext4_block_set(fs->bdev, &block);
-			return;
+				ext4_block_set(fs->bdev, &jbd_block);
+				ext4_block_set(fs->bdev, &block);
+				return;
+			} else {
+				/* The revoked buffer is yet written. */
+				jbd_trans_change_ownership(block_rec,
+						jbd_buf->trans);
+			}
 		}
 	}
 }
@@ -1603,6 +1593,9 @@
 			      struct ext4_block *block)
 {
 	struct jbd_buf *jbd_buf;
+	struct jbd_revoke_rec *rec, tmp_rec = {
+		.lba = block->lb_id
+	};
 	struct jbd_block_rec *block_rec;
 
 	if (block->buf->end_write == jbd_trans_end_write) {
@@ -1615,8 +1608,7 @@
 		return ENOMEM;
 
 	if ((block_rec = jbd_trans_insert_block_rec(trans,
-					block->lb_id,
-					block->buf)) == NULL) {
+					block->lb_id)) == NULL) {
 		free(jbd_buf);
 		return ENOMEM;
 	}
@@ -1639,6 +1631,13 @@
 	TAILQ_INSERT_HEAD(&trans->buf_queue, jbd_buf, buf_node);
 
 	ext4_bcache_set_dirty(block->buf);
+	rec = RB_FIND(jbd_revoke_tree,
+			&trans->revoke_root,
+			&tmp_rec);
+	if (rec)
+		RB_REMOVE(jbd_revoke_tree, &trans->revoke_root,
+			  rec);
+
 	return EOK;
 }
 
@@ -1655,7 +1654,7 @@
 		return ENOMEM;
 
 	rec->lba = lba;
-	LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
+	RB_INSERT(jbd_revoke_tree, &trans->revoke_root, rec);
 	return EOK;
 }
 
@@ -1668,23 +1667,22 @@
 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
 			       ext4_fsblk_t lba)
 {
-	int r = EOK;
 	struct jbd_journal *journal = trans->journal;
-	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
 	struct jbd_block_rec *block_rec =
 		jbd_trans_block_rec_lookup(journal, lba);
 
-	/* Make sure we don't flush any buffers belong to this transaction. */
-	if (block_rec && block_rec->trans != trans) {
-		/* If the buffer has not been flushed yet, flush it now. */
-		if (block_rec->buf) {
-			r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
-			if (r != EOK)
-				return r;
+	if (block_rec) {
+		if (block_rec->trans == trans) {
+			struct jbd_buf *jbd_buf =
+				TAILQ_LAST(&block_rec->dirty_buf_queue,
+					jbd_buf_dirty);
+			/* If there are still unwritten buffers. */
+			if (TAILQ_FIRST(&block_rec->dirty_buf_queue) !=
+			    jbd_buf)
+				jbd_trans_revoke_block(trans, lba);
 
-		}
-
-		jbd_trans_revoke_block(trans, lba);
+		} else
+			jbd_trans_revoke_block(trans, lba);
 	}
 
 	return EOK;
@@ -1719,13 +1717,14 @@
 		jbd_trans_finish_callback(journal,
 				trans,
 				block_rec,
-				abort);
+				abort,
+				false);
 		TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
 		free(jbd_buf);
 	}
-	LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
+	RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
 			  tmp2) {
-		LIST_REMOVE(rec, revoke_node);
+		RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, rec);
 		free(rec);
 	}
 	LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
@@ -1742,18 +1741,17 @@
 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
 {
 	int rc;
+	struct ext4_block block;
 	struct jbd_commit_header *header;
-	uint32_t commit_iblock = 0;
-	struct ext4_block commit_block;
+	uint32_t commit_iblock, orig_commit_iblock;
 	struct jbd_journal *journal = trans->journal;
 
 	commit_iblock = jbd_journal_alloc_block(journal, trans);
-	rc = jbd_block_get_noread(journal->jbd_fs,
-			&commit_block, commit_iblock);
+	rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
 	if (rc != EOK)
 		return rc;
 
-	header = (struct jbd_commit_header *)commit_block.data;
+	header = (struct jbd_commit_header *)block.data;
 	jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
 	jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
 	jbd_set32(&header->header, sequence, trans->trans_id);
@@ -1765,12 +1763,29 @@
 		jbd_set32(header, chksum[0], trans->data_csum);
 	}
 	jbd_commit_csum_set(journal->jbd_fs, header);
-	ext4_bcache_set_dirty(commit_block.buf);
-	rc = jbd_block_set(journal->jbd_fs, &commit_block);
+	ext4_bcache_set_dirty(block.buf);
+	ext4_bcache_set_flag(block.buf, BC_TMP);
+	rc = jbd_block_set(journal->jbd_fs, &block);
 	if (rc != EOK)
 		return rc;
 
-	return EOK;
+	orig_commit_iblock = commit_iblock;
+	commit_iblock++;
+	wrap(&journal->jbd_fs->sb, commit_iblock);
+
+	/* To prevent accidental reference to stale journalling metadata. */
+	if (orig_commit_iblock < commit_iblock) {
+		rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
+		if (rc != EOK)
+			return rc;
+
+		memset(block.data, 0, journal->block_size);
+		ext4_bcache_set_dirty(block.buf);
+		ext4_bcache_set_flag(block.buf, BC_TMP);
+		rc = jbd_block_set(journal->jbd_fs, &block);
+	}
+
+	return rc;
 }
 
 /**@brief  Write descriptor block for a transaction
@@ -1781,19 +1796,25 @@
 			       struct jbd_trans *trans)
 {
 	int rc = EOK, i = 0;
+	struct ext4_block desc_block = EXT4_BLOCK_ZERO(),
+			  data_block = EXT4_BLOCK_ZERO();
 	int32_t tag_tbl_size = 0;
 	uint32_t desc_iblock = 0;
 	uint32_t data_iblock = 0;
 	char *tag_start = NULL, *tag_ptr = NULL;
 	struct jbd_buf *jbd_buf, *tmp;
-	struct ext4_block desc_block, data_block;
 	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
 	uint32_t checksum = EXT4_CRC32_INIT;
+	struct jbd_bhdr *bhdr = NULL;
+	void *data;
 
 	/* Try to remove any non-dirty buffers from the tail of
 	 * buf_queue. */
 	TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
 			jbd_trans_buf, buf_node, tmp) {
+		struct jbd_revoke_rec tmp_rec = {
+			.lba = jbd_buf->block_rec->lba
+		};
 		/* We stop the iteration when we find a dirty buffer. */
 		if (ext4_bcache_test_flag(jbd_buf->block.buf,
 					BC_DIRTY))
@@ -1808,12 +1829,12 @@
 		jbd_trans_finish_callback(journal,
 				trans,
 				jbd_buf->block_rec,
-				true);
-
-		/* The buffer has not been modified, just release
-		 * that jbd_buf. */
+				true,
+				RB_FIND(jbd_revoke_tree,
+					&trans->revoke_root,
+					&tmp_rec));
 		jbd_trans_remove_block_rec(journal,
-				jbd_buf->block_rec, trans);
+					jbd_buf->block_rec, trans);
 		trans->data_cnt--;
 
 		ext4_block_set(fs->bdev, &jbd_buf->block);
@@ -1824,6 +1845,9 @@
 	TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
 		struct tag_info tag_info;
 		bool uuid_exist = false;
+		struct jbd_revoke_rec tmp_rec = {
+			.lba = jbd_buf->block_rec->lba
+		};
 		if (!ext4_bcache_test_flag(jbd_buf->block.buf,
 					   BC_DIRTY)) {
 			TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
@@ -1832,13 +1856,16 @@
 
 			jbd_buf->block.buf->end_write = NULL;
 			jbd_buf->block.buf->end_write_arg = NULL;
-			jbd_trans_finish_callback(journal,
-					trans,
-					jbd_buf->block_rec,
-					true);
 
 			/* The buffer has not been modified, just release
 			 * that jbd_buf. */
+			jbd_trans_finish_callback(journal,
+					trans,
+					jbd_buf->block_rec,
+					true,
+					RB_FIND(jbd_revoke_tree,
+						&trans->revoke_root,
+						&tmp_rec));
 			jbd_trans_remove_block_rec(journal,
 					jbd_buf->block_rec, trans);
 			trans->data_cnt--;
@@ -1854,15 +1881,11 @@
 					  trans->trans_id);
 again:
 		if (!desc_iblock) {
-			struct jbd_bhdr *bhdr;
 			desc_iblock = jbd_journal_alloc_block(journal, trans);
-			rc = jbd_block_get_noread(journal->jbd_fs,
-					   &desc_block, desc_iblock);
+			rc = jbd_block_get_noread(journal->jbd_fs, &desc_block, desc_iblock);
 			if (rc != EOK)
 				break;
 
-			ext4_bcache_set_dirty(desc_block.buf);
-
 			bhdr = (struct jbd_bhdr *)desc_block.data;
 			jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
 			jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
@@ -1880,6 +1903,8 @@
 			if (!trans->start_iblock)
 				trans->start_iblock = desc_iblock;
 
+			ext4_bcache_set_dirty(desc_block.buf);
+			ext4_bcache_set_flag(desc_block.buf, BC_TMP);
 		}
 		tag_info.block = jbd_buf->block.lb_id;
 		tag_info.uuid_exist = uuid_exist;
@@ -1899,28 +1924,37 @@
 				tag_tbl_size,
 				&tag_info);
 		if (rc != EOK) {
-			jbd_meta_csum_set(journal->jbd_fs,
-					(struct jbd_bhdr *)desc_block.data);
-			jbd_block_set(journal->jbd_fs, &desc_block);
+			jbd_meta_csum_set(journal->jbd_fs, bhdr);
 			desc_iblock = 0;
+			rc = jbd_block_set(journal->jbd_fs, &desc_block);
+			if (rc != EOK)
+				break;
+
 			goto again;
 		}
 
 		data_iblock = jbd_journal_alloc_block(journal, trans);
-		rc = jbd_block_get_noread(journal->jbd_fs,
-				&data_block, data_iblock);
-		if (rc != EOK)
+		rc = jbd_block_get_noread(journal->jbd_fs, &data_block, data_iblock);
+		if (rc != EOK) {
+			desc_iblock = 0;
+			ext4_bcache_clear_dirty(desc_block.buf);
+			jbd_block_set(journal->jbd_fs, &desc_block);
 			break;
+		}
 
-		ext4_bcache_set_dirty(data_block.buf);
-
-		memcpy(data_block.data, jbd_buf->block.data,
+		data = data_block.data;
+		memcpy(data, jbd_buf->block.data,
 			journal->block_size);
-		jbd_buf->jbd_lba = data_block.lb_id;
-
+		ext4_bcache_set_dirty(data_block.buf);
+		ext4_bcache_set_flag(data_block.buf, BC_TMP);
 		rc = jbd_block_set(journal->jbd_fs, &data_block);
-		if (rc != EOK)
+		if (rc != EOK) {
+			desc_iblock = 0;
+			ext4_bcache_clear_dirty(desc_block.buf);
+			jbd_block_set(journal->jbd_fs, &desc_block);
 			break;
+		}
+		jbd_buf->jbd_lba = data_iblock;
 
 		tag_ptr += tag_info.tag_bytes;
 		tag_tbl_size -= tag_info.tag_bytes;
@@ -1929,9 +1963,9 @@
 	}
 	if (rc == EOK && desc_iblock) {
 		jbd_meta_csum_set(journal->jbd_fs,
-				(struct jbd_bhdr *)desc_block.data);
+				(struct jbd_bhdr *)bhdr);
 		trans->data_csum = checksum;
-		jbd_block_set(journal->jbd_fs, &desc_block);
+		rc = jbd_block_set(journal->jbd_fs, &desc_block);
 	}
 
 	return rc;
@@ -1946,32 +1980,29 @@
 			   struct jbd_trans *trans)
 {
 	int rc = EOK, i = 0;
+	struct ext4_block desc_block = EXT4_BLOCK_ZERO();
 	int32_t tag_tbl_size = 0;
 	uint32_t desc_iblock = 0;
 	char *blocks_entry = NULL;
 	struct jbd_revoke_rec *rec, *tmp;
-	struct ext4_block desc_block;
 	struct jbd_revoke_header *header = NULL;
 	int32_t record_len = 4;
+	struct jbd_bhdr *bhdr = NULL;
 
 	if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
 				     JBD_FEATURE_INCOMPAT_64BIT))
 		record_len = 8;
 
-	LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
+	RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
 			  tmp) {
 again:
 		if (!desc_iblock) {
-			struct jbd_bhdr *bhdr;
 			desc_iblock = jbd_journal_alloc_block(journal, trans);
-			rc = jbd_block_get_noread(journal->jbd_fs,
-					   &desc_block, desc_iblock);
-			if (rc != EOK) {
+			rc = jbd_block_get_noread(journal->jbd_fs, &desc_block,
+						  desc_iblock);
+			if (rc != EOK)
 				break;
-			}
 
-			ext4_bcache_set_dirty(desc_block.buf);
-
 			bhdr = (struct jbd_bhdr *)desc_block.data;
 			jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
 			jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
@@ -1988,16 +2019,21 @@
 			if (!trans->start_iblock)
 				trans->start_iblock = desc_iblock;
 
+			ext4_bcache_set_dirty(desc_block.buf);
+			ext4_bcache_set_flag(desc_block.buf, BC_TMP);
 		}
 
 		if (tag_tbl_size < record_len) {
 			jbd_set32(header, count,
 				  journal->block_size - tag_tbl_size);
-			jbd_meta_csum_set(journal->jbd_fs,
-					(struct jbd_bhdr *)desc_block.data);
-			jbd_block_set(journal->jbd_fs, &desc_block);
+			jbd_meta_csum_set(journal->jbd_fs, bhdr);
+			bhdr = NULL;
 			desc_iblock = 0;
 			header = NULL;
+			rc = jbd_block_set(journal->jbd_fs, &desc_block);
+			if (rc != EOK)
+				break;
+
 			goto again;
 		}
 		if (record_len == 8) {
@@ -2019,9 +2055,8 @@
 			jbd_set32(header, count,
 				  journal->block_size - tag_tbl_size);
 
-		jbd_meta_csum_set(journal->jbd_fs,
-				(struct jbd_bhdr *)desc_block.data);
-		jbd_block_set(journal->jbd_fs, &desc_block);
+		jbd_meta_csum_set(journal->jbd_fs, bhdr);
+		rc = jbd_block_set(journal->jbd_fs, &desc_block);
 	}
 
 	return rc;
@@ -2065,9 +2100,9 @@
 	jbd_trans_finish_callback(journal,
 			trans,
 			jbd_buf->block_rec,
+			false,
 			false);
-	if (block_rec->trans == trans) {
-		block_rec->buf = NULL;
+	if (block_rec->trans == trans && buf) {
 		/* Clear the end_write and end_write_arg fields. */
 		buf->end_write = NULL;
 		buf->end_write_arg = NULL;
@@ -2101,11 +2136,12 @@
  * @param  journal current journal session
  * @param  trans transaction
  * @return standard error code*/
-int jbd_journal_commit_trans(struct jbd_journal *journal,
-			     struct jbd_trans *trans)
+static int __jbd_journal_commit_trans(struct jbd_journal *journal,
+				      struct jbd_trans *trans)
 {
 	int rc = EOK;
 	uint32_t last = journal->last;
+	struct jbd_revoke_rec *rec, *tmp;
 
 	trans->trans_id = journal->alloc_trans_id;
 	rc = jbd_journal_prepare(journal, trans);
@@ -2117,7 +2153,7 @@
 		goto Finish;
 
 	if (TAILQ_EMPTY(&trans->buf_queue) &&
-	    LIST_EMPTY(&trans->revoke_list)) {
+	    RB_EMPTY(&trans->revoke_root)) {
 		/* Since there are no entries in both buffer list
 		 * and revoke entry list, we do not consider trans as
 		 * complete transaction and just return EOK.*/
@@ -2130,7 +2166,43 @@
 		goto Finish;
 
 	journal->alloc_trans_id++;
+
+	/* Complete the checkpoint of buffers which are revoked. */
+	RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
+			tmp) {
+		struct jbd_block_rec *block_rec =
+			jbd_trans_block_rec_lookup(journal, rec->lba);
+		struct jbd_buf *jbd_buf = NULL;
+		if (block_rec)
+			jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
+					jbd_buf_dirty);
+		if (jbd_buf) {
+			struct ext4_buf *buf;
+			struct ext4_block block = EXT4_BLOCK_ZERO();
+			/*
+			 * We do this to reset the ext4_buf::end_write and
+			 * ext4_buf::end_write_arg fields so that the checkpoint
+			 * callback won't be triggered again.
+			 */
+			buf = ext4_bcache_find_get(journal->jbd_fs->bdev->bc,
+					&block,
+					jbd_buf->block_rec->lba);
+			jbd_trans_end_write(journal->jbd_fs->bdev->bc,
+					buf,
+					EOK,
+					jbd_buf);
+			if (buf)
+				ext4_block_set(journal->jbd_fs->bdev, &block);
+		}
+	}
+
 	if (TAILQ_EMPTY(&journal->cp_queue)) {
+		/*
+		 * This transaction is going to be the first object in the
+		 * checkpoint queue.
+		 * When the first transaction in checkpoint queue is completely
+		 * written to disk, we shift the tail of the log to right.
+		 */
 		if (trans->data_cnt) {
 			journal->start = trans->start_iblock;
 			wrap(&journal->jbd_fs->sb, journal->start);
@@ -2149,18 +2221,50 @@
 			jbd_journal_free_trans(journal, trans, false);
 		}
 	} else {
+		/* No need to do anything to the JBD superblock. */
 		TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
 				trans_node);
 		if (trans->data_cnt)
 			jbd_journal_cp_trans(journal, trans);
-
 	}
 Finish:
-	if (rc != EOK) {
+	if (rc != EOK && rc != ENOSPC) {
 		journal->last = last;
 		jbd_journal_free_trans(journal, trans, true);
 	}
 	return rc;
+}
+
+/**@brief  Allocate a new transaction
+ * @param  journal current journal session
+ * @return transaction allocated*/
+struct jbd_trans *
+jbd_journal_new_trans(struct jbd_journal *journal)
+{
+	struct jbd_trans *trans = NULL;
+	trans = calloc(1, sizeof(struct jbd_trans));
+	if (!trans)
+		return NULL;
+
+	/* We will assign a trans_id to this transaction,
+	 * once it has been committed.*/
+	trans->journal = journal;
+	trans->data_csum = EXT4_CRC32_INIT;
+	trans->error = EOK;
+	TAILQ_INIT(&trans->buf_queue);
+	return trans;
+}
+
+/**@brief  Commit a transaction to the journal immediately.
+ * @param  journal current journal session
+ * @param  trans transaction
+ * @return standard error code*/
+int jbd_journal_commit_trans(struct jbd_journal *journal,
+			     struct jbd_trans *trans)
+{
+	int r = EOK;
+	r = __jbd_journal_commit_trans(journal, trans);
+	return r;
 }
 
 /**
--- a/src/ext4_trans.c
+++ b/src/ext4_trans.c
@@ -44,20 +44,6 @@
 #include "ext4_fs.h"
 #include "ext4_journal.h"
 
-static int ext4_trans_get_write_access(struct ext4_fs *fs __unused,
-				struct ext4_block *block __unused)
-{
-	int r = EOK;
-#if CONFIG_JOURNALING_ENABLE
-	if (fs->jbd_journal && fs->curr_trans) {
-		struct jbd_journal *journal = fs->jbd_journal;
-		struct jbd_trans *trans = fs->curr_trans;
-		r = jbd_trans_get_access(journal, trans, block);
-	}
-#endif
-	return r;
-}
-
 int ext4_trans_set_block_dirty(struct ext4_buf *buf)
 {
 	int r = EOK;
@@ -86,10 +72,6 @@
 	if (r != EOK)
 		return r;
 
-	r = ext4_trans_get_write_access(bdev->fs, b);
-	if (r != EOK)
-		ext4_block_set(bdev, b);
-
 	return r;
 }
 
@@ -100,10 +82,6 @@
 	int r = ext4_block_get(bdev, b, lba);
 	if (r != EOK)
 		return r;
-
-	r = ext4_trans_get_write_access(bdev->fs, b);
-	if (r != EOK)
-		ext4_block_set(bdev, b);
 
 	return r;
 }