ref: 8281a97813ef19452be1ce6ec1a6c40e483f12f1
parent: 2a5abdbf8c1375f83edde31831cd8b0bb36bc6d8
author: Kaho Ng <[email protected]>
date: Wed Apr 27 15:11:31 EDT 2016
ext4_journal: numorous changes. - Do not utilize jbd_block_rec::buf because it can be unreliable that jbd_block_rec::buf may be freed. - Do not need to flush buffers before a new transaction is going to reference them. - add some comments to the changes.
--- a/include/ext4_bcache.h
+++ b/include/ext4_bcache.h
@@ -239,6 +239,12 @@
* @param buf buffer*/
void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf);
+/**@brief Invalidate a buffer.
+ * @param bc block cache descriptor
+ * @param buf buffer*/
+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
+ struct ext4_buf *buf);
+
/**@brief Invalidate a range of buffers.
* @param bc block cache descriptor
* @param from starting lba
--- a/include/ext4_blockdev.h
+++ b/include/ext4_blockdev.h
@@ -124,6 +124,8 @@
/**@brief The filesystem this block device belongs to. */
struct ext4_fs *fs;
+
+ void *journal;
};
/**@brief Static initialization of the block device.*/
--- a/include/ext4_journal.h
+++ b/include/ext4_journal.h
@@ -47,7 +47,6 @@
#include "misc/tree.h"
struct jbd_fs {
- /* If journal block device is used, bdev will be non-null */
struct ext4_blockdev *bdev;
struct ext4_inode_ref inode_ref;
struct jbd_sb sb;
@@ -56,7 +55,7 @@
};
struct jbd_buf {
- uint64_t jbd_lba;
+ uint32_t jbd_lba;
struct ext4_block block;
struct jbd_trans *trans;
struct jbd_block_rec *block_rec;
@@ -66,12 +65,11 @@
struct jbd_revoke_rec {
ext4_fsblk_t lba;
- LIST_ENTRY(jbd_revoke_rec) revoke_node;
+ RB_ENTRY(jbd_revoke_rec) revoke_node;
};
struct jbd_block_rec {
ext4_fsblk_t lba;
- struct ext4_buf *buf;
struct jbd_trans *trans;
RB_ENTRY(jbd_block_rec) block_rec_node;
LIST_ENTRY(jbd_block_rec) tbrec_node;
@@ -91,7 +89,7 @@
struct jbd_journal *journal;
TAILQ_HEAD(jbd_trans_buf, jbd_buf) buf_queue;
- LIST_HEAD(jbd_revoke_list, jbd_revoke_rec) revoke_list;
+ RB_HEAD(jbd_revoke_tree, jbd_revoke_rec) revoke_root;
LIST_HEAD(jbd_trans_block_rec, jbd_block_rec) tbrec_list;
TAILQ_ENTRY(jbd_trans) trans_node;
};
@@ -105,7 +103,6 @@
uint32_t block_size;
- TAILQ_HEAD(jbd_trans_queue, jbd_trans) trans_queue;
TAILQ_HEAD(jbd_cp_queue, jbd_trans) cp_queue;
RB_HEAD(jbd_block, jbd_block_rec) block_rec_root;
@@ -122,10 +119,8 @@
int jbd_journal_start(struct jbd_fs *jbd_fs,
struct jbd_journal *journal);
int jbd_journal_stop(struct jbd_journal *journal);
-struct jbd_trans *jbd_journal_new_trans(struct jbd_journal *journal);
-int jbd_trans_get_access(struct jbd_journal *journal,
- struct jbd_trans *trans,
- struct ext4_block *block);
+struct jbd_trans *
+jbd_journal_new_trans(struct jbd_journal *journal);
int jbd_trans_set_block_dirty(struct jbd_trans *trans,
struct ext4_block *block);
int jbd_trans_revoke_block(struct jbd_trans *trans,
@@ -137,6 +132,10 @@
bool abort);
int jbd_journal_commit_trans(struct jbd_journal *journal,
struct jbd_trans *trans);
+void
+jbd_journal_purge_cp_trans(struct jbd_journal *journal,
+ bool flush,
+ bool once);
#ifdef __cplusplus
}
--- a/src/ext4_balloc.c
+++ b/src/ext4_balloc.c
@@ -225,7 +225,9 @@
}
ext4_bcache_invalidate_lba(fs->bdev->bc, baddr, 1);
/* Release block group reference */
- return ext4_fs_put_block_group_ref(&bg_ref);
+ rc = ext4_fs_put_block_group_ref(&bg_ref);
+
+ return rc;
}
int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
@@ -342,6 +344,7 @@
ext4_bcache_invalidate_lba(fs->bdev->bc, first, count);
/*All blocks should be released*/
ext4_assert(count == 0);
+
return rc;
}
--- a/src/ext4_bcache.c
+++ b/src/ext4_bcache.c
@@ -177,6 +177,19 @@
bc->ref_blocks--;
}
+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
+ struct ext4_buf *buf)
+{
+ buf->end_write = NULL;
+ buf->end_write_arg = NULL;
+
+ /* Clear both dirty and up-to-date flags. */
+ if (ext4_bcache_test_flag(buf, BC_DIRTY))
+ ext4_bcache_remove_dirty_node(bc, buf);
+
+ ext4_bcache_clear_dirty(buf);
+}
+
void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,
uint64_t from,
uint32_t cnt)
@@ -187,13 +200,7 @@
if (buf->lba > end)
break;
- /* Clear both dirty and up-to-date flags. */
- if (ext4_bcache_test_flag(buf, BC_DIRTY))
- ext4_bcache_remove_dirty_node(bc, buf);
-
- buf->end_write = NULL;
- buf->end_write_arg = NULL;
- ext4_bcache_clear_dirty(buf);
+ ext4_bcache_invalidate_buf(bc, buf);
}
}
--- a/src/ext4_blockdev.c
+++ b/src/ext4_blockdev.c
@@ -41,6 +41,8 @@
#include "ext4_debug.h"
#include "ext4_blockdev.h"
+#include "ext4_fs.h"
+#include "ext4_journal.h"
#include <string.h>
#include <stdlib.h>
@@ -188,6 +190,8 @@
if (bdev->bc->dont_shake)
return EOK;
+ bdev->bc->dont_shake = true;
+
while (!RB_EMPTY(&bdev->bc->lru_root) &&
ext4_bcache_is_full(bdev->bc)) {
@@ -202,6 +206,7 @@
ext4_bcache_drop_buf(bdev->bc, buf);
}
+ bdev->bc->dont_shake = false;
return r;
}
--- a/src/ext4_journal.c
+++ b/src/ext4_journal.c
@@ -124,10 +124,22 @@
return 0;
}
+static int
+jbd_revoke_rec_cmp(struct jbd_revoke_rec *a, struct jbd_revoke_rec *b)
+{
+ if (a->lba > b->lba)
+ return 1;
+ else if (a->lba < b->lba)
+ return -1;
+ return 0;
+}
+
RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
jbd_revoke_entry_cmp, static inline)
RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
jbd_block_rec_cmp, static inline)
+RB_GENERATE_INTERNAL(jbd_revoke_tree, jbd_revoke_rec, revoke_node,
+ jbd_revoke_rec_cmp, static inline)
#define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
#define jbd_free_revoke_entry(addr) free(addr)
@@ -456,6 +468,9 @@
rc = EIO;
}
+ if (rc == EOK)
+ jbd_fs->bdev = fs->bdev;
+
return rc;
}
@@ -499,6 +514,7 @@
{
/* TODO: journal device. */
int rc;
+ struct ext4_blockdev *bdev = jbd_fs->bdev;
ext4_lblk_t iblock = (ext4_lblk_t)fblock;
/* Lookup the logical block address of
@@ -508,7 +524,6 @@
if (rc != EOK)
return rc;
- struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
rc = ext4_block_get(bdev, block, fblock);
/* If succeeded, mark buffer as BC_FLUSH to indicate
@@ -534,6 +549,7 @@
{
/* TODO: journal device. */
int rc;
+ struct ext4_blockdev *bdev = jbd_fs->bdev;
ext4_lblk_t iblock = (ext4_lblk_t)fblock;
rc = jbd_inode_bmap(jbd_fs, iblock,
&fblock);
@@ -540,7 +556,6 @@
if (rc != EOK)
return rc;
- struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
rc = ext4_block_get_noread(bdev, block, fblock);
if (rc == EOK)
ext4_bcache_set_flag(block->buf, BC_FLUSH);
@@ -555,8 +570,8 @@
static int jbd_block_set(struct jbd_fs *jbd_fs,
struct ext4_block *block)
{
- return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
- block);
+ struct ext4_blockdev *bdev = jbd_fs->bdev;
+ return ext4_block_set(bdev, block);
}
/**@brief helper functions to calculate
@@ -1206,7 +1221,7 @@
features_incompatible,
features_incompatible);
jbd_fs->dirty = true;
- r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+ r = ext4_sb_write(jbd_fs->bdev,
&jbd_fs->inode_ref.fs->sb);
}
jbd_destroy_revoke_tree(&info);
@@ -1237,7 +1252,7 @@
ext4_set32(&jbd_fs->inode_ref.fs->sb,
features_incompatible,
features_incompatible);
- r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+ r = ext4_sb_write(jbd_fs->bdev,
&jbd_fs->inode_ref.fs->sb);
if (r != EOK)
return r;
@@ -1265,12 +1280,16 @@
return r;
}
- TAILQ_INIT(&journal->trans_queue);
TAILQ_INIT(&journal->cp_queue);
RB_INIT(&journal->block_rec_root);
journal->jbd_fs = jbd_fs;
jbd_journal_write_sb(journal);
- return jbd_write_sb(jbd_fs);
+ r = jbd_write_sb(jbd_fs);
+ if (r != EOK)
+ return r;
+
+ jbd_fs->bdev->journal = journal;
+ return EOK;
}
static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
@@ -1278,6 +1297,8 @@
int res,
void *arg);
+/*
+ * This routine is only suitable to committed transactions. */
static void jbd_journal_flush_trans(struct jbd_trans *trans)
{
struct jbd_buf *jbd_buf, *tmp;
@@ -1288,25 +1309,29 @@
TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
tmp) {
- struct ext4_buf *buf = jbd_buf->block_rec->buf;
- /* The buffer in memory is still dirty. */
- if (buf) {
- if (jbd_buf->block_rec->trans != trans) {
- int r;
- struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
- ext4_assert(ext4_block_get(fs->bdev,
- &jbd_block,
- jbd_buf->jbd_lba) == EOK);
- memcpy(tmp_data, jbd_block.data,
- journal->block_size);
- ext4_block_set(fs->bdev, &jbd_block);
- r = ext4_blocks_set_direct(fs->bdev, tmp_data,
- buf->lba, 1);
- jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
- } else
- ext4_block_flush_buf(fs->bdev, buf);
+ struct ext4_buf *buf;
+ struct ext4_block block;
+ /* The buffer is not yet flushed. */
+ buf = ext4_bcache_find_get(fs->bdev->bc, &block,
+ jbd_buf->block_rec->lba);
+ if (!(buf && ext4_bcache_test_flag(buf, BC_UPTODATE) &&
+ jbd_buf->block_rec->trans == trans)) {
+ int r;
+ struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
+ ext4_assert(jbd_block_get(journal->jbd_fs,
+ &jbd_block,
+ jbd_buf->jbd_lba) == EOK);
+ memcpy(tmp_data, jbd_block.data,
+ journal->block_size);
+ ext4_block_set(fs->bdev, &jbd_block);
+ r = ext4_blocks_set_direct(fs->bdev, tmp_data,
+ jbd_buf->block_rec->lba, 1);
+ jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
+ } else
+ ext4_block_flush_buf(fs->bdev, buf);
- }
+ if (buf)
+ ext4_block_set(fs->bdev, &block);
}
free(tmp_data);
@@ -1325,7 +1350,7 @@
jbd_journal_write_sb(journal);
}
-static void
+void
jbd_journal_purge_cp_trans(struct jbd_journal *journal,
bool flush,
bool once)
@@ -1398,7 +1423,7 @@
ext4_set32(&jbd_fs->inode_ref.fs->sb,
features_incompatible,
features_incompatible);
- r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+ r = ext4_sb_write(jbd_fs->bdev,
&jbd_fs->inode_ref.fs->sb);
if (r != EOK)
return r;
@@ -1430,50 +1455,6 @@
return start_block;
}
-/**@brief Allocate a new transaction
- * @param journal current journal session
- * @return transaction allocated*/
-struct jbd_trans *
-jbd_journal_new_trans(struct jbd_journal *journal)
-{
- struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
- if (!trans)
- return NULL;
-
- /* We will assign a trans_id to this transaction,
- * once it has been committed.*/
- trans->journal = journal;
- trans->data_csum = EXT4_CRC32_INIT;
- trans->error = EOK;
- TAILQ_INIT(&trans->buf_queue);
- return trans;
-}
-
-/**@brief gain access to it before making any modications.
- * @param journal current journal session
- * @param trans transaction
- * @param block descriptor
- * @return standard error code.*/
-int jbd_trans_get_access(struct jbd_journal *journal,
- struct jbd_trans *trans,
- struct ext4_block *block)
-{
- int r = EOK;
- struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
- struct jbd_buf *jbd_buf = block->buf->end_write_arg;
-
- /* If the buffer has already been modified, we should
- * flush dirty data in this buffer to disk.*/
- if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
- block->buf->end_write == jbd_trans_end_write) {
- ext4_assert(jbd_buf);
- if (jbd_buf->trans != trans)
- r = ext4_block_flush_buf(fs->bdev, block->buf);
-
- }
- return r;
-}
-
static struct jbd_block_rec *
jbd_trans_block_rec_lookup(struct jbd_journal *journal,
ext4_fsblk_t lba)
@@ -1489,25 +1470,24 @@
static void
jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
- struct jbd_trans *new_trans,
- struct ext4_buf *new_buf)
+ struct jbd_trans *new_trans)
{
LIST_REMOVE(block_rec, tbrec_node);
- /* Now this block record belongs to this transaction. */
- LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
+ if (new_trans) {
+ /* Now this block record belongs to this transaction. */
+ LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
+ }
block_rec->trans = new_trans;
- block_rec->buf = new_buf;
}
static inline struct jbd_block_rec *
jbd_trans_insert_block_rec(struct jbd_trans *trans,
- ext4_fsblk_t lba,
- struct ext4_buf *buf)
+ ext4_fsblk_t lba)
{
struct jbd_block_rec *block_rec;
block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
if (block_rec) {
- jbd_trans_change_ownership(block_rec, trans, buf);
+ jbd_trans_change_ownership(block_rec, trans);
return block_rec;
}
block_rec = calloc(1, sizeof(struct jbd_block_rec));
@@ -1515,7 +1495,6 @@
return NULL;
block_rec->lba = lba;
- block_rec->buf = buf;
block_rec->trans = trans;
TAILQ_INIT(&block_rec->dirty_buf_queue);
LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
@@ -1523,11 +1502,15 @@
return block_rec;
}
+/*
+ * This routine will do the dirty works.
+ */
static void
jbd_trans_finish_callback(struct jbd_journal *journal,
const struct jbd_trans *trans,
struct jbd_block_rec *block_rec,
- bool abort)
+ bool abort,
+ bool revoke)
{
struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
if (block_rec->trans != trans)
@@ -1539,15 +1522,16 @@
&block_rec->dirty_buf_queue,
dirty_buf_node,
tmp) {
- /* All we need is a fake ext4_buf. */
- struct ext4_buf buf;
-
jbd_trans_end_write(fs->bdev->bc,
- &buf,
+ NULL,
EOK,
jbd_buf);
}
} else {
+ /*
+ * We have to roll back data if the block is going to be
+ * aborted.
+ */
struct jbd_buf *jbd_buf;
struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
block = EXT4_BLOCK_ZERO();
@@ -1554,27 +1538,33 @@
jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
jbd_buf_dirty);
if (jbd_buf) {
- ext4_assert(ext4_block_get(fs->bdev,
- &jbd_block,
- jbd_buf->jbd_lba) == EOK);
- ext4_assert(ext4_block_get_noread(fs->bdev,
- &block,
- block_rec->lba) == EOK);
- memcpy(block.data, jbd_block.data,
- journal->block_size);
+ if (!revoke) {
+ ext4_assert(ext4_block_get_noread(fs->bdev,
+ &block,
+ block_rec->lba) == EOK);
+ ext4_assert(jbd_block_get(journal->jbd_fs,
+ &jbd_block,
+ jbd_buf->jbd_lba) == EOK);
+ memcpy(block.data, jbd_block.data,
+ journal->block_size);
- jbd_trans_change_ownership(block_rec,
- jbd_buf->trans, block.buf);
+ jbd_trans_change_ownership(block_rec,
+ jbd_buf->trans);
- block.buf->end_write = jbd_trans_end_write;
- block.buf->end_write_arg = jbd_buf;
+ block.buf->end_write = jbd_trans_end_write;
+ block.buf->end_write_arg = jbd_buf;
- ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
- ext4_bcache_set_dirty(block.buf);
+ ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
+ ext4_bcache_set_dirty(block.buf);
- ext4_block_set(fs->bdev, &jbd_block);
- ext4_block_set(fs->bdev, &block);
- return;
+ ext4_block_set(fs->bdev, &jbd_block);
+ ext4_block_set(fs->bdev, &block);
+ return;
+ } else {
+ /* The revoked buffer is yet written. */
+ jbd_trans_change_ownership(block_rec,
+ jbd_buf->trans);
+ }
}
}
}
@@ -1603,6 +1593,9 @@
struct ext4_block *block)
{
struct jbd_buf *jbd_buf;
+ struct jbd_revoke_rec *rec, tmp_rec = {
+ .lba = block->lb_id
+ };
struct jbd_block_rec *block_rec;
if (block->buf->end_write == jbd_trans_end_write) {
@@ -1615,8 +1608,7 @@
return ENOMEM;
if ((block_rec = jbd_trans_insert_block_rec(trans,
- block->lb_id,
- block->buf)) == NULL) {
+ block->lb_id)) == NULL) {
free(jbd_buf);
return ENOMEM;
}
@@ -1639,6 +1631,13 @@
TAILQ_INSERT_HEAD(&trans->buf_queue, jbd_buf, buf_node);
ext4_bcache_set_dirty(block->buf);
+ rec = RB_FIND(jbd_revoke_tree,
+ &trans->revoke_root,
+ &tmp_rec);
+ if (rec)
+ RB_REMOVE(jbd_revoke_tree, &trans->revoke_root,
+ rec);
+
return EOK;
}
@@ -1655,7 +1654,7 @@
return ENOMEM;
rec->lba = lba;
- LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
+ RB_INSERT(jbd_revoke_tree, &trans->revoke_root, rec);
return EOK;
}
@@ -1668,23 +1667,22 @@
int jbd_trans_try_revoke_block(struct jbd_trans *trans,
ext4_fsblk_t lba)
{
- int r = EOK;
struct jbd_journal *journal = trans->journal;
- struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
struct jbd_block_rec *block_rec =
jbd_trans_block_rec_lookup(journal, lba);
- /* Make sure we don't flush any buffers belong to this transaction. */
- if (block_rec && block_rec->trans != trans) {
- /* If the buffer has not been flushed yet, flush it now. */
- if (block_rec->buf) {
- r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
- if (r != EOK)
- return r;
+ if (block_rec) {
+ if (block_rec->trans == trans) {
+ struct jbd_buf *jbd_buf =
+ TAILQ_LAST(&block_rec->dirty_buf_queue,
+ jbd_buf_dirty);
+ /* If there are still unwritten buffers. */
+ if (TAILQ_FIRST(&block_rec->dirty_buf_queue) !=
+ jbd_buf)
+ jbd_trans_revoke_block(trans, lba);
- }
-
- jbd_trans_revoke_block(trans, lba);
+ } else
+ jbd_trans_revoke_block(trans, lba);
}
return EOK;
@@ -1719,13 +1717,14 @@
jbd_trans_finish_callback(journal,
trans,
block_rec,
- abort);
+ abort,
+ false);
TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
free(jbd_buf);
}
- LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
+ RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
tmp2) {
- LIST_REMOVE(rec, revoke_node);
+ RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, rec);
free(rec);
}
LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
@@ -1742,18 +1741,17 @@
static int jbd_trans_write_commit_block(struct jbd_trans *trans)
{
int rc;
+ struct ext4_block block;
struct jbd_commit_header *header;
- uint32_t commit_iblock = 0;
- struct ext4_block commit_block;
+ uint32_t commit_iblock, orig_commit_iblock;
struct jbd_journal *journal = trans->journal;
commit_iblock = jbd_journal_alloc_block(journal, trans);
- rc = jbd_block_get_noread(journal->jbd_fs,
- &commit_block, commit_iblock);
+ rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
if (rc != EOK)
return rc;
- header = (struct jbd_commit_header *)commit_block.data;
+ header = (struct jbd_commit_header *)block.data;
jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
jbd_set32(&header->header, sequence, trans->trans_id);
@@ -1765,12 +1763,29 @@
jbd_set32(header, chksum[0], trans->data_csum);
}
jbd_commit_csum_set(journal->jbd_fs, header);
- ext4_bcache_set_dirty(commit_block.buf);
- rc = jbd_block_set(journal->jbd_fs, &commit_block);
+ ext4_bcache_set_dirty(block.buf);
+ ext4_bcache_set_flag(block.buf, BC_TMP);
+ rc = jbd_block_set(journal->jbd_fs, &block);
if (rc != EOK)
return rc;
- return EOK;
+ orig_commit_iblock = commit_iblock;
+ commit_iblock++;
+ wrap(&journal->jbd_fs->sb, commit_iblock);
+
+ /* To prevent accidental reference to stale journalling metadata. */
+ if (orig_commit_iblock < commit_iblock) {
+ rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
+ if (rc != EOK)
+ return rc;
+
+ memset(block.data, 0, journal->block_size);
+ ext4_bcache_set_dirty(block.buf);
+ ext4_bcache_set_flag(block.buf, BC_TMP);
+ rc = jbd_block_set(journal->jbd_fs, &block);
+ }
+
+ return rc;
}
/**@brief Write descriptor block for a transaction
@@ -1781,19 +1796,25 @@
struct jbd_trans *trans)
{
int rc = EOK, i = 0;
+ struct ext4_block desc_block = EXT4_BLOCK_ZERO(),
+ data_block = EXT4_BLOCK_ZERO();
int32_t tag_tbl_size = 0;
uint32_t desc_iblock = 0;
uint32_t data_iblock = 0;
char *tag_start = NULL, *tag_ptr = NULL;
struct jbd_buf *jbd_buf, *tmp;
- struct ext4_block desc_block, data_block;
struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
uint32_t checksum = EXT4_CRC32_INIT;
+ struct jbd_bhdr *bhdr = NULL;
+ void *data;
/* Try to remove any non-dirty buffers from the tail of
* buf_queue. */
TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
jbd_trans_buf, buf_node, tmp) {
+ struct jbd_revoke_rec tmp_rec = {
+ .lba = jbd_buf->block_rec->lba
+ };
/* We stop the iteration when we find a dirty buffer. */
if (ext4_bcache_test_flag(jbd_buf->block.buf,
BC_DIRTY))
@@ -1808,12 +1829,12 @@
jbd_trans_finish_callback(journal,
trans,
jbd_buf->block_rec,
- true);
-
- /* The buffer has not been modified, just release
- * that jbd_buf. */
+ true,
+ RB_FIND(jbd_revoke_tree,
+ &trans->revoke_root,
+ &tmp_rec));
jbd_trans_remove_block_rec(journal,
- jbd_buf->block_rec, trans);
+ jbd_buf->block_rec, trans);
trans->data_cnt--;
ext4_block_set(fs->bdev, &jbd_buf->block);
@@ -1824,6 +1845,9 @@
TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
struct tag_info tag_info;
bool uuid_exist = false;
+ struct jbd_revoke_rec tmp_rec = {
+ .lba = jbd_buf->block_rec->lba
+ };
if (!ext4_bcache_test_flag(jbd_buf->block.buf,
BC_DIRTY)) {
TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
@@ -1832,13 +1856,16 @@
jbd_buf->block.buf->end_write = NULL;
jbd_buf->block.buf->end_write_arg = NULL;
- jbd_trans_finish_callback(journal,
- trans,
- jbd_buf->block_rec,
- true);
/* The buffer has not been modified, just release
* that jbd_buf. */
+ jbd_trans_finish_callback(journal,
+ trans,
+ jbd_buf->block_rec,
+ true,
+ RB_FIND(jbd_revoke_tree,
+ &trans->revoke_root,
+ &tmp_rec));
jbd_trans_remove_block_rec(journal,
jbd_buf->block_rec, trans);
trans->data_cnt--;
@@ -1854,15 +1881,11 @@
trans->trans_id);
again:
if (!desc_iblock) {
- struct jbd_bhdr *bhdr;
desc_iblock = jbd_journal_alloc_block(journal, trans);
- rc = jbd_block_get_noread(journal->jbd_fs,
- &desc_block, desc_iblock);
+ rc = jbd_block_get_noread(journal->jbd_fs, &desc_block, desc_iblock);
if (rc != EOK)
break;
- ext4_bcache_set_dirty(desc_block.buf);
-
bhdr = (struct jbd_bhdr *)desc_block.data;
jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
@@ -1880,6 +1903,8 @@
if (!trans->start_iblock)
trans->start_iblock = desc_iblock;
+ ext4_bcache_set_dirty(desc_block.buf);
+ ext4_bcache_set_flag(desc_block.buf, BC_TMP);
}
tag_info.block = jbd_buf->block.lb_id;
tag_info.uuid_exist = uuid_exist;
@@ -1899,28 +1924,37 @@
tag_tbl_size,
&tag_info);
if (rc != EOK) {
- jbd_meta_csum_set(journal->jbd_fs,
- (struct jbd_bhdr *)desc_block.data);
- jbd_block_set(journal->jbd_fs, &desc_block);
+ jbd_meta_csum_set(journal->jbd_fs, bhdr);
desc_iblock = 0;
+ rc = jbd_block_set(journal->jbd_fs, &desc_block);
+ if (rc != EOK)
+ break;
+
goto again;
}
data_iblock = jbd_journal_alloc_block(journal, trans);
- rc = jbd_block_get_noread(journal->jbd_fs,
- &data_block, data_iblock);
- if (rc != EOK)
+ rc = jbd_block_get_noread(journal->jbd_fs, &data_block, data_iblock);
+ if (rc != EOK) {
+ desc_iblock = 0;
+ ext4_bcache_clear_dirty(desc_block.buf);
+ jbd_block_set(journal->jbd_fs, &desc_block);
break;
+ }
- ext4_bcache_set_dirty(data_block.buf);
-
- memcpy(data_block.data, jbd_buf->block.data,
+ data = data_block.data;
+ memcpy(data, jbd_buf->block.data,
journal->block_size);
- jbd_buf->jbd_lba = data_block.lb_id;
-
+ ext4_bcache_set_dirty(data_block.buf);
+ ext4_bcache_set_flag(data_block.buf, BC_TMP);
rc = jbd_block_set(journal->jbd_fs, &data_block);
- if (rc != EOK)
+ if (rc != EOK) {
+ desc_iblock = 0;
+ ext4_bcache_clear_dirty(desc_block.buf);
+ jbd_block_set(journal->jbd_fs, &desc_block);
break;
+ }
+ jbd_buf->jbd_lba = data_iblock;
tag_ptr += tag_info.tag_bytes;
tag_tbl_size -= tag_info.tag_bytes;
@@ -1929,9 +1963,9 @@
}
if (rc == EOK && desc_iblock) {
jbd_meta_csum_set(journal->jbd_fs,
- (struct jbd_bhdr *)desc_block.data);
+ (struct jbd_bhdr *)bhdr);
trans->data_csum = checksum;
- jbd_block_set(journal->jbd_fs, &desc_block);
+ rc = jbd_block_set(journal->jbd_fs, &desc_block);
}
return rc;
@@ -1946,32 +1980,29 @@
struct jbd_trans *trans)
{
int rc = EOK, i = 0;
+ struct ext4_block desc_block = EXT4_BLOCK_ZERO();
int32_t tag_tbl_size = 0;
uint32_t desc_iblock = 0;
char *blocks_entry = NULL;
struct jbd_revoke_rec *rec, *tmp;
- struct ext4_block desc_block;
struct jbd_revoke_header *header = NULL;
int32_t record_len = 4;
+ struct jbd_bhdr *bhdr = NULL;
if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
JBD_FEATURE_INCOMPAT_64BIT))
record_len = 8;
- LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
+ RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
tmp) {
again:
if (!desc_iblock) {
- struct jbd_bhdr *bhdr;
desc_iblock = jbd_journal_alloc_block(journal, trans);
- rc = jbd_block_get_noread(journal->jbd_fs,
- &desc_block, desc_iblock);
- if (rc != EOK) {
+ rc = jbd_block_get_noread(journal->jbd_fs, &desc_block,
+ desc_iblock);
+ if (rc != EOK)
break;
- }
- ext4_bcache_set_dirty(desc_block.buf);
-
bhdr = (struct jbd_bhdr *)desc_block.data;
jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
@@ -1988,16 +2019,21 @@
if (!trans->start_iblock)
trans->start_iblock = desc_iblock;
+ ext4_bcache_set_dirty(desc_block.buf);
+ ext4_bcache_set_flag(desc_block.buf, BC_TMP);
}
if (tag_tbl_size < record_len) {
jbd_set32(header, count,
journal->block_size - tag_tbl_size);
- jbd_meta_csum_set(journal->jbd_fs,
- (struct jbd_bhdr *)desc_block.data);
- jbd_block_set(journal->jbd_fs, &desc_block);
+ jbd_meta_csum_set(journal->jbd_fs, bhdr);
+ bhdr = NULL;
desc_iblock = 0;
header = NULL;
+ rc = jbd_block_set(journal->jbd_fs, &desc_block);
+ if (rc != EOK)
+ break;
+
goto again;
}
if (record_len == 8) {
@@ -2019,9 +2055,8 @@
jbd_set32(header, count,
journal->block_size - tag_tbl_size);
- jbd_meta_csum_set(journal->jbd_fs,
- (struct jbd_bhdr *)desc_block.data);
- jbd_block_set(journal->jbd_fs, &desc_block);
+ jbd_meta_csum_set(journal->jbd_fs, bhdr);
+ rc = jbd_block_set(journal->jbd_fs, &desc_block);
}
return rc;
@@ -2065,9 +2100,9 @@
jbd_trans_finish_callback(journal,
trans,
jbd_buf->block_rec,
+ false,
false);
- if (block_rec->trans == trans) {
- block_rec->buf = NULL;
+ if (block_rec->trans == trans && buf) {
/* Clear the end_write and end_write_arg fields. */
buf->end_write = NULL;
buf->end_write_arg = NULL;
@@ -2101,11 +2136,12 @@
* @param journal current journal session
* @param trans transaction
* @return standard error code*/
-int jbd_journal_commit_trans(struct jbd_journal *journal,
- struct jbd_trans *trans)
+static int __jbd_journal_commit_trans(struct jbd_journal *journal,
+ struct jbd_trans *trans)
{
int rc = EOK;
uint32_t last = journal->last;
+ struct jbd_revoke_rec *rec, *tmp;
trans->trans_id = journal->alloc_trans_id;
rc = jbd_journal_prepare(journal, trans);
@@ -2117,7 +2153,7 @@
goto Finish;
if (TAILQ_EMPTY(&trans->buf_queue) &&
- LIST_EMPTY(&trans->revoke_list)) {
+ RB_EMPTY(&trans->revoke_root)) {
/* Since there are no entries in both buffer list
* and revoke entry list, we do not consider trans as
* complete transaction and just return EOK.*/
@@ -2130,7 +2166,43 @@
goto Finish;
journal->alloc_trans_id++;
+
+ /* Complete the checkpoint of buffers which are revoked. */
+ RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
+ tmp) {
+ struct jbd_block_rec *block_rec =
+ jbd_trans_block_rec_lookup(journal, rec->lba);
+ struct jbd_buf *jbd_buf = NULL;
+ if (block_rec)
+ jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
+ jbd_buf_dirty);
+ if (jbd_buf) {
+ struct ext4_buf *buf;
+ struct ext4_block block = EXT4_BLOCK_ZERO();
+ /*
+ * We do this to reset the ext4_buf::end_write and
+ * ext4_buf::end_write_arg fields so that the checkpoint
+ * callback won't be triggered again.
+ */
+ buf = ext4_bcache_find_get(journal->jbd_fs->bdev->bc,
+ &block,
+ jbd_buf->block_rec->lba);
+ jbd_trans_end_write(journal->jbd_fs->bdev->bc,
+ buf,
+ EOK,
+ jbd_buf);
+ if (buf)
+ ext4_block_set(journal->jbd_fs->bdev, &block);
+ }
+ }
+
if (TAILQ_EMPTY(&journal->cp_queue)) {
+ /*
+ * This transaction is going to be the first object in the
+ * checkpoint queue.
+ * When the first transaction in checkpoint queue is completely
+ * written to disk, we shift the tail of the log to right.
+ */
if (trans->data_cnt) {
journal->start = trans->start_iblock;
wrap(&journal->jbd_fs->sb, journal->start);
@@ -2149,18 +2221,50 @@
jbd_journal_free_trans(journal, trans, false);
}
} else {
+ /* No need to do anything to the JBD superblock. */
TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
trans_node);
if (trans->data_cnt)
jbd_journal_cp_trans(journal, trans);
-
}
Finish:
- if (rc != EOK) {
+ if (rc != EOK && rc != ENOSPC) {
journal->last = last;
jbd_journal_free_trans(journal, trans, true);
}
return rc;
+}
+
+/**@brief Allocate a new transaction
+ * @param journal current journal session
+ * @return transaction allocated*/
+struct jbd_trans *
+jbd_journal_new_trans(struct jbd_journal *journal)
+{
+ struct jbd_trans *trans = NULL;
+ trans = calloc(1, sizeof(struct jbd_trans));
+ if (!trans)
+ return NULL;
+
+ /* We will assign a trans_id to this transaction,
+ * once it has been committed.*/
+ trans->journal = journal;
+ trans->data_csum = EXT4_CRC32_INIT;
+ trans->error = EOK;
+ TAILQ_INIT(&trans->buf_queue);
+ return trans;
+}
+
+/**@brief Commit a transaction to the journal immediately.
+ * @param journal current journal session
+ * @param trans transaction
+ * @return standard error code*/
+int jbd_journal_commit_trans(struct jbd_journal *journal,
+ struct jbd_trans *trans)
+{
+ int r = EOK;
+ r = __jbd_journal_commit_trans(journal, trans);
+ return r;
}
/**
--- a/src/ext4_trans.c
+++ b/src/ext4_trans.c
@@ -44,20 +44,6 @@
#include "ext4_fs.h"
#include "ext4_journal.h"
-static int ext4_trans_get_write_access(struct ext4_fs *fs __unused,
- struct ext4_block *block __unused)
-{
- int r = EOK;
-#if CONFIG_JOURNALING_ENABLE
- if (fs->jbd_journal && fs->curr_trans) {
- struct jbd_journal *journal = fs->jbd_journal;
- struct jbd_trans *trans = fs->curr_trans;
- r = jbd_trans_get_access(journal, trans, block);
- }
-#endif
- return r;
-}
-
int ext4_trans_set_block_dirty(struct ext4_buf *buf)
{
int r = EOK;
@@ -86,10 +72,6 @@
if (r != EOK)
return r;
- r = ext4_trans_get_write_access(bdev->fs, b);
- if (r != EOK)
- ext4_block_set(bdev, b);
-
return r;
}
@@ -100,10 +82,6 @@
int r = ext4_block_get(bdev, b, lba);
if (r != EOK)
return r;
-
- r = ext4_trans_get_write_access(bdev->fs, b);
- if (r != EOK)
- ext4_block_set(bdev, b);
return r;
}