Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix data race between zil_commit() and zil_suspend() #14514

Merged
merged 1 commit into from
Mar 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/sys/zil_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ struct zilog {
uint64_t zl_destroy_txg; /* txg of last zil_destroy() */
uint64_t zl_replayed_seq[TXG_SIZE]; /* last replayed rec seq */
uint64_t zl_replaying_seq; /* current replay seq number */
krwlock_t zl_suspend_lock; /* protects suspend count */
uint32_t zl_suspend; /* log suspend count */
kcondvar_t zl_cv_suspend; /* log suspend completion */
uint8_t zl_suspending; /* log is currently suspending */
Expand Down
27 changes: 27 additions & 0 deletions module/zfs/zil.c
Original file line number Diff line number Diff line change
Expand Up @@ -3195,6 +3195,21 @@ zil_commit(zilog_t *zilog, uint64_t foid)
return;
}

/*
* The ->zl_suspend_lock rwlock ensures that all in-flight
* zil_commit() operations finish before suspension begins and that
* no more begin. Without it, it is possible for the scheduler to
* preempt us right after the zilog->zl_suspend suspend check, run
* another thread that runs zil_suspend() and after the other thread
* has finished its call to zil_commit_impl(), resume this thread while
* zil is suspended. This can trigger an assertion failure in
* VERIFY(list_is_empty(&lwb->lwb_itxs)). If it is held, it means that
* `zil_suspend()` is executing in another thread, so we go to
* txg_wait_synced().
*/
if (!rw_tryenter(&zilog->zl_suspend_lock, RW_READER))
goto wait;

/*
* If the ZIL is suspended, we don't want to dirty it by calling
* zil_commit_itx_assign() below, nor can we write out
Expand All @@ -3203,11 +3218,14 @@ zil_commit(zilog_t *zilog, uint64_t foid)
* semantics, and avoid calling those functions altogether.
*/
if (zilog->zl_suspend > 0) {
rw_exit(&zilog->zl_suspend_lock);
wait:
txg_wait_synced(zilog->zl_dmu_pool, 0);
return;
}

zil_commit_impl(zilog, foid);
rw_exit(&zilog->zl_suspend_lock);
}

void
Expand Down Expand Up @@ -3472,6 +3490,8 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
cv_init(&zilog->zl_cv_suspend, NULL, CV_DEFAULT, NULL);
cv_init(&zilog->zl_lwb_io_cv, NULL, CV_DEFAULT, NULL);

rw_init(&zilog->zl_suspend_lock, NULL, RW_DEFAULT, NULL);

return (zilog);
}

Expand Down Expand Up @@ -3511,6 +3531,8 @@ zil_free(zilog_t *zilog)
cv_destroy(&zilog->zl_cv_suspend);
cv_destroy(&zilog->zl_lwb_io_cv);

rw_destroy(&zilog->zl_suspend_lock);

kmem_free(zilog, sizeof (zilog_t));
}

Expand Down Expand Up @@ -3638,11 +3660,14 @@ zil_suspend(const char *osname, void **cookiep)
return (error);
zilog = dmu_objset_zil(os);

rw_enter(&zilog->zl_suspend_lock, RW_WRITER);

mutex_enter(&zilog->zl_lock);
zh = zilog->zl_header;

if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */
mutex_exit(&zilog->zl_lock);
rw_exit(&zilog->zl_suspend_lock);
dmu_objset_rele(os, suspend_tag);
return (SET_ERROR(EBUSY));
}
Expand All @@ -3656,6 +3681,7 @@ zil_suspend(const char *osname, void **cookiep)
if (cookiep == NULL && !zilog->zl_suspending &&
(zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) {
mutex_exit(&zilog->zl_lock);
rw_exit(&zilog->zl_suspend_lock);
dmu_objset_rele(os, suspend_tag);
return (0);
}
Expand All @@ -3664,6 +3690,7 @@ zil_suspend(const char *osname, void **cookiep)
dsl_pool_rele(dmu_objset_pool(os), suspend_tag);

zilog->zl_suspend++;
rw_exit(&zilog->zl_suspend_lock);

if (zilog->zl_suspend > 1) {
/*
Expand Down