Possible txg locking improvements #4333

Closed
wants to merge 4 commits into from
Changes from all commits
4 changes: 0 additions & 4 deletions include/sys/zfs_context.h
@@ -67,7 +67,6 @@
 #include <sys/fm/fs/zfs.h>
 #include <sys/sunddi.h>
 #include <sys/ctype.h>
-#include <sys/disp.h>
 #include <sys/trace.h>
 #include <linux/dcache_compat.h>
 #include <linux/utsname_compat.h>
@@ -262,9 +261,6 @@ extern kthread_t *zk_thread_create(caddr_t stk, size_t stksize,
 	proc_t *pp, int state, pri_t pri, int detachstate);
 extern void zk_thread_join(kt_did_t tid);
 
-#define kpreempt_disable() ((void)0)
-#define kpreempt_enable() ((void)0)
-
 #define PS_NONE -1
 
 #define issig(why) (FALSE)
8 changes: 1 addition & 7 deletions module/zfs/fm.c
@@ -1515,13 +1515,7 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
 uint64_t
 fm_ena_generate(uint64_t timestamp, uchar_t format)
 {
-	uint64_t ena;
-
-	kpreempt_disable();
-	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
-	kpreempt_enable();
-
-	return (ena);
+	return (fm_ena_generate_cpu(timestamp, getcpuid(), format));
 }
 
 uint64_t
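
The fm.c change works because getcpuid() is only used to salt the error numeric association (ENA); the value is recorded and never relied on afterwards, so a migration between reading the CPU id and using it changes nothing. A minimal sketch of that idea, with an invented bit layout (the real ENA formats live in the FMA headers) and glibc's sched_getcpu() standing in for getcpuid():

#define _GNU_SOURCE		/* sched_getcpu() */
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/*
 * Invented layout, for illustration only.  The CPU id is merely recorded
 * into the identifier; nothing later depends on still running on that CPU,
 * so reading it without disabling preemption is harmless.
 */
static uint64_t
make_id(uint64_t timestamp, int cpu)
{
	return (((uint64_t)(cpu & 0x3ff) << 44) | (timestamp & 0xfffffffffffULL));
}

int
main(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	/* A migration right after sched_getcpu() only changes which id we get. */
	printf("id = %016llx\n", (unsigned long long)
	    make_id((uint64_t)ts.tv_nsec, sched_getcpu()));

	return (0);
}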
47 changes: 22 additions & 25 deletions module/zfs/txg.c
@@ -294,39 +294,37 @@ uint64_t
 txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
 {
 	tx_state_t *tx = &dp->dp_tx;
-	tx_cpu_t *tc;
+	tx_cpu_t *tc = &tx->tx_cpu[CPU_SEQID];
 	uint64_t txg;
 
-	/*
-	 * It appears the processor id is simply used as a "random"
-	 * number to index into the array, and there isn't any other
-	 * significance to the chosen tx_cpu. Because.. Why not use
-	 * the current cpu to index into the array?
-	 */
-	kpreempt_disable();
-	tc = &tx->tx_cpu[CPU_SEQID];
-	kpreempt_enable();
-
 	mutex_enter(&tc->tc_open_lock);
 	txg = tx->tx_open_txg;
 
-	mutex_enter(&tc->tc_lock);
-	tc->tc_count[txg & TXG_MASK]++;
-	mutex_exit(&tc->tc_lock);
+	/*
+	 * We increment ->tc_count inside of the critical section so that the
+	 * zero check in txg_quiesce() does not race with the increment. We use
+	 * an atomic here so that we can minimize potential contention on
+	 * &tc->tc_open_lock in txg_rele_to_sync(). This uses one less atomic
+	 * instruction on both increment and decrement than using &tc->tc_lock.
+	 * Additionally, avoiding a second lock reduces the potential for
+	 * contention to cause us to block while holding the open lock.
+	 */
+	atomic_inc_64(&tc->tc_count[txg & TXG_MASK]);
+	mutex_exit(&tc->tc_open_lock);
 
 	th->th_cpu = tc;
 	th->th_txg = txg;
 
 	return (txg);
 }
 
+/*
+ * This is obsolete, but we keep it around to make porting patches easier and
+ * also so that it can be used as a tracepoint.
+ */
 void
 txg_rele_to_quiesce(txg_handle_t *th)
 {
-	tx_cpu_t *tc = th->th_cpu;
-
-	ASSERT(!MUTEX_HELD(&tc->tc_lock));
-	mutex_exit(&tc->tc_open_lock);
 }
 
 void
@@ -346,11 +344,9 @@ txg_rele_to_sync(txg_handle_t *th)
 	tx_cpu_t *tc = th->th_cpu;
 	int g = th->th_txg & TXG_MASK;
 
-	mutex_enter(&tc->tc_lock);
 	ASSERT(tc->tc_count[g] != 0);
-	if (--tc->tc_count[g] == 0)
+	if (atomic_dec_64_nv(&tc->tc_count[g]) == 0)
 		cv_broadcast(&tc->tc_cv[g]);
-	mutex_exit(&tc->tc_lock);
 
 	th->th_cpu = NULL; /* defensive */
 }
@@ -367,6 +363,7 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
 	tx_state_t *tx = &dp->dp_tx;
 	int g = txg & TXG_MASK;
 	int c;
+	uint64_t tx_open_time;
 
 	/*
 	 * Grab all tc_open_locks so nobody else can get into this txg.
@@ -376,10 +373,7 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
 
 	ASSERT(txg == tx->tx_open_txg);
 	tx->tx_open_txg++;
-	tx->tx_open_time = gethrtime();
-
-	spa_txg_history_set(dp->dp_spa, txg, TXG_STATE_OPEN, tx->tx_open_time);
-	spa_txg_history_add(dp->dp_spa, tx->tx_open_txg, tx->tx_open_time);
+	tx->tx_open_time = tx_open_time = gethrtime();
 
 	DTRACE_PROBE2(txg__quiescing, dsl_pool_t *, dp, uint64_t, txg);
 	DTRACE_PROBE2(txg__opened, dsl_pool_t *, dp, uint64_t, tx->tx_open_txg);
@@ -391,6 +385,9 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
 	for (c = 0; c < max_ncpus; c++)
 		mutex_exit(&tx->tx_cpu[c].tc_open_lock);
 
+	spa_txg_history_set(dp->dp_spa, txg, TXG_STATE_OPEN, tx_open_time);
+	spa_txg_history_add(dp->dp_spa, txg + 1, tx_open_time);
+
 	/*
 	 * Quiesce the transaction group by waiting for everyone to txg_exit().
 	 */
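
Taken together, the txg.c hunks shorten the tc_open_lock hold in txg_hold_open() to just the counter bump, make txg_rele_to_sync() lock-free, and move the spa_txg_history_*() calls outside the window in which txg_quiesce() holds every tc_open_lock. The zero check in txg_quiesce() remains safe because every increment still happens under tc_open_lock, which the quiesce thread takes before advancing tx_open_txg. Below is a small userspace model of that handshake, assuming C11 atomics and pthreads; hold_open(), rele_to_sync(), quiesce() and open_lock are names invented for the sketch, and the real code sleeps on tc_cv under tc_lock instead of spinning:

#define _DEFAULT_SOURCE		/* usleep() */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define TXG_SIZE	4
#define TXG_MASK	(TXG_SIZE - 1)

static pthread_mutex_t open_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~tc_open_lock */
static _Atomic uint64_t count[TXG_SIZE];			/* ~tc_count[] */
static uint64_t open_txg = 10;					/* ~tx_open_txg */

/* Models txg_hold_open(): the increment happens while open_lock is held. */
static uint64_t
hold_open(void)
{
	uint64_t txg;

	pthread_mutex_lock(&open_lock);
	txg = open_txg;
	atomic_fetch_add(&count[txg & TXG_MASK], 1);
	pthread_mutex_unlock(&open_lock);

	return (txg);
}

/* Models txg_rele_to_sync(): no lock, just an atomic decrement. */
static void
rele_to_sync(uint64_t txg)
{
	atomic_fetch_sub(&count[txg & TXG_MASK], 1);
}

/* Models txg_quiesce(): close the txg under open_lock, then drain holders. */
static void
quiesce(uint64_t txg)
{
	pthread_mutex_lock(&open_lock);
	open_txg++;			/* new holds now land on txg + 1 */
	pthread_mutex_unlock(&open_lock);

	while (atomic_load(&count[txg & TXG_MASK]) != 0)
		sched_yield();		/* the real code cv_waits on tc_cv */
}

static void *
worker(void *arg)
{
	uint64_t txg = hold_open();

	(void) arg;
	usleep(1000);			/* pretend to assemble a dmu_tx */
	rele_to_sync(txg);

	return (NULL);
}

int
main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, worker, NULL);
	usleep(100);			/* let the worker take a hold on txg 10 */
	quiesce(10);
	printf("txg 10 has no open holders\n");
	pthread_join(tid, NULL);

	return (0);
}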
7 changes: 1 addition & 6 deletions module/zfs/zio.c
@@ -1542,19 +1542,14 @@ zio_nowait(zio_t *zio)
 
 	if (zio->io_child_type == ZIO_CHILD_LOGICAL &&
 	    zio_unique_parent(zio) == NULL) {
-		zio_t *pio;
-
 		/*
 		 * This is a logical async I/O with no parent to wait for it.
 		 * We add it to the spa_async_root_zio "Godfather" I/O which
 		 * will ensure they complete prior to unloading the pool.
 		 */
 		spa_t *spa = zio->io_spa;
-		kpreempt_disable();
-		pio = spa->spa_async_zio_root[CPU_SEQID];
-		kpreempt_enable();
-
-		zio_add_child(pio, zio);
+		zio_add_child(spa->spa_async_zio_root[CPU_SEQID], zio);
 	}
 
 	__zio_execute(zio);
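
The zio.c hunk keeps the "Godfather" scheme itself unchanged: a parentless async logical zio is attached to one of the per-CPU spa_async_zio_root zios, and any slot works because the array only exists so the pool can wait for all of them before unloading. A rough userspace analogue of that bookkeeping, assuming C11 atomics and glibc's sched_getcpu(); godfather_count[], submit_async() and drain_before_unload() are invented names, not ZFS functions:

#define _GNU_SOURCE		/* sched_getcpu() */
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

#define NROOTS	8		/* stands in for the max_ncpus root zios */

/* Each slot counts async I/Os "parented" to that root; any slot will do. */
static _Atomic int godfather_count[NROOTS];

static int
submit_async(void)
{
	int cpu = sched_getcpu();
	int slot = (cpu < 0 ? 0 : cpu) % NROOTS;

	/* Analogue of zio_add_child(spa->spa_async_zio_root[CPU_SEQID], zio). */
	atomic_fetch_add(&godfather_count[slot], 1);

	return (slot);
}

static void
complete_async(int slot)
{
	atomic_fetch_sub(&godfather_count[slot], 1);
}

/* Pool unload waits on every root, so it never matters which slot was used. */
static void
drain_before_unload(void)
{
	for (int i = 0; i < NROOTS; i++)
		while (atomic_load(&godfather_count[i]) != 0)
			sched_yield();
}

int
main(void)
{
	int slot = submit_async();

	complete_async(slot);
	drain_before_unload();
	printf("all async roots drained\n");

	return (0);
}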