Skip to content

Commit

Permalink
fatal in less cases
Browse files Browse the repository at this point in the history
  • Loading branch information
cbi42 committed Sep 16, 2024
1 parent faee5d2 commit a30b22a
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 19 deletions.
4 changes: 2 additions & 2 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1526,7 +1526,7 @@ Status DBImpl::FlushWAL(const WriteOptions& write_options, bool sync) {
io_s.ToString().c_str());
// In case there is a fs error we should set it globally to prevent the
// future writes
IOStatusCheck(io_s);
WALIOStatusCheck(io_s);
// whether sync or not, we should abort the rest of function upon error
return static_cast<Status>(io_s);
}
Expand Down Expand Up @@ -1683,7 +1683,7 @@ IOStatus DBImpl::SyncWalImpl(bool include_current_wal,
io_s.ToString().c_str());
// In case there is a fs error we should set it globally to prevent the
// future writes
IOStatusCheck(io_s);
WALIOStatusCheck(io_s);
}
if (io_s.ok() && need_wal_dir_sync) {
io_s = directories_.GetWalDir()->FsyncWithDirOptions(
Expand Down
3 changes: 2 additions & 1 deletion db/db_impl/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1958,6 +1958,7 @@ class DBImpl : public DB {
void ReleaseFileNumberFromPendingOutputs(
std::unique_ptr<std::list<uint64_t>::iterator>& v);

// Sets bg error if there is an error writing to WAL.
IOStatus SyncClosedWals(const WriteOptions& write_options,
JobContext* job_context, VersionEdit* synced_wals,
bool error_recovery_in_prog);
Expand Down Expand Up @@ -2176,7 +2177,7 @@ class DBImpl : public DB {

// Used by WriteImpl to update bg_error_ when IO error happens, e.g., write
// WAL, sync WAL fails, if paranoid check is enabled.
void IOStatusCheck(const IOStatus& status);
void WALIOStatusCheck(const IOStatus& status);

// Used by WriteImpl to update bg_error_ in case of memtable insert error.
void MemTableInsertStatusCheck(const Status& memtable_insert_status);
Expand Down
14 changes: 8 additions & 6 deletions db/db_impl/db_impl_write.cc
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,

if (!io_s.ok()) {
// Check WriteToWAL status
IOStatusCheck(io_s);
WALIOStatusCheck(io_s);
}
if (!w.CallbackFailed()) {
if (!io_s.ok()) {
Expand Down Expand Up @@ -799,7 +799,7 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,

if (!io_s.ok()) {
// Check WriteToWAL status
IOStatusCheck(io_s);
WALIOStatusCheck(io_s);
} else if (!w.CallbackFailed()) {
WriteStatusCheck(w.status);
}
Expand Down Expand Up @@ -1077,7 +1077,7 @@ Status DBImpl::WriteImplWALOnly(
// This error checking and return is moved up to avoid using uninitialized
// last_sequence.
if (!io_s.ok()) {
IOStatusCheck(io_s);
WALIOStatusCheck(io_s);
write_thread->ExitAsBatchGroupLeader(write_group, status);
return status;
}
Expand Down Expand Up @@ -1175,15 +1175,16 @@ void DBImpl::WriteStatusCheck(const Status& status) {
}
}

void DBImpl::IOStatusCheck(const IOStatus& io_status) {
void DBImpl::WALIOStatusCheck(const IOStatus& io_status) {
// Is setting bg_error_ enough here? This will at least stop
// compaction and fail any further writes.
if ((immutable_db_options_.paranoid_checks && !io_status.ok() &&
!io_status.IsBusy() && !io_status.IsIncomplete()) ||
io_status.IsIOFenced()) {
mutex_.Lock();
// Maybe change the return status to void?
error_handler_.SetBGError(io_status, BackgroundErrorReason::kWriteCallback);
error_handler_.SetBGError(io_status, BackgroundErrorReason::kWriteCallback,
/*wal_related=*/true);
mutex_.Unlock();
} else {
// Force writable file to be continue writable.
Expand Down Expand Up @@ -2326,7 +2327,8 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
// We may have lost data from the WritableFileBuffer in-memory buffer for
// the current log, so treat it as a fatal error and set bg_error
if (!io_s.ok()) {
error_handler_.SetBGError(io_s, BackgroundErrorReason::kMemTable);
error_handler_.SetBGError(io_s, BackgroundErrorReason::kMemTable,
/*wal_related=*/true);
} else {
error_handler_.SetBGError(s, BackgroundErrorReason::kMemTable);
}
Expand Down
19 changes: 10 additions & 9 deletions db/error_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ void ErrorHandler::HandleKnownErrors(const Status& bg_err,
// BackgroundErrorReason reason) will be called to handle other error cases
// such as delegating to SstFileManager to handle no space error.
void ErrorHandler::SetBGError(const Status& bg_status,
BackgroundErrorReason reason) {
BackgroundErrorReason reason, bool wal_related) {
db_mutex_->AssertHeld();
Status tmp_status = bg_status;
IOStatus bg_io_err = status_to_io_status(std::move(tmp_status));
Expand Down Expand Up @@ -412,16 +412,17 @@ void ErrorHandler::SetBGError(const Status& bg_status,
recover_context_ = context;
return;
}
const bool wal_related_reason =
reason == BackgroundErrorReason::kWriteCallback ||
reason == BackgroundErrorReason::kMemTable ||
reason == BackgroundErrorReason::kFlush;
if (db_options_.manual_wal_flush && wal_related_reason) {
// We should not try auto recover from this error.
if (wal_related) {
assert(reason == BackgroundErrorReason::kWriteCallback ||
reason == BackgroundErrorReason::kMemTable ||
reason == BackgroundErrorReason::kFlush);
}
if (db_options_.manual_wal_flush && wal_related && bg_io_err.IsIOError()) {
// We should not try auto recover IOError from writing to WAL .
// With manual_wal_flush, a WAL write failure can drop buffered WAL writes.
// Memtables and WAL may not be consistent. A successful memtable flush on
// one CF can cause CFs to be inconsistent upon restart. Set severity to
// fatal to disallow resume.
// one CF can cause CFs to be inconsistent upon restart. Set the error
// severity to fatal to disallow resume.
bool auto_recovery = false;
Status bg_err(new_bg_io_err, Status::Severity::kFatalError);
CheckAndSetRecoveryAndBGError(bg_err);
Expand Down
3 changes: 2 additions & 1 deletion db/error_handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ class ErrorHandler {
Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
Status::Code code, Status::SubCode subcode);

void SetBGError(const Status& bg_err, BackgroundErrorReason reason);
void SetBGError(const Status& bg_err, BackgroundErrorReason reason,
bool wal_related = false);

Status GetBGError() const { return bg_error_; }

Expand Down

0 comments on commit a30b22a

Please sign in to comment.