Skip to content

Commit

Permalink
Fix corrupted DB on networks where the first slot is skipped (Holesky) (
Browse files Browse the repository at this point in the history
#4985)

* Fix zero block roots on skip slots.

* Remove temporary comment, println code and unused imports.

* Remove `println!` in test.
  • Loading branch information
jimmygchen committed Dec 7, 2023
1 parent 8ba39cb commit 67e0569
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ pub fn upgrade_to_v18<T: BeaconChainTypes>(
db: Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>,
log: Logger,
) -> Result<Vec<KeyValueStoreOp>, Error> {
db.heal_freezer_block_roots()?;
db.heal_freezer_block_roots_at_split()?;
db.heal_freezer_block_roots_at_genesis()?;
info!(log, "Healed freezer block roots");

// No-op, even if Deneb has already occurred. The database is probably borked in this case, but
Expand Down
95 changes: 84 additions & 11 deletions beacon_node/beacon_chain/tests/store_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use std::collections::HashSet;
use std::convert::TryInto;
use std::sync::Arc;
use std::time::Duration;
use store::chunked_vector::Chunk;
use store::metadata::{SchemaVersion, CURRENT_SCHEMA_VERSION, STATE_UPPER_LIMIT_NO_RETAIN};
use store::{
chunked_vector::{chunk_key, Field},
Expand Down Expand Up @@ -106,10 +107,10 @@ fn get_harness_generic(
harness
}

/// Tests that `store.heal_freezer_block_roots` inserts block roots between last restore point
/// Tests that `store.heal_freezer_block_roots_at_split` inserts block roots between last restore point
/// slot and the split slot.
#[tokio::test]
async fn heal_freezer_block_roots() {
async fn heal_freezer_block_roots_at_split() {
// chunk_size is hard-coded to 128
let num_blocks_produced = E::slots_per_epoch() * 20;
let db_path = tempdir().unwrap();
Expand All @@ -136,7 +137,7 @@ async fn heal_freezer_block_roots() {

// Do a heal before deleting to make sure that it doesn't break.
let last_restore_point_slot = Slot::new(16 * E::slots_per_epoch());
store.heal_freezer_block_roots().unwrap();
store.heal_freezer_block_roots_at_split().unwrap();
check_freezer_block_roots(&harness, last_restore_point_slot, split_slot);

// Delete block roots between `last_restore_point_slot` and `split_slot`.
Expand Down Expand Up @@ -164,7 +165,7 @@ async fn heal_freezer_block_roots() {
assert!(matches!(block_root_err, store::Error::NoContinuationData));

// Re-insert block roots
store.heal_freezer_block_roots().unwrap();
store.heal_freezer_block_roots_at_split().unwrap();
check_freezer_block_roots(&harness, last_restore_point_slot, split_slot);

// Run for another two epochs to check that the invariant is maintained.
Expand Down Expand Up @@ -243,7 +244,7 @@ async fn heal_freezer_block_roots_with_skip_slots() {
assert!(matches!(block_root_err, store::Error::NoContinuationData));

// heal function
store.heal_freezer_block_roots().unwrap();
store.heal_freezer_block_roots_at_split().unwrap();
check_freezer_block_roots(&harness, last_restore_point_slot, split_slot);

// Run for another two epochs to check that the invariant is maintained.
Expand All @@ -257,12 +258,84 @@ async fn heal_freezer_block_roots_with_skip_slots() {
check_iterators(&harness);
}

fn check_freezer_block_roots(
harness: &TestHarness,
last_restore_point_slot: Slot,
split_slot: Slot,
) {
for slot in (last_restore_point_slot.as_u64()..split_slot.as_u64()).map(Slot::new) {
/// Tests that `store.heal_freezer_block_roots_at_genesis` replaces 0x0 block roots between slot
/// 0 and the first non-skip slot with genesis block root.
#[tokio::test]
async fn heal_freezer_block_roots_at_genesis() {
// Run for a few epochs to ensure we're past finalization.
let num_blocks_produced = E::slots_per_epoch() * 4;
let db_path = tempdir().unwrap();
let store = get_store(&db_path);
let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT);

// Start with 2 skip slots.
harness.advance_slot();
harness.advance_slot();

harness
.extend_chain(
num_blocks_produced as usize,
BlockStrategy::OnCanonicalHead,
AttestationStrategy::AllValidators,
)
.await;

// Do a heal before deleting to make sure that it doesn't break.
store.heal_freezer_block_roots_at_genesis().unwrap();
check_freezer_block_roots(
&harness,
Slot::new(0),
Epoch::new(1).end_slot(E::slots_per_epoch()),
);

// Write 0x0 block roots at slot 1 and slot 2.
let chunk_index = 0;
let chunk_db_key = chunk_key(chunk_index);
let mut chunk =
Chunk::<Hash256>::load(&store.cold_db, DBColumn::BeaconBlockRoots, &chunk_db_key)
.unwrap()
.unwrap();

chunk.values[1] = Hash256::zero();
chunk.values[2] = Hash256::zero();

let mut ops = vec![];
chunk
.store(DBColumn::BeaconBlockRoots, &chunk_db_key, &mut ops)
.unwrap();
store.cold_db.do_atomically(ops).unwrap();

// Ensure the DB is corrupted
let block_roots = store
.forwards_block_roots_iterator_until(
Slot::new(1),
Slot::new(2),
|| unreachable!(),
&harness.chain.spec,
)
.unwrap()
.map(Result::unwrap)
.take(2)
.collect::<Vec<_>>();
assert_eq!(
block_roots,
vec![
(Hash256::zero(), Slot::new(1)),
(Hash256::zero(), Slot::new(2))
]
);

// Insert genesis block roots at skip slots before first block slot
store.heal_freezer_block_roots_at_genesis().unwrap();
check_freezer_block_roots(
&harness,
Slot::new(0),
Epoch::new(1).end_slot(E::slots_per_epoch()),
);
}

fn check_freezer_block_roots(harness: &TestHarness, start_slot: Slot, end_slot: Slot) {
for slot in (start_slot.as_u64()..end_slot.as_u64()).map(Slot::new) {
let (block_root, result_slot) = harness
.chain
.store
Expand Down
51 changes: 49 additions & 2 deletions beacon_node/store/src/hot_cold_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2216,7 +2216,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>

/// This function fills in missing block roots between last restore point slot and split
/// slot, if any.
pub fn heal_freezer_block_roots(&self) -> Result<(), Error> {
pub fn heal_freezer_block_roots_at_split(&self) -> Result<(), Error> {
let split = self.get_split_info();
let last_restore_point_slot = (split.slot - 1) / self.config.slots_per_restore_point
* self.config.slots_per_restore_point;
Expand Down Expand Up @@ -2245,6 +2245,53 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
Ok(())
}

pub fn heal_freezer_block_roots_at_genesis(&self) -> Result<(), Error> {
let oldest_block_slot = self.get_oldest_block_slot();
let split_slot = self.get_split_slot();

// Check if backfill has been completed AND the freezer db has data in it
if oldest_block_slot != 0 || split_slot == 0 {
return Ok(());
}

let mut block_root_iter = self.forwards_block_roots_iterator_until(
Slot::new(0),
split_slot - 1,
|| {
Err(Error::DBError {
message: "Should not require end state".to_string(),
})
},
&self.spec,
)?;

let (genesis_block_root, _) = block_root_iter.next().ok_or_else(|| Error::DBError {
message: "Genesis block root missing".to_string(),
})??;

let slots_to_fix = itertools::process_results(block_root_iter, |iter| {
iter.take_while(|(block_root, _)| block_root.is_zero())
.map(|(_, slot)| slot)
.collect::<Vec<_>>()
})?;

let Some(first_slot) = slots_to_fix.first() else {
return Ok(());
};

let mut chunk_writer =
ChunkWriter::<BlockRoots, _, _>::new(&self.cold_db, first_slot.as_usize())?;
let mut ops = vec![];
for slot in slots_to_fix {
chunk_writer.set(slot.as_usize(), genesis_block_root, &mut ops)?;
}

chunk_writer.write(&mut ops)?;
self.cold_db.do_atomically(ops)?;

Ok(())
}

/// Delete *all* states from the freezer database and update the anchor accordingly.
///
/// WARNING: this method deletes the genesis state and replaces it with the provided
Expand All @@ -2257,7 +2304,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
genesis_state: &BeaconState<E>,
) -> Result<(), Error> {
// Make sure there is no missing block roots before pruning
self.heal_freezer_block_roots()?;
self.heal_freezer_block_roots_at_split()?;

// Update the anchor to use the dummy state upper limit and disable historic state storage.
let old_anchor = self.get_anchor_info();
Expand Down

0 comments on commit 67e0569

Please sign in to comment.