From 67e0569d9b15ab0c659ac2b515fa12229398d4f4 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 7 Dec 2023 15:12:06 +1100 Subject: [PATCH] Fix corrupted DB on networks where the first slot is skipped (Holesky) (#4985) * Fix zero block roots on skip slots. * Remove temporary comment, println code and unused imports. * Remove `println!` in test. --- .../src/schema_change/migration_schema_v18.rs | 3 +- beacon_node/beacon_chain/tests/store_tests.rs | 95 ++++++++++++++++--- beacon_node/store/src/hot_cold_store.rs | 51 +++++++++- 3 files changed, 135 insertions(+), 14 deletions(-) diff --git a/beacon_node/beacon_chain/src/schema_change/migration_schema_v18.rs b/beacon_node/beacon_chain/src/schema_change/migration_schema_v18.rs index e7b68eb4184..04a9da84128 100644 --- a/beacon_node/beacon_chain/src/schema_change/migration_schema_v18.rs +++ b/beacon_node/beacon_chain/src/schema_change/migration_schema_v18.rs @@ -46,7 +46,8 @@ pub fn upgrade_to_v18( db: Arc>, log: Logger, ) -> Result, Error> { - db.heal_freezer_block_roots()?; + db.heal_freezer_block_roots_at_split()?; + db.heal_freezer_block_roots_at_genesis()?; info!(log, "Healed freezer block roots"); // No-op, even if Deneb has already occurred. The database is probably borked in this case, but diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index 9f7199cf3cf..8ba099ec73c 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -26,6 +26,7 @@ use std::collections::HashSet; use std::convert::TryInto; use std::sync::Arc; use std::time::Duration; +use store::chunked_vector::Chunk; use store::metadata::{SchemaVersion, CURRENT_SCHEMA_VERSION, STATE_UPPER_LIMIT_NO_RETAIN}; use store::{ chunked_vector::{chunk_key, Field}, @@ -106,10 +107,10 @@ fn get_harness_generic( harness } -/// Tests that `store.heal_freezer_block_roots` inserts block roots between last restore point +/// Tests that `store.heal_freezer_block_roots_at_split` inserts block roots between last restore point /// slot and the split slot. #[tokio::test] -async fn heal_freezer_block_roots() { +async fn heal_freezer_block_roots_at_split() { // chunk_size is hard-coded to 128 let num_blocks_produced = E::slots_per_epoch() * 20; let db_path = tempdir().unwrap(); @@ -136,7 +137,7 @@ async fn heal_freezer_block_roots() { // Do a heal before deleting to make sure that it doesn't break. let last_restore_point_slot = Slot::new(16 * E::slots_per_epoch()); - store.heal_freezer_block_roots().unwrap(); + store.heal_freezer_block_roots_at_split().unwrap(); check_freezer_block_roots(&harness, last_restore_point_slot, split_slot); // Delete block roots between `last_restore_point_slot` and `split_slot`. @@ -164,7 +165,7 @@ async fn heal_freezer_block_roots() { assert!(matches!(block_root_err, store::Error::NoContinuationData)); // Re-insert block roots - store.heal_freezer_block_roots().unwrap(); + store.heal_freezer_block_roots_at_split().unwrap(); check_freezer_block_roots(&harness, last_restore_point_slot, split_slot); // Run for another two epochs to check that the invariant is maintained. @@ -243,7 +244,7 @@ async fn heal_freezer_block_roots_with_skip_slots() { assert!(matches!(block_root_err, store::Error::NoContinuationData)); // heal function - store.heal_freezer_block_roots().unwrap(); + store.heal_freezer_block_roots_at_split().unwrap(); check_freezer_block_roots(&harness, last_restore_point_slot, split_slot); // Run for another two epochs to check that the invariant is maintained. @@ -257,12 +258,84 @@ async fn heal_freezer_block_roots_with_skip_slots() { check_iterators(&harness); } -fn check_freezer_block_roots( - harness: &TestHarness, - last_restore_point_slot: Slot, - split_slot: Slot, -) { - for slot in (last_restore_point_slot.as_u64()..split_slot.as_u64()).map(Slot::new) { +/// Tests that `store.heal_freezer_block_roots_at_genesis` replaces 0x0 block roots between slot +/// 0 and the first non-skip slot with genesis block root. +#[tokio::test] +async fn heal_freezer_block_roots_at_genesis() { + // Run for a few epochs to ensure we're past finalization. + let num_blocks_produced = E::slots_per_epoch() * 4; + let db_path = tempdir().unwrap(); + let store = get_store(&db_path); + let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT); + + // Start with 2 skip slots. + harness.advance_slot(); + harness.advance_slot(); + + harness + .extend_chain( + num_blocks_produced as usize, + BlockStrategy::OnCanonicalHead, + AttestationStrategy::AllValidators, + ) + .await; + + // Do a heal before deleting to make sure that it doesn't break. + store.heal_freezer_block_roots_at_genesis().unwrap(); + check_freezer_block_roots( + &harness, + Slot::new(0), + Epoch::new(1).end_slot(E::slots_per_epoch()), + ); + + // Write 0x0 block roots at slot 1 and slot 2. + let chunk_index = 0; + let chunk_db_key = chunk_key(chunk_index); + let mut chunk = + Chunk::::load(&store.cold_db, DBColumn::BeaconBlockRoots, &chunk_db_key) + .unwrap() + .unwrap(); + + chunk.values[1] = Hash256::zero(); + chunk.values[2] = Hash256::zero(); + + let mut ops = vec![]; + chunk + .store(DBColumn::BeaconBlockRoots, &chunk_db_key, &mut ops) + .unwrap(); + store.cold_db.do_atomically(ops).unwrap(); + + // Ensure the DB is corrupted + let block_roots = store + .forwards_block_roots_iterator_until( + Slot::new(1), + Slot::new(2), + || unreachable!(), + &harness.chain.spec, + ) + .unwrap() + .map(Result::unwrap) + .take(2) + .collect::>(); + assert_eq!( + block_roots, + vec![ + (Hash256::zero(), Slot::new(1)), + (Hash256::zero(), Slot::new(2)) + ] + ); + + // Insert genesis block roots at skip slots before first block slot + store.heal_freezer_block_roots_at_genesis().unwrap(); + check_freezer_block_roots( + &harness, + Slot::new(0), + Epoch::new(1).end_slot(E::slots_per_epoch()), + ); +} + +fn check_freezer_block_roots(harness: &TestHarness, start_slot: Slot, end_slot: Slot) { + for slot in (start_slot.as_u64()..end_slot.as_u64()).map(Slot::new) { let (block_root, result_slot) = harness .chain .store diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 43e14c30970..208dcfdb045 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -2216,7 +2216,7 @@ impl, Cold: ItemStore> HotColdDB /// This function fills in missing block roots between last restore point slot and split /// slot, if any. - pub fn heal_freezer_block_roots(&self) -> Result<(), Error> { + pub fn heal_freezer_block_roots_at_split(&self) -> Result<(), Error> { let split = self.get_split_info(); let last_restore_point_slot = (split.slot - 1) / self.config.slots_per_restore_point * self.config.slots_per_restore_point; @@ -2245,6 +2245,53 @@ impl, Cold: ItemStore> HotColdDB Ok(()) } + pub fn heal_freezer_block_roots_at_genesis(&self) -> Result<(), Error> { + let oldest_block_slot = self.get_oldest_block_slot(); + let split_slot = self.get_split_slot(); + + // Check if backfill has been completed AND the freezer db has data in it + if oldest_block_slot != 0 || split_slot == 0 { + return Ok(()); + } + + let mut block_root_iter = self.forwards_block_roots_iterator_until( + Slot::new(0), + split_slot - 1, + || { + Err(Error::DBError { + message: "Should not require end state".to_string(), + }) + }, + &self.spec, + )?; + + let (genesis_block_root, _) = block_root_iter.next().ok_or_else(|| Error::DBError { + message: "Genesis block root missing".to_string(), + })??; + + let slots_to_fix = itertools::process_results(block_root_iter, |iter| { + iter.take_while(|(block_root, _)| block_root.is_zero()) + .map(|(_, slot)| slot) + .collect::>() + })?; + + let Some(first_slot) = slots_to_fix.first() else { + return Ok(()); + }; + + let mut chunk_writer = + ChunkWriter::::new(&self.cold_db, first_slot.as_usize())?; + let mut ops = vec![]; + for slot in slots_to_fix { + chunk_writer.set(slot.as_usize(), genesis_block_root, &mut ops)?; + } + + chunk_writer.write(&mut ops)?; + self.cold_db.do_atomically(ops)?; + + Ok(()) + } + /// Delete *all* states from the freezer database and update the anchor accordingly. /// /// WARNING: this method deletes the genesis state and replaces it with the provided @@ -2257,7 +2304,7 @@ impl, Cold: ItemStore> HotColdDB genesis_state: &BeaconState, ) -> Result<(), Error> { // Make sure there is no missing block roots before pruning - self.heal_freezer_block_roots()?; + self.heal_freezer_block_roots_at_split()?; // Update the anchor to use the dummy state upper limit and disable historic state storage. let old_anchor = self.get_anchor_info();