Auto merge of #39424 - nikomatsakis:incr-comp-skip-typeck-3, r=mw
rewrite the predecessors code to create a reduced graph

The old code created a flat listing of "HIR -> WorkProduct" edges.
While perfectly general, this could lead to a lot of repetition if the
same HIR nodes affect many work-products. This is set to be a problem
when we start to skip typeck, since we will be adding a lot more
"work-product"-like nodes.

The newer code uses an alternative strategy: it "reduces" the graph
instead. Basically we walk the dep-graph and convert it to a DAG, where
we keep intermediate nodes only if they are used by multiple
work-products.

This DAG does not contain the same set of nodes as the original graph,
but it is guaranteed that (a) every output node is included in the graph
and (b) the set of input nodes that can reach each output node is
unchanged.

(Input nodes are basically HIR nodes and foreign metadata; output nodes
are nodes that have associated state which we will persist to disk in
some way. These are assumed to be disjoint sets.)
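
For intuition, guarantee (b) can be phrased as an executable check: for
every output node, the set of input nodes that can reach it must be
identical in the original and reduced graphs. A minimal sketch of that
check, assuming a plain adjacency-list graph with u32 node ids (the
names here are illustrative, not the code this PR adds):

use std::collections::{HashMap, HashSet};

type Node = u32;

// Given reversed edges (target -> sources), walk backwards from
// `output` and collect every input node that can reach it.
fn inputs_reaching(rev_edges: &HashMap<Node, Vec<Node>>,
                   inputs: &HashSet<Node>,
                   output: Node)
                   -> HashSet<Node> {
    let mut found = HashSet::new();
    let mut visited = HashSet::new();
    let mut stack = vec![output];
    while let Some(node) = stack.pop() {
        if !visited.insert(node) {
            continue; // already walked through this node
        }
        if inputs.contains(&node) {
            found.insert(node);
        }
        if let Some(sources) = rev_edges.get(&node) {
            stack.extend(sources.iter().cloned());
        }
    }
    found
}

Guarantee (a) says every output node appears in the reduced graph, so
running this check against both graphs for each output node validates
the reduction.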

r? @michaelwoerister

Fixes #39494
bors committed Feb 4, 2017
2 parents 8967085 + b3096e2 commit eb5cb95
Showing 15 changed files with 1,209 additions and 479 deletions.
3 changes: 3 additions & 0 deletions src/librustc_incremental/lib.rs
@@ -23,6 +23,9 @@
#![feature(staged_api)]
#![feature(rand)]
#![feature(core_intrinsics)]
#![feature(conservative_impl_trait)]
#![feature(field_init_shorthand)]
#![feature(pub_restricted)]

extern crate graphviz;
#[macro_use] extern crate rustc;
17 changes: 8 additions & 9 deletions src/librustc_incremental/persist/data.rs
@@ -21,7 +21,7 @@ use super::directory::DefPathIndex;
/// Data for use when recompiling the **current crate**.
#[derive(Debug, RustcEncodable, RustcDecodable)]
pub struct SerializedDepGraph {
pub edges: Vec<SerializedEdge>,
pub edges: Vec<SerializedEdgeSet>,

/// These are hashes of two things:
/// - the HIR nodes in this crate
@@ -45,14 +45,13 @@ pub struct SerializedDepGraph {
pub hashes: Vec<SerializedHash>,
}

/// Represents a "reduced" dependency edge. Unlike the full dep-graph,
/// the dep-graph we serialize contains only edges `S -> T` where the
/// source `S` is something hashable (a HIR node or foreign metadata)
/// and the target `T` is something significant, like a work-product.
/// Normally, significant nodes are only those that have saved data on
/// disk, but in unit-testing the set of significant nodes can be
/// increased.
pub type SerializedEdge = (DepNode<DefPathIndex>, DepNode<DefPathIndex>);
/// Represents a set of "reduced" dependency edges: the outgoing
/// edges from a single source, grouped together.
#[derive(Debug, RustcEncodable, RustcDecodable)]
pub struct SerializedEdgeSet {
pub source: DepNode<DefPathIndex>,
pub targets: Vec<DepNode<DefPathIndex>>
}
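
To see the space saving, compare with the old flat representation: a
flat edge list repeats the source node once per outgoing edge, whereas
the edge set stores it once. A hedged sketch of that grouping step over
generic node types (an illustration, not the code in this commit):

use std::collections::HashMap;
use std::hash::Hash;

// Group a flat edge list by source, storing each source only once.
fn group_edges<S: Hash + Eq, T>(flat: Vec<(S, T)>) -> Vec<(S, Vec<T>)> {
    let mut map: HashMap<S, Vec<T>> = HashMap::new();
    for (source, target) in flat {
        map.entry(source).or_insert_with(Vec::new).push(target);
    }
    map.into_iter().collect()
}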

#[derive(Debug, RustcEncodable, RustcDecodable)]
pub struct SerializedHash {
6 changes: 3 additions & 3 deletions src/librustc_incremental/persist/dirty_clean.rs
@@ -67,9 +67,9 @@ pub fn check_dirty_clean_annotations<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,

let _ignore = tcx.dep_graph.in_ignore();
let dirty_inputs: FxHashSet<DepNode<DefId>> =
dirty_inputs.iter()
.filter_map(|d| retraced.map(d))
.collect();
dirty_inputs.keys()
.filter_map(|d| retraced.map(d))
.collect();
let query = tcx.dep_graph.query();
debug!("query-nodes: {:?}", query.nodes());
let krate = tcx.hir.krate();
173 changes: 94 additions & 79 deletions src/librustc_incremental/persist/load.rs
@@ -10,7 +10,7 @@

//! Code to save/load the dep-graph from files.

use rustc::dep_graph::DepNode;
use rustc::dep_graph::{DepNode, WorkProductId};
use rustc::hir::def_id::DefId;
use rustc::hir::svh::Svh;
use rustc::session::Session;
@@ -19,6 +19,7 @@ use rustc_data_structures::fx::{FxHashSet, FxHashMap};
use rustc_serialize::Decodable as RustcDecodable;
use rustc_serialize::opaque::Decoder;
use std::path::{Path};
use std::sync::Arc;

use IncrementalHashesMap;
use ich::Fingerprint;
@@ -30,7 +31,9 @@ use super::fs::*;
use super::file_format;
use super::work_product;

pub type DirtyNodes = FxHashSet<DepNode<DefPathIndex>>;
// The key is a dirty node. The value is **some** base-input that we
// can blame it on.
pub type DirtyNodes = FxHashMap<DepNode<DefPathIndex>, DepNode<DefPathIndex>>;

/// If we are in incremental mode, and a previous dep-graph exists,
/// then load up those nodes/edges that are still valid into the
@@ -149,86 +152,75 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
let directory = DefIdDirectory::decode(&mut dep_graph_decoder)?;
let serialized_dep_graph = SerializedDepGraph::decode(&mut dep_graph_decoder)?;

let edge_map: FxHashMap<_, _> = serialized_dep_graph.edges
.into_iter()
.map(|s| (s.source, s.targets))
.collect();

// Retrace the paths in the directory to find their current location (if any).
let retraced = directory.retrace(tcx);

// Compute the set of Hir nodes whose data has changed or which
// have been removed. These are "raw" source nodes, which means
// that they still use the original `DefPathIndex` values from the
// encoding, rather than having been retraced to a `DefId`. The
// reason for this is that this way we can include nodes that have
// been removed (which no longer have a `DefId` in the current
// compilation).
let dirty_raw_source_nodes = dirty_nodes(tcx,
incremental_hashes_map,
&serialized_dep_graph.hashes,
&retraced);

// Create a list of (raw-source-node ->
// retraced-target-node) edges. In the process of retracing the
// target nodes, we may discover that some of their def-paths no longer
// exist, in which case there is no need to mark the corresponding nodes
// as dirty (they are just not present). So this list may be smaller
// than the original.
//
// Note though that in the common case the target nodes are
// `DepNode::WorkProduct` instances, and those don't have a
// def-id, so they will never be considered to not exist. Instead,
// we do a secondary hashing step (later, in trans) when we know
// the set of symbols that go into a work-product: if any symbols
// have been removed (or added) the hash will be different and
// we'll ignore the work-product then.
let retraced_edges: Vec<_> =
serialized_dep_graph.edges.iter()
.filter_map(|&(ref raw_source_node, ref raw_target_node)| {
retraced.map(raw_target_node)
.map(|target_node| (raw_source_node, target_node))
})
.collect();

// Compute which work-products have an input that has changed or
// been removed. Put the dirty ones into a set.
let mut dirty_target_nodes = FxHashSet();
for &(raw_source_node, ref target_node) in &retraced_edges {
if dirty_raw_source_nodes.contains(raw_source_node) {
if !dirty_target_nodes.contains(target_node) {
dirty_target_nodes.insert(target_node.clone());

if tcx.sess.opts.debugging_opts.incremental_info {
// It'd be nice to pretty-print these paths better than just
// using the `Debug` impls, but wev.
println!("incremental: module {:?} is dirty because {:?} \
changed or was removed",
target_node,
raw_source_node.map_def(|&index| {
Some(directory.def_path_string(tcx, index))
}).unwrap());
// Compute the set of nodes from the old graph where some input
// has changed or been removed. These are "raw" source nodes,
// which means that they still use the original `DefPathIndex`
// values from the encoding, rather than having been retraced to a
// `DefId`. The reason for this is that this way we can include
// nodes that have been removed (which no longer have a `DefId` in
// the current compilation).
let dirty_raw_nodes = initial_dirty_nodes(tcx,
incremental_hashes_map,
&serialized_dep_graph.hashes,
&retraced);
let dirty_raw_nodes = transitive_dirty_nodes(&edge_map, dirty_raw_nodes);

// Recreate the edges in the graph that are still clean.
let mut clean_work_products = FxHashSet();
let mut dirty_work_products = FxHashSet(); // incomplete; just used to suppress debug output
for (source, targets) in &edge_map {
for target in targets {
// If the target is dirty, skip the edge. If this is an edge
// that targets a work-product, we can print the blame
// information now.
if let Some(blame) = dirty_raw_nodes.get(target) {
if let DepNode::WorkProduct(ref wp) = *target {
if tcx.sess.opts.debugging_opts.incremental_info {
if dirty_work_products.insert(wp.clone()) {
// It'd be nice to pretty-print these paths better than just
// using the `Debug` impls, but wev.
println!("incremental: module {:?} is dirty because {:?} \
changed or was removed",
wp,
blame.map_def(|&index| {
Some(directory.def_path_string(tcx, index))
}).unwrap());
}
}
}
continue;
}
}
}

// For work-products that are still clean, add their deps into the
// graph. This is needed because later we will have to save this
// back out again!
let dep_graph = tcx.dep_graph.clone();
for (raw_source_node, target_node) in retraced_edges {
if dirty_target_nodes.contains(&target_node) {
continue;
// If the source is dirty, the target will be dirty.
assert!(!dirty_raw_nodes.contains_key(source));

// Retrace the source -> target edges to def-ids and then
// create an edge in the graph. Retracing may yield none if
// some of the data happens to have been removed; this ought
// to be impossible unless it is dirty, so we can unwrap.
let source_node = retraced.map(source).unwrap();
let target_node = retraced.map(target).unwrap();
let _task = tcx.dep_graph.in_task(target_node);
tcx.dep_graph.read(source_node);
if let DepNode::WorkProduct(ref wp) = *target {
clean_work_products.insert(wp.clone());
}
}

let source_node = retraced.map(raw_source_node).unwrap();

debug!("decode_dep_graph: clean edge: {:?} -> {:?}", source_node, target_node);

let _task = dep_graph.in_task(target_node);
dep_graph.read(source_node);
}

// Add in work-products that are still clean, and delete those that are
// dirty.
reconcile_work_products(tcx, work_products, &dirty_target_nodes);
reconcile_work_products(tcx, work_products, &clean_work_products);

dirty_clean::check_dirty_clean_annotations(tcx, &dirty_raw_source_nodes, &retraced);
dirty_clean::check_dirty_clean_annotations(tcx, &dirty_raw_nodes, &retraced);

load_prev_metadata_hashes(tcx,
&retraced,
@@ -238,13 +230,13 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,

/// Computes which of the original set of def-ids are dirty. Stored in
/// a bit vector where the index is the DefPathIndex.
fn dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
incremental_hashes_map: &IncrementalHashesMap,
serialized_hashes: &[SerializedHash],
retraced: &RetracedDefIdDirectory)
-> DirtyNodes {
fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
incremental_hashes_map: &IncrementalHashesMap,
serialized_hashes: &[SerializedHash],
retraced: &RetracedDefIdDirectory)
-> DirtyNodes {
let mut hcx = HashContext::new(tcx, incremental_hashes_map);
let mut dirty_nodes = FxHashSet();
let mut dirty_nodes = FxHashMap();

for hash in serialized_hashes {
if let Some(dep_node) = retraced.map(&hash.dep_node) {
@@ -277,21 +269,44 @@ fn dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
hash.dep_node);
}

dirty_nodes.insert(hash.dep_node.clone());
dirty_nodes.insert(hash.dep_node.clone(), hash.dep_node.clone());
}

dirty_nodes
}

fn transitive_dirty_nodes(edge_map: &FxHashMap<DepNode<DefPathIndex>, Vec<DepNode<DefPathIndex>>>,
mut dirty_nodes: DirtyNodes)
-> DirtyNodes
{
let mut stack: Vec<(DepNode<DefPathIndex>, DepNode<DefPathIndex>)> = vec![];
stack.extend(dirty_nodes.iter().map(|(s, b)| (s.clone(), b.clone())));
while let Some((source, blame)) = stack.pop() {
// we know the source is dirty (because of the node `blame`)...
assert!(dirty_nodes.contains_key(&source));

// ...so we dirty all the targets (with the same blame)
if let Some(targets) = edge_map.get(&source) {
for target in targets {
if !dirty_nodes.contains_key(target) {
dirty_nodes.insert(target.clone(), blame.clone());
stack.push((target.clone(), blame.clone()));
}
}
}
}
dirty_nodes
}

/// Go through the list of work-products produced in the previous run.
/// Delete any whose nodes have been found to be dirty or which are
/// otherwise no longer applicable.
fn reconcile_work_products<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
work_products: Vec<SerializedWorkProduct>,
dirty_target_nodes: &FxHashSet<DepNode<DefId>>) {
clean_work_products: &FxHashSet<Arc<WorkProductId>>) {
debug!("reconcile_work_products({:?})", work_products);
for swp in work_products {
if dirty_target_nodes.contains(&DepNode::WorkProduct(swp.id.clone())) {
if !clean_work_products.contains(&swp.id) {
debug!("reconcile_work_products: dep-node for {:?} is dirty", swp);
delete_dirty_work_product(tcx, swp);
} else {