Improve VecCache under parallel frontend
This replaces the single Vec allocation with a series of progressively larger buckets. With the cfg for parallel enabled but with -Zthreads=1, this looks like a slight regression in i-count and cycle counts (<0.1%). With the parallel frontend at -Zthreads=4, this is an improvement (-5% wall-time, from 5.788 to 5.4688 seconds on libcore) over our current Lock-based approach, likely due to reduced bouncing of the cache line holding the lock. At -Zthreads=32 it's a huge improvement (-46%: 8.829 -> 4.7319 seconds).
This commit is contained in:
95
compiler/rustc_data_structures/src/vec_cache/tests.rs
Normal file
95
compiler/rustc_data_structures/src/vec_cache/tests.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
use super::*;
|
||||
|
||||
/// An empty cache must report a miss for every possible key.
///
/// Fix: the original loop iterated `0..u32::MAX`, which never probes the
/// maximal key `u32::MAX` itself. `slot_index_exhaustive` in this file covers
/// the full inclusive range, so this test now does too.
#[test]
#[cfg(not(miri))]
fn vec_cache_empty() {
    let cache: VecCache<u32, u32, u32> = VecCache::default();
    // Nothing was inserted, so every lookup over the whole key space must
    // come back `None`.
    for key in 0..=u32::MAX {
        assert!(cache.lookup(&key).is_none());
    }
}
/// A single `complete` call must make its (value, index) pair visible to a
/// subsequent `lookup` of the same key.
#[test]
fn vec_cache_insert_and_check() {
    let cache: VecCache<u32, u32, u32> = VecCache::default();
    let (key, value, index) = (0, 1, 2);
    cache.complete(key, value, index);
    assert_eq!(cache.lookup(&key), Some((value, index)));
}
/// Insert keys that are powers of two, touching many buckets while leaving
/// each one almost entirely unpopulated.
#[test]
fn sparse_inserts() {
    let cache: VecCache<u32, u8, u32> = VecCache::default();

    // On 64-bit Linux, paged virtual memory lets us sparsely allocate all of
    // the pages these inserts need cheaply, without gigabytes of resident
    // memory — so go all the way up to 2^31 there. Elsewhere (e.g. on
    // non-virtual-memory systems like Windows) each slot is 5 bytes, so 2^25
    // entries already means 160 megabytes of allocated memory; going beyond
    // that is probably not reasonable for tests.
    let max_shift = if cfg!(all(target_pointer_width = "64", target_os = "linux")) {
        31
    } else {
        25
    };

    for shift in 0..max_shift {
        let key = 1u32 << shift;
        cache.complete(key, shift, key);
        assert_eq!(cache.lookup(&key), Some((shift, key)));
    }
}
/// Hammer the cache from many threads at once, then verify that every
/// entry landed intact.
#[test]
fn concurrent_stress_check() {
    let cache: VecCache<u32, u32, u32> = VecCache::default();

    // Scoped threads let us borrow `cache` directly without `Arc`; each
    // spawned thread completes one distinct key.
    std::thread::scope(|scope| {
        for i in 0..100 {
            let cache_ref = &cache;
            scope.spawn(move || cache_ref.complete(i, i, i));
        }
    });

    // The scope joins all writers before returning, so every key must now be
    // visible with its expected payload.
    for i in 0..100 {
        assert_eq!(cache.lookup(&i), Some((i, i)));
    }
}
/// Pin the per-bucket capacity table: the first two buckets hold 4096
/// entries each and every later bucket doubles the previous one, so the
/// sizes sum to exactly 2^32 — the whole `u32` key space.
#[test]
fn slot_entries_table() {
    assert_eq!(ENTRIES_BY_BUCKET, [
        4096, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 4194304,
        8388608, 16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824,
        2147483648
    ]);
}
/// Exhaustively map every `u32` key through `SlotIndex::from_index` and
/// check that the resulting bucket/offset decomposition is self-consistent.
#[test]
#[cfg(not(miri))]
fn slot_index_exhaustive() {
    // First pass: count how many keys land in each of the 21 buckets.
    let mut buckets = [0u32; 21];
    for idx in 0..=u32::MAX {
        buckets[SlotIndex::from_index(idx).bucket_idx] += 1;
    }
    // Second pass: walk the keys in order and verify that in-bucket indices
    // are contiguous, and that each slot's `entries` agrees with both the
    // observed population and the ENTRIES_BY_BUCKET table.
    let mut prev = None::<SlotIndex>;
    for idx in 0..=u32::MAX {
        let slot_idx = SlotIndex::from_index(idx);
        if let Some(p) = prev {
            if p.bucket_idx == slot_idx.bucket_idx {
                // Same bucket as the previous key: the in-bucket index must
                // advance by exactly one.
                assert_eq!(p.index_in_bucket + 1, slot_idx.index_in_bucket);
            } else {
                // Crossed into a new bucket: the in-bucket index restarts at 0.
                assert_eq!(slot_idx.index_in_bucket, 0);
            }
        } else {
            // Very first iteration: key 0 maps to the start of bucket 0.
            assert_eq!(idx, 0);
            assert_eq!(slot_idx.index_in_bucket, 0);
            assert_eq!(slot_idx.bucket_idx, 0);
        }

        // The slot's recorded bucket capacity must match the count from the
        // first pass and the static table entry for its bucket.
        assert_eq!(buckets[slot_idx.bucket_idx], slot_idx.entries as u32);
        assert_eq!(ENTRIES_BY_BUCKET[slot_idx.bucket_idx], slot_idx.entries, "{}", idx);

        prev = Some(slot_idx);
    }
}
Reference in New Issue
Block a user