Add new self-profile event for aggregating query hit counts
This commit is contained in:
@@ -88,6 +88,7 @@ use std::fmt::Display;
|
|||||||
use std::intrinsics::unlikely;
|
use std::intrinsics::unlikely;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
use std::{fs, process};
|
use std::{fs, process};
|
||||||
|
|
||||||
@@ -105,6 +106,8 @@ bitflags::bitflags! {
|
|||||||
struct EventFilter: u16 {
|
struct EventFilter: u16 {
|
||||||
const GENERIC_ACTIVITIES = 1 << 0;
|
const GENERIC_ACTIVITIES = 1 << 0;
|
||||||
const QUERY_PROVIDERS = 1 << 1;
|
const QUERY_PROVIDERS = 1 << 1;
|
||||||
|
/// Store detailed instant events, including timestamp and thread ID,
|
||||||
|
/// per each query cache hit. Note that this is quite expensive.
|
||||||
const QUERY_CACHE_HITS = 1 << 2;
|
const QUERY_CACHE_HITS = 1 << 2;
|
||||||
const QUERY_BLOCKED = 1 << 3;
|
const QUERY_BLOCKED = 1 << 3;
|
||||||
const INCR_CACHE_LOADS = 1 << 4;
|
const INCR_CACHE_LOADS = 1 << 4;
|
||||||
@@ -113,16 +116,20 @@ bitflags::bitflags! {
|
|||||||
const FUNCTION_ARGS = 1 << 6;
|
const FUNCTION_ARGS = 1 << 6;
|
||||||
const LLVM = 1 << 7;
|
const LLVM = 1 << 7;
|
||||||
const INCR_RESULT_HASHING = 1 << 8;
|
const INCR_RESULT_HASHING = 1 << 8;
|
||||||
const ARTIFACT_SIZES = 1 << 9;
|
const ARTIFACT_SIZES = 1 << 9;
|
||||||
|
/// Store aggregated counts of cache hits per query invocation.
|
||||||
|
const QUERY_CACHE_HIT_COUNTS = 1 << 10;
|
||||||
|
|
||||||
const DEFAULT = Self::GENERIC_ACTIVITIES.bits() |
|
const DEFAULT = Self::GENERIC_ACTIVITIES.bits() |
|
||||||
Self::QUERY_PROVIDERS.bits() |
|
Self::QUERY_PROVIDERS.bits() |
|
||||||
Self::QUERY_BLOCKED.bits() |
|
Self::QUERY_BLOCKED.bits() |
|
||||||
Self::INCR_CACHE_LOADS.bits() |
|
Self::INCR_CACHE_LOADS.bits() |
|
||||||
Self::INCR_RESULT_HASHING.bits() |
|
Self::INCR_RESULT_HASHING.bits() |
|
||||||
Self::ARTIFACT_SIZES.bits();
|
Self::ARTIFACT_SIZES.bits() |
|
||||||
|
Self::QUERY_CACHE_HIT_COUNTS.bits();
|
||||||
|
|
||||||
const ARGS = Self::QUERY_KEYS.bits() | Self::FUNCTION_ARGS.bits();
|
const ARGS = Self::QUERY_KEYS.bits() | Self::FUNCTION_ARGS.bits();
|
||||||
|
const QUERY_CACHE_HIT_COMBINED = Self::QUERY_CACHE_HITS.bits() | Self::QUERY_CACHE_HIT_COUNTS.bits();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -134,6 +141,7 @@ const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
|
|||||||
("generic-activity", EventFilter::GENERIC_ACTIVITIES),
|
("generic-activity", EventFilter::GENERIC_ACTIVITIES),
|
||||||
("query-provider", EventFilter::QUERY_PROVIDERS),
|
("query-provider", EventFilter::QUERY_PROVIDERS),
|
||||||
("query-cache-hit", EventFilter::QUERY_CACHE_HITS),
|
("query-cache-hit", EventFilter::QUERY_CACHE_HITS),
|
||||||
|
("query-cache-hit-count", EventFilter::QUERY_CACHE_HITS),
|
||||||
("query-blocked", EventFilter::QUERY_BLOCKED),
|
("query-blocked", EventFilter::QUERY_BLOCKED),
|
||||||
("incr-cache-load", EventFilter::INCR_CACHE_LOADS),
|
("incr-cache-load", EventFilter::INCR_CACHE_LOADS),
|
||||||
("query-keys", EventFilter::QUERY_KEYS),
|
("query-keys", EventFilter::QUERY_KEYS),
|
||||||
@@ -411,13 +419,24 @@ impl SelfProfilerRef {
|
|||||||
#[inline(never)]
|
#[inline(never)]
|
||||||
#[cold]
|
#[cold]
|
||||||
fn cold_call(profiler_ref: &SelfProfilerRef, query_invocation_id: QueryInvocationId) {
|
fn cold_call(profiler_ref: &SelfProfilerRef, query_invocation_id: QueryInvocationId) {
|
||||||
profiler_ref.instant_query_event(
|
if profiler_ref.event_filter_mask.contains(EventFilter::QUERY_CACHE_HIT_COUNTS) {
|
||||||
|profiler| profiler.query_cache_hit_event_kind,
|
profiler_ref
|
||||||
query_invocation_id,
|
.profiler
|
||||||
);
|
.as_ref()
|
||||||
|
.unwrap()
|
||||||
|
.increment_query_cache_hit_counters(QueryInvocationId(query_invocation_id.0));
|
||||||
|
}
|
||||||
|
if unlikely(profiler_ref.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS)) {
|
||||||
|
profiler_ref.instant_query_event(
|
||||||
|
|profiler| profiler.query_cache_hit_event_kind,
|
||||||
|
query_invocation_id,
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if unlikely(self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS)) {
|
// We check both kinds of query cache hit events at once, to reduce overhead in the
|
||||||
|
// common case (with self-profile disabled).
|
||||||
|
if unlikely(self.event_filter_mask.intersects(EventFilter::QUERY_CACHE_HIT_COMBINED)) {
|
||||||
cold_call(self, query_invocation_id);
|
cold_call(self, query_invocation_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -489,6 +508,30 @@ impl SelfProfilerRef {
|
|||||||
self.profiler.as_ref().map(|p| p.get_or_alloc_cached_string(s))
|
self.profiler.as_ref().map(|p| p.get_or_alloc_cached_string(s))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Store query cache hits to the self-profile log.
|
||||||
|
/// Should be called once at the end of the compilation session.
|
||||||
|
///
|
||||||
|
/// The cache hits are stored per **query invocation**, not **per query kind/type**.
|
||||||
|
/// `analyzeme` can later deduplicate individual query labels from the QueryInvocationId event
|
||||||
|
/// IDs.
|
||||||
|
pub fn store_query_cache_hits(&self) {
|
||||||
|
if self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HIT_COUNTS) {
|
||||||
|
let profiler = self.profiler.as_ref().unwrap();
|
||||||
|
let query_hits = profiler.query_hits.read();
|
||||||
|
let builder = EventIdBuilder::new(&profiler.profiler);
|
||||||
|
let thread_id = get_thread_id();
|
||||||
|
for (query_invocation, hit_count) in query_hits.iter().enumerate() {
|
||||||
|
let event_id = builder.from_label(StringId::new_virtual(query_invocation as u64));
|
||||||
|
profiler.profiler.record_integer_event(
|
||||||
|
profiler.query_cache_hit_count_event_kind,
|
||||||
|
event_id,
|
||||||
|
thread_id,
|
||||||
|
hit_count.load(Ordering::Relaxed),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn enabled(&self) -> bool {
|
pub fn enabled(&self) -> bool {
|
||||||
self.profiler.is_some()
|
self.profiler.is_some()
|
||||||
@@ -537,6 +580,19 @@ pub struct SelfProfiler {
|
|||||||
|
|
||||||
string_cache: RwLock<FxHashMap<String, StringId>>,
|
string_cache: RwLock<FxHashMap<String, StringId>>,
|
||||||
|
|
||||||
|
/// Recording individual query cache hits as "instant" measureme events
|
||||||
|
/// is incredibly expensive. Instead of doing that, we simply aggregate
|
||||||
|
/// cache hit *counts* per query invocation, and then store the final count
|
||||||
|
/// of cache hits per invocation at the end of the compilation session.
|
||||||
|
///
|
||||||
|
/// With this approach, we don't know the individual thread IDs and timestamps
|
||||||
|
/// of cache hits, but it has very little overhead on top of `-Zself-profile`.
|
||||||
|
/// Recording the cache hits as individual events made compilation 3-5x slower.
|
||||||
|
///
|
||||||
|
/// Query invocation IDs should be monotonic integers, so we can store them in a vec,
|
||||||
|
/// rather than using a hashmap.
|
||||||
|
query_hits: RwLock<Vec<AtomicU64>>,
|
||||||
|
|
||||||
query_event_kind: StringId,
|
query_event_kind: StringId,
|
||||||
generic_activity_event_kind: StringId,
|
generic_activity_event_kind: StringId,
|
||||||
incremental_load_result_event_kind: StringId,
|
incremental_load_result_event_kind: StringId,
|
||||||
@@ -544,6 +600,8 @@ pub struct SelfProfiler {
|
|||||||
query_blocked_event_kind: StringId,
|
query_blocked_event_kind: StringId,
|
||||||
query_cache_hit_event_kind: StringId,
|
query_cache_hit_event_kind: StringId,
|
||||||
artifact_size_event_kind: StringId,
|
artifact_size_event_kind: StringId,
|
||||||
|
/// Total cache hits per query invocation
|
||||||
|
query_cache_hit_count_event_kind: StringId,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SelfProfiler {
|
impl SelfProfiler {
|
||||||
@@ -573,6 +631,7 @@ impl SelfProfiler {
|
|||||||
let query_blocked_event_kind = profiler.alloc_string("QueryBlocked");
|
let query_blocked_event_kind = profiler.alloc_string("QueryBlocked");
|
||||||
let query_cache_hit_event_kind = profiler.alloc_string("QueryCacheHit");
|
let query_cache_hit_event_kind = profiler.alloc_string("QueryCacheHit");
|
||||||
let artifact_size_event_kind = profiler.alloc_string("ArtifactSize");
|
let artifact_size_event_kind = profiler.alloc_string("ArtifactSize");
|
||||||
|
let query_cache_hit_count_event_kind = profiler.alloc_string("QueryCacheHitCount");
|
||||||
|
|
||||||
let mut event_filter_mask = EventFilter::empty();
|
let mut event_filter_mask = EventFilter::empty();
|
||||||
|
|
||||||
@@ -618,6 +677,8 @@ impl SelfProfiler {
|
|||||||
query_blocked_event_kind,
|
query_blocked_event_kind,
|
||||||
query_cache_hit_event_kind,
|
query_cache_hit_event_kind,
|
||||||
artifact_size_event_kind,
|
artifact_size_event_kind,
|
||||||
|
query_cache_hit_count_event_kind,
|
||||||
|
query_hits: Default::default(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -627,6 +688,25 @@ impl SelfProfiler {
|
|||||||
self.profiler.alloc_string(s)
|
self.profiler.alloc_string(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Store a cache hit of a query invocation
|
||||||
|
pub fn increment_query_cache_hit_counters(&self, id: QueryInvocationId) {
|
||||||
|
// Fast path: assume that the query was already encountered before, and just record
|
||||||
|
// a cache hit.
|
||||||
|
let mut guard = self.query_hits.upgradable_read();
|
||||||
|
let query_hits = &guard;
|
||||||
|
let index = id.0 as usize;
|
||||||
|
if index < query_hits.len() {
|
||||||
|
// We only want to increment the count, no other synchronization is required
|
||||||
|
query_hits[index].fetch_add(1, Ordering::Relaxed);
|
||||||
|
} else {
|
||||||
|
// If not, we need to extend the query hit map to the highest observed ID
|
||||||
|
guard.with_upgraded(|vec| {
|
||||||
|
vec.resize_with(index + 1, || AtomicU64::new(0));
|
||||||
|
vec[index] = AtomicU64::from(1);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Gets a `StringId` for the given string. This method makes sure that
|
/// Gets a `StringId` for the given string. This method makes sure that
|
||||||
/// any strings going through it will only be allocated once in the
|
/// any strings going through it will only be allocated once in the
|
||||||
/// profiling data.
|
/// profiling data.
|
||||||
|
|||||||
@@ -259,4 +259,5 @@ pub fn alloc_self_profile_query_strings(tcx: TyCtxt<'_>) {
|
|||||||
for alloc in super::ALLOC_SELF_PROFILE_QUERY_STRINGS.iter() {
|
for alloc in super::ALLOC_SELF_PROFILE_QUERY_STRINGS.iter() {
|
||||||
alloc(tcx, &mut string_cache)
|
alloc(tcx, &mut string_cache)
|
||||||
}
|
}
|
||||||
|
tcx.sess.prof.store_query_cache_hits();
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user