Skip to content

Commit da83217

Browse files
committed
Auto merge of #139281 - petrochenkov:ctxtdecod6, r=wesleywiser
hygiene: Avoid recursion in syntax context decoding #139241 has two components - Avoiding recursion during syntax context decoding - Encoding/decoding only the non-redundant data, and recalculating the redundant data again during decoding Both of these parts may influence compilation times, possibly in opposite directions. So this PR contains only the first part to evaluate its effect in isolation.
2 parents 1e008dd + fef3cf0 commit da83217

File tree

4 files changed

+67
-103
lines changed

4 files changed

+67
-103
lines changed

compiler/rustc_metadata/src/rmeta/mod.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ use rustc_serialize::opaque::FileEncoder;
3636
use rustc_session::config::{SymbolManglingVersion, TargetModifier};
3737
use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib};
3838
use rustc_span::edition::Edition;
39-
use rustc_span::hygiene::{ExpnIndex, MacroKind, SyntaxContextData};
39+
use rustc_span::hygiene::{
40+
ExpnIndex, MacroKind, SyntaxContextDataNonRecursive as SyntaxContextData,
41+
};
4042
use rustc_span::{self, ExpnData, ExpnHash, ExpnId, Ident, Span, Symbol};
4143
use rustc_target::spec::{PanicStrategy, TargetTuple};
4244
use table::TableBuilder;

compiler/rustc_middle/src/query/on_disk_cache.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixed
1616
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
1717
use rustc_session::Session;
1818
use rustc_span::hygiene::{
19-
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
19+
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext,
20+
SyntaxContextDataNonRecursive as SyntaxContextData,
2021
};
2122
use rustc_span::source_map::Spanned;
2223
use rustc_span::{

compiler/rustc_middle/src/ty/parameterized.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ trivially_parameterized_over_tcx! {
113113
rustc_span::Span,
114114
rustc_span::Symbol,
115115
rustc_span::def_id::DefPathHash,
116-
rustc_span::hygiene::SyntaxContextData,
116+
rustc_span::hygiene::SyntaxContextDataNonRecursive,
117117
rustc_span::Ident,
118118
rustc_type_ir::Variance,
119119
rustc_hir::Attribute,

compiler/rustc_span/src/hygiene.rs

+61-100
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,14 @@
2424
// because getting it wrong can lead to nested `HygieneData::with` calls that
2525
// trigger runtime aborts. (Fortunately these are obvious and easy to fix.)
2626

27-
use std::cell::RefCell;
28-
use std::collections::hash_map::Entry;
29-
use std::collections::hash_set::Entry as SetEntry;
3027
use std::hash::Hash;
3128
use std::sync::Arc;
3229
use std::{fmt, iter, mem};
3330

3431
use rustc_data_structures::fingerprint::Fingerprint;
3532
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
3633
use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher};
37-
use rustc_data_structures::sync::{Lock, WorkerLocal};
34+
use rustc_data_structures::sync::Lock;
3835
use rustc_data_structures::unhash::UnhashMap;
3936
use rustc_hashes::Hash64;
4037
use rustc_index::IndexVec;
@@ -61,8 +58,8 @@ impl !PartialOrd for SyntaxContext {}
6158
/// The other fields are only for caching.
6259
type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);
6360

64-
#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable)]
65-
pub struct SyntaxContextData {
61+
#[derive(Clone, Copy, Debug)]
62+
struct SyntaxContextData {
6663
outer_expn: ExpnId,
6764
outer_transparency: Transparency,
6865
parent: SyntaxContext,
@@ -74,6 +71,17 @@ pub struct SyntaxContextData {
7471
dollar_crate_name: Symbol,
7572
}
7673

74+
/// Same as `SyntaxContextData`, but `opaque(_and_semitransparent)` cannot be recursive
75+
/// and use `None` if they need to refer to self. Used for encoding and decoding metadata.
76+
#[derive(Encodable, Decodable)]
77+
pub struct SyntaxContextDataNonRecursive {
78+
outer_expn: ExpnId,
79+
outer_transparency: Transparency,
80+
parent: SyntaxContext,
81+
opaque: Option<SyntaxContext>,
82+
opaque_and_semitransparent: Option<SyntaxContext>,
83+
}
84+
7785
impl SyntaxContextData {
7886
fn new(
7987
(parent, outer_expn, outer_transparency): SyntaxContextKey,
@@ -114,6 +122,19 @@ impl SyntaxContextData {
114122
}
115123
}
116124

125+
impl SyntaxContextDataNonRecursive {
126+
fn recursive(&self, ctxt: SyntaxContext) -> SyntaxContextData {
127+
SyntaxContextData {
128+
outer_expn: self.outer_expn,
129+
outer_transparency: self.outer_transparency,
130+
parent: self.parent,
131+
opaque: self.opaque.unwrap_or(ctxt),
132+
opaque_and_semitransparent: self.opaque_and_semitransparent.unwrap_or(ctxt),
133+
dollar_crate_name: kw::DollarCrate,
134+
}
135+
}
136+
}
137+
117138
rustc_index::newtype_index! {
118139
/// A unique ID associated with a macro invocation and expansion.
119140
#[orderable]
@@ -637,6 +658,19 @@ impl HygieneData {
637658
SyntaxContextData::new(key, opaque, opaque_and_semitransparent);
638659
ctxt
639660
}
661+
662+
fn non_recursive_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContextDataNonRecursive {
663+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
664+
let data = &self.syntax_context_data[ctxt.0 as usize];
665+
SyntaxContextDataNonRecursive {
666+
outer_expn: data.outer_expn,
667+
outer_transparency: data.outer_transparency,
668+
parent: data.parent,
669+
opaque: (data.opaque != ctxt).then_some(data.opaque),
670+
opaque_and_semitransparent: (data.opaque_and_semitransparent != ctxt)
671+
.then_some(data.opaque_and_semitransparent),
672+
}
673+
}
640674
}
641675

642676
pub fn walk_chain(span: Span, to: SyntaxContext) -> Span {
@@ -1266,7 +1300,7 @@ impl HygieneEncodeContext {
12661300
pub fn encode<T>(
12671301
&self,
12681302
encoder: &mut T,
1269-
mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextData),
1303+
mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextDataNonRecursive),
12701304
mut encode_expn: impl FnMut(&mut T, ExpnId, &ExpnData, ExpnHash),
12711305
) {
12721306
// When we serialize a `SyntaxContextData`, we may end up serializing
@@ -1315,18 +1349,12 @@ struct HygieneDecodeContextInner {
13151349
// so that multiple occurrences of the same serialized id are decoded to the same
13161350
// `SyntaxContext`. This only stores `SyntaxContext`s which are completely decoded.
13171351
remapped_ctxts: Vec<Option<SyntaxContext>>,
1318-
1319-
/// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`.
1320-
decoding: FxHashMap<u32, SyntaxContext>,
13211352
}
13221353

13231354
#[derive(Default)]
13241355
/// Additional information used to assist in decoding hygiene data
13251356
pub struct HygieneDecodeContext {
13261357
inner: Lock<HygieneDecodeContextInner>,
1327-
1328-
/// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread.
1329-
local_in_progress: WorkerLocal<RefCell<FxHashSet<u32>>>,
13301358
}
13311359

13321360
/// Register an expansion which has been decoded from the on-disk-cache for the local crate.
@@ -1397,7 +1425,10 @@ pub fn decode_expn_id(
13971425
// to track which `SyntaxContext`s we have already decoded.
13981426
// The provided closure will be invoked to deserialize a `SyntaxContextData`
13991427
// if we haven't already seen the id of the `SyntaxContext` we are deserializing.
1400-
pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContextData>(
1428+
pub fn decode_syntax_context<
1429+
D: Decoder,
1430+
F: FnOnce(&mut D, u32) -> SyntaxContextDataNonRecursive,
1431+
>(
14011432
d: &mut D,
14021433
context: &HygieneDecodeContext,
14031434
decode_data: F,
@@ -1409,113 +1440,43 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
14091440
return SyntaxContext::root();
14101441
}
14111442

1412-
let pending_ctxt = {
1413-
let mut inner = context.inner.lock();
1414-
1415-
// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
1416-
// raw ids from different crate metadatas.
1417-
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
1418-
// This has already been decoded.
1419-
return ctxt;
1420-
}
1421-
1422-
match inner.decoding.entry(raw_id) {
1423-
Entry::Occupied(ctxt_entry) => {
1424-
let pending_ctxt = *ctxt_entry.get();
1425-
match context.local_in_progress.borrow_mut().entry(raw_id) {
1426-
// We're decoding this already on the current thread. Return here and let the
1427-
// function higher up the stack finish decoding to handle recursive cases.
1428-
// Hopefully having a `SyntaxContext` that refers to an incorrect data is ok
1429-
// during reminder of the decoding process, it's certainly not ok after the
1430-
// top level decoding function returns.
1431-
SetEntry::Occupied(..) => return pending_ctxt,
1432-
// Some other thread is currently decoding this.
1433-
// Race with it (alternatively we could wait here).
1434-
// We cannot return this value, unlike in the recursive case above, because it
1435-
// may expose a `SyntaxContext` pointing to incorrect data to arbitrary code.
1436-
SetEntry::Vacant(entry) => {
1437-
entry.insert();
1438-
pending_ctxt
1439-
}
1440-
}
1441-
}
1442-
Entry::Vacant(entry) => {
1443-
// We are the first thread to start decoding. Mark the current thread as being
1444-
// progress.
1445-
context.local_in_progress.borrow_mut().insert(raw_id);
1446-
1447-
// Allocate and store SyntaxContext id *before* calling the decoder function,
1448-
// as the SyntaxContextData may reference itself.
1449-
let new_ctxt = HygieneData::with(|hygiene_data| {
1450-
// Push a dummy SyntaxContextData to ensure that nobody else can get the
1451-
// same ID as us. This will be overwritten after call `decode_data`.
1452-
hygiene_data.syntax_context_data.push(SyntaxContextData::decode_placeholder());
1453-
SyntaxContext::from_usize(hygiene_data.syntax_context_data.len() - 1)
1454-
});
1455-
entry.insert(new_ctxt);
1456-
new_ctxt
1457-
}
1458-
}
1459-
};
1443+
// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
1444+
// raw ids from different crate metadatas.
1445+
if let Some(ctxt) = context.inner.lock().remapped_ctxts.get(raw_id as usize).copied().flatten()
1446+
{
1447+
// This has already been decoded.
1448+
return ctxt;
1449+
}
14601450

14611451
// Don't try to decode data while holding the lock, since we need to
14621452
// be able to recursively decode a SyntaxContext
14631453
let ctxt_data = decode_data(d, raw_id);
1464-
let ctxt_key = ctxt_data.key();
14651454

14661455
let ctxt = HygieneData::with(|hygiene_data| {
1467-
match hygiene_data.syntax_context_map.get(&ctxt_key) {
1468-
// Ensure that syntax contexts are unique.
1469-
// If syntax contexts with the given key already exists, reuse it instead of
1470-
// using `pending_ctxt`.
1471-
// `pending_ctxt` will leave an unused hole in the vector of syntax contexts.
1472-
// Hopefully its value isn't stored anywhere during decoding and its dummy data
1473-
// is never accessed later. The `is_decode_placeholder` asserts on all
1474-
// accesses to syntax context data attempt to ensure it.
1475-
Some(&ctxt) => ctxt,
1476-
// This is a completely new context.
1477-
// Overwrite its placeholder data with our decoded data.
1478-
None => {
1479-
let ctxt_data_ref =
1480-
&mut hygiene_data.syntax_context_data[pending_ctxt.as_u32() as usize];
1481-
let prev_ctxt_data = mem::replace(ctxt_data_ref, ctxt_data);
1482-
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`.
1483-
// We don't care what the encoding crate set this to - we want to resolve it
1484-
// from the perspective of the current compilation session.
1485-
ctxt_data_ref.dollar_crate_name = kw::DollarCrate;
1486-
// Make sure nothing weird happened while `decode_data` was running.
1487-
if !prev_ctxt_data.is_decode_placeholder() {
1488-
// Another thread may have already inserted the decoded data,
1489-
// but the decoded data should match.
1490-
assert_eq!(prev_ctxt_data, *ctxt_data_ref);
1491-
}
1492-
hygiene_data.syntax_context_map.insert(ctxt_key, pending_ctxt);
1493-
pending_ctxt
1494-
}
1495-
}
1456+
let ctxt_key = (ctxt_data.parent, ctxt_data.outer_expn, ctxt_data.outer_transparency);
1457+
*hygiene_data.syntax_context_map.entry(ctxt_key).or_insert_with(|| {
1458+
let ctxt = SyntaxContext::from_usize(hygiene_data.syntax_context_data.len());
1459+
hygiene_data.syntax_context_data.push(ctxt_data.recursive(ctxt));
1460+
ctxt
1461+
})
14961462
});
14971463

1498-
// Mark the context as completed
1499-
context.local_in_progress.borrow_mut().remove(&raw_id);
1500-
15011464
let mut inner = context.inner.lock();
15021465
let new_len = raw_id as usize + 1;
15031466
if inner.remapped_ctxts.len() < new_len {
15041467
inner.remapped_ctxts.resize(new_len, None);
15051468
}
15061469
inner.remapped_ctxts[raw_id as usize] = Some(ctxt);
1507-
inner.decoding.remove(&raw_id);
15081470

15091471
ctxt
15101472
}
15111473

1512-
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>(
1474+
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextDataNonRecursive)>(
15131475
ctxts: impl Iterator<Item = SyntaxContext>,
15141476
mut f: F,
15151477
) {
1516-
let all_data: Vec<_> = HygieneData::with(|data| {
1517-
ctxts.map(|ctxt| (ctxt, data.syntax_context_data[ctxt.0 as usize].clone())).collect()
1518-
});
1478+
let all_data: Vec<_> =
1479+
HygieneData::with(|data| ctxts.map(|ctxt| (ctxt, data.non_recursive_ctxt(ctxt))).collect());
15191480
for (ctxt, data) in all_data.into_iter() {
15201481
f(ctxt.0, ctxt, &data);
15211482
}

0 commit comments

Comments
 (0)