Skip to content

Commit fef3cf0

Browse files
committed
hygiene: Avoid recursion in syntax context decoding
1 parent 4f0de4c commit fef3cf0

File tree

4 files changed

+67
-103
lines changed

4 files changed

+67
-103
lines changed

compiler/rustc_metadata/src/rmeta/mod.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ use rustc_serialize::opaque::FileEncoder;
3636
use rustc_session::config::{SymbolManglingVersion, TargetModifier};
3737
use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib};
3838
use rustc_span::edition::Edition;
39-
use rustc_span::hygiene::{ExpnIndex, MacroKind, SyntaxContextData};
39+
use rustc_span::hygiene::{
40+
ExpnIndex, MacroKind, SyntaxContextDataNonRecursive as SyntaxContextData,
41+
};
4042
use rustc_span::{self, ExpnData, ExpnHash, ExpnId, Ident, Span, Symbol};
4143
use rustc_target::spec::{PanicStrategy, TargetTuple};
4244
use table::TableBuilder;

compiler/rustc_middle/src/query/on_disk_cache.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixed
1616
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
1717
use rustc_session::Session;
1818
use rustc_span::hygiene::{
19-
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
19+
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext,
20+
SyntaxContextDataNonRecursive as SyntaxContextData,
2021
};
2122
use rustc_span::source_map::Spanned;
2223
use rustc_span::{

compiler/rustc_middle/src/ty/parameterized.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ trivially_parameterized_over_tcx! {
113113
rustc_span::Span,
114114
rustc_span::Symbol,
115115
rustc_span::def_id::DefPathHash,
116-
rustc_span::hygiene::SyntaxContextData,
116+
rustc_span::hygiene::SyntaxContextDataNonRecursive,
117117
rustc_span::Ident,
118118
rustc_type_ir::Variance,
119119
rustc_hir::Attribute,

compiler/rustc_span/src/hygiene.rs

+61-100
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,14 @@
2424
// because getting it wrong can lead to nested `HygieneData::with` calls that
2525
// trigger runtime aborts. (Fortunately these are obvious and easy to fix.)
2626

27-
use std::cell::RefCell;
28-
use std::collections::hash_map::Entry;
29-
use std::collections::hash_set::Entry as SetEntry;
3027
use std::hash::Hash;
3128
use std::sync::Arc;
3229
use std::{fmt, iter, mem};
3330

3431
use rustc_data_structures::fingerprint::Fingerprint;
3532
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
3633
use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher};
37-
use rustc_data_structures::sync::{Lock, WorkerLocal};
34+
use rustc_data_structures::sync::Lock;
3835
use rustc_data_structures::unhash::UnhashMap;
3936
use rustc_hashes::Hash64;
4037
use rustc_index::IndexVec;
@@ -61,8 +58,8 @@ impl !PartialOrd for SyntaxContext {}
6158
/// The other fields are only for caching.
6259
type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);
6360

64-
#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable)]
65-
pub struct SyntaxContextData {
61+
#[derive(Clone, Copy, Debug)]
62+
struct SyntaxContextData {
6663
outer_expn: ExpnId,
6764
outer_transparency: Transparency,
6865
parent: SyntaxContext,
@@ -74,6 +71,17 @@ pub struct SyntaxContextData {
7471
dollar_crate_name: Symbol,
7572
}
7673

74+
/// Same as `SyntaxContextData`, but `opaque(_and_semitransparent)` cannot be recursive
75+
/// and use `None` if they need to refer to self. Used for encoding and decoding metadata.
76+
#[derive(Encodable, Decodable)]
77+
pub struct SyntaxContextDataNonRecursive {
78+
outer_expn: ExpnId,
79+
outer_transparency: Transparency,
80+
parent: SyntaxContext,
81+
opaque: Option<SyntaxContext>,
82+
opaque_and_semitransparent: Option<SyntaxContext>,
83+
}
84+
7785
impl SyntaxContextData {
7886
fn new(
7987
(parent, outer_expn, outer_transparency): SyntaxContextKey,
@@ -114,6 +122,19 @@ impl SyntaxContextData {
114122
}
115123
}
116124

125+
impl SyntaxContextDataNonRecursive {
126+
fn recursive(&self, ctxt: SyntaxContext) -> SyntaxContextData {
127+
SyntaxContextData {
128+
outer_expn: self.outer_expn,
129+
outer_transparency: self.outer_transparency,
130+
parent: self.parent,
131+
opaque: self.opaque.unwrap_or(ctxt),
132+
opaque_and_semitransparent: self.opaque_and_semitransparent.unwrap_or(ctxt),
133+
dollar_crate_name: kw::DollarCrate,
134+
}
135+
}
136+
}
137+
117138
rustc_index::newtype_index! {
118139
/// A unique ID associated with a macro invocation and expansion.
119140
#[orderable]
@@ -637,6 +658,19 @@ impl HygieneData {
637658
SyntaxContextData::new(key, opaque, opaque_and_semitransparent);
638659
ctxt
639660
}
661+
662+
fn non_recursive_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContextDataNonRecursive {
663+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
664+
let data = &self.syntax_context_data[ctxt.0 as usize];
665+
SyntaxContextDataNonRecursive {
666+
outer_expn: data.outer_expn,
667+
outer_transparency: data.outer_transparency,
668+
parent: data.parent,
669+
opaque: (data.opaque != ctxt).then_some(data.opaque),
670+
opaque_and_semitransparent: (data.opaque_and_semitransparent != ctxt)
671+
.then_some(data.opaque_and_semitransparent),
672+
}
673+
}
640674
}
641675

642676
pub fn walk_chain(span: Span, to: SyntaxContext) -> Span {
@@ -1266,7 +1300,7 @@ impl HygieneEncodeContext {
12661300
pub fn encode<T>(
12671301
&self,
12681302
encoder: &mut T,
1269-
mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextData),
1303+
mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextDataNonRecursive),
12701304
mut encode_expn: impl FnMut(&mut T, ExpnId, &ExpnData, ExpnHash),
12711305
) {
12721306
// When we serialize a `SyntaxContextData`, we may end up serializing
@@ -1315,18 +1349,12 @@ struct HygieneDecodeContextInner {
13151349
// so that multiple occurrences of the same serialized id are decoded to the same
13161350
// `SyntaxContext`. This only stores `SyntaxContext`s which are completely decoded.
13171351
remapped_ctxts: Vec<Option<SyntaxContext>>,
1318-
1319-
/// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`.
1320-
decoding: FxHashMap<u32, SyntaxContext>,
13211352
}
13221353

13231354
#[derive(Default)]
13241355
/// Additional information used to assist in decoding hygiene data
13251356
pub struct HygieneDecodeContext {
13261357
inner: Lock<HygieneDecodeContextInner>,
1327-
1328-
/// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread.
1329-
local_in_progress: WorkerLocal<RefCell<FxHashSet<u32>>>,
13301358
}
13311359

13321360
/// Register an expansion which has been decoded from the on-disk-cache for the local crate.
@@ -1397,7 +1425,10 @@ pub fn decode_expn_id(
13971425
// to track which `SyntaxContext`s we have already decoded.
13981426
// The provided closure will be invoked to deserialize a `SyntaxContextData`
13991427
// if we haven't already seen the id of the `SyntaxContext` we are deserializing.
1400-
pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContextData>(
1428+
pub fn decode_syntax_context<
1429+
D: Decoder,
1430+
F: FnOnce(&mut D, u32) -> SyntaxContextDataNonRecursive,
1431+
>(
14011432
d: &mut D,
14021433
context: &HygieneDecodeContext,
14031434
decode_data: F,
@@ -1409,113 +1440,43 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
14091440
return SyntaxContext::root();
14101441
}
14111442

1412-
let pending_ctxt = {
1413-
let mut inner = context.inner.lock();
1414-
1415-
// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
1416-
// raw ids from different crate metadatas.
1417-
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
1418-
// This has already been decoded.
1419-
return ctxt;
1420-
}
1421-
1422-
match inner.decoding.entry(raw_id) {
1423-
Entry::Occupied(ctxt_entry) => {
1424-
let pending_ctxt = *ctxt_entry.get();
1425-
match context.local_in_progress.borrow_mut().entry(raw_id) {
1426-
// We're decoding this already on the current thread. Return here and let the
1427-
// function higher up the stack finish decoding to handle recursive cases.
1428-
// Hopefully having a `SyntaxContext` that refers to an incorrect data is ok
1429-
// during reminder of the decoding process, it's certainly not ok after the
1430-
// top level decoding function returns.
1431-
SetEntry::Occupied(..) => return pending_ctxt,
1432-
// Some other thread is currently decoding this.
1433-
// Race with it (alternatively we could wait here).
1434-
// We cannot return this value, unlike in the recursive case above, because it
1435-
// may expose a `SyntaxContext` pointing to incorrect data to arbitrary code.
1436-
SetEntry::Vacant(entry) => {
1437-
entry.insert();
1438-
pending_ctxt
1439-
}
1440-
}
1441-
}
1442-
Entry::Vacant(entry) => {
1443-
// We are the first thread to start decoding. Mark the current thread as being
1444-
// progress.
1445-
context.local_in_progress.borrow_mut().insert(raw_id);
1446-
1447-
// Allocate and store SyntaxContext id *before* calling the decoder function,
1448-
// as the SyntaxContextData may reference itself.
1449-
let new_ctxt = HygieneData::with(|hygiene_data| {
1450-
// Push a dummy SyntaxContextData to ensure that nobody else can get the
1451-
// same ID as us. This will be overwritten after call `decode_data`.
1452-
hygiene_data.syntax_context_data.push(SyntaxContextData::decode_placeholder());
1453-
SyntaxContext::from_usize(hygiene_data.syntax_context_data.len() - 1)
1454-
});
1455-
entry.insert(new_ctxt);
1456-
new_ctxt
1457-
}
1458-
}
1459-
};
1443+
// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
1444+
// raw ids from different crate metadatas.
1445+
if let Some(ctxt) = context.inner.lock().remapped_ctxts.get(raw_id as usize).copied().flatten()
1446+
{
1447+
// This has already been decoded.
1448+
return ctxt;
1449+
}
14601450

14611451
// Don't try to decode data while holding the lock, since we need to
14621452
// be able to recursively decode a SyntaxContext
14631453
let ctxt_data = decode_data(d, raw_id);
1464-
let ctxt_key = ctxt_data.key();
14651454

14661455
let ctxt = HygieneData::with(|hygiene_data| {
1467-
match hygiene_data.syntax_context_map.get(&ctxt_key) {
1468-
// Ensure that syntax contexts are unique.
1469-
// If syntax contexts with the given key already exists, reuse it instead of
1470-
// using `pending_ctxt`.
1471-
// `pending_ctxt` will leave an unused hole in the vector of syntax contexts.
1472-
// Hopefully its value isn't stored anywhere during decoding and its dummy data
1473-
// is never accessed later. The `is_decode_placeholder` asserts on all
1474-
// accesses to syntax context data attempt to ensure it.
1475-
Some(&ctxt) => ctxt,
1476-
// This is a completely new context.
1477-
// Overwrite its placeholder data with our decoded data.
1478-
None => {
1479-
let ctxt_data_ref =
1480-
&mut hygiene_data.syntax_context_data[pending_ctxt.as_u32() as usize];
1481-
let prev_ctxt_data = mem::replace(ctxt_data_ref, ctxt_data);
1482-
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`.
1483-
// We don't care what the encoding crate set this to - we want to resolve it
1484-
// from the perspective of the current compilation session.
1485-
ctxt_data_ref.dollar_crate_name = kw::DollarCrate;
1486-
// Make sure nothing weird happened while `decode_data` was running.
1487-
if !prev_ctxt_data.is_decode_placeholder() {
1488-
// Another thread may have already inserted the decoded data,
1489-
// but the decoded data should match.
1490-
assert_eq!(prev_ctxt_data, *ctxt_data_ref);
1491-
}
1492-
hygiene_data.syntax_context_map.insert(ctxt_key, pending_ctxt);
1493-
pending_ctxt
1494-
}
1495-
}
1456+
let ctxt_key = (ctxt_data.parent, ctxt_data.outer_expn, ctxt_data.outer_transparency);
1457+
*hygiene_data.syntax_context_map.entry(ctxt_key).or_insert_with(|| {
1458+
let ctxt = SyntaxContext::from_usize(hygiene_data.syntax_context_data.len());
1459+
hygiene_data.syntax_context_data.push(ctxt_data.recursive(ctxt));
1460+
ctxt
1461+
})
14961462
});
14971463

1498-
// Mark the context as completed
1499-
context.local_in_progress.borrow_mut().remove(&raw_id);
1500-
15011464
let mut inner = context.inner.lock();
15021465
let new_len = raw_id as usize + 1;
15031466
if inner.remapped_ctxts.len() < new_len {
15041467
inner.remapped_ctxts.resize(new_len, None);
15051468
}
15061469
inner.remapped_ctxts[raw_id as usize] = Some(ctxt);
1507-
inner.decoding.remove(&raw_id);
15081470

15091471
ctxt
15101472
}
15111473

1512-
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>(
1474+
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextDataNonRecursive)>(
15131475
ctxts: impl Iterator<Item = SyntaxContext>,
15141476
mut f: F,
15151477
) {
1516-
let all_data: Vec<_> = HygieneData::with(|data| {
1517-
ctxts.map(|ctxt| (ctxt, data.syntax_context_data[ctxt.0 as usize].clone())).collect()
1518-
});
1478+
let all_data: Vec<_> =
1479+
HygieneData::with(|data| ctxts.map(|ctxt| (ctxt, data.non_recursive_ctxt(ctxt))).collect());
15191480
for (ctxt, data) in all_data.into_iter() {
15201481
f(ctxt.0, ctxt, &data);
15211482
}

0 commit comments

Comments
 (0)