Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/Format/Macros.h
35233 views
1
//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the main building blocks of macro support in
/// clang-format.
///
/// In order to not violate the requirement that clang-format can format files
/// in isolation, clang-format's macro support uses expansions users provide
/// as part of clang-format's style configuration.
///
/// Macro definitions are of the form "MACRO(p1, p2)=p1 + p2", but only support
/// one level of expansion (\see MacroExpander for a full description of what
/// is supported).
///
/// As part of parsing, clang-format uses the MacroExpander to expand the
/// spelled token streams into expanded token streams when it encounters a
/// macro call. The UnwrappedLineParser continues to parse UnwrappedLines
/// from the expanded token stream.
/// After the expanded unwrapped lines are parsed, the MacroCallReconstructor
/// matches the spelled token stream into unwrapped lines that best resemble the
/// structure of the expanded unwrapped lines. These reconstructed unwrapped
/// lines are aliasing the tokens in the expanded token stream, so that token
/// annotations will be reused when formatting the spelled macro calls.
///
/// When formatting, clang-format annotates and formats the expanded unwrapped
/// lines first, determining the token types. Next, it formats the spelled
/// unwrapped lines, keeping the token types fixed, while allowing other
/// formatting decisions to change.
///
//===----------------------------------------------------------------------===//
37
38
#ifndef CLANG_LIB_FORMAT_MACROS_H
39
#define CLANG_LIB_FORMAT_MACROS_H
40
41
#include <list>
42
43
#include "FormatToken.h"
44
#include "llvm/ADT/DenseMap.h"
45
46
namespace clang {
47
namespace format {
48
49
// Forward declarations of types that are only declared, not defined, in this
// header.
struct UnwrappedLine;
struct UnwrappedLineNode;
51
52
/// Takes a set of macro definitions as strings and allows expanding calls to
53
/// those macros.
54
///
55
/// For example:
56
/// Definition: A(x, y)=x + y
57
/// Call : A(int a = 1, 2)
58
/// Expansion : int a = 1 + 2
59
///
60
/// Expansion does not check arity of the definition.
61
/// If fewer arguments than expected are provided, the remaining parameters
62
/// are considered empty:
63
/// Call : A(a)
64
/// Expansion: a +
65
/// If more arguments than expected are provided, they will be discarded.
66
///
67
/// The expander does not support:
68
/// - recursive expansion
69
/// - stringification
70
/// - concatenation
71
/// - variadic macros
72
///
73
/// Furthermore, only a single expansion of each macro argument is supported,
74
/// so that we cannot get conflicting formatting decisions from different
75
/// expansions.
76
/// Definition: A(x)=x+x
77
/// Call : A(id)
78
/// Expansion : id+x
79
///
80
class MacroExpander {
81
public:
82
using ArgsList = ArrayRef<SmallVector<FormatToken *, 8>>;
83
84
/// Construct a macro expander from a set of macro definitions.
85
/// Macro definitions must be encoded as UTF-8.
86
///
87
/// Each entry in \p Macros must conform to the following simple
88
/// macro-definition language:
89
/// <definition> ::= <id> <expansion> | <id> "(" <params> ")" <expansion>
90
/// <params> ::= <id-list> | ""
91
/// <id-list> ::= <id> | <id> "," <params>
92
/// <expansion> ::= "=" <tail> | <eof>
93
/// <tail> ::= <tok> <tail> | <eof>
94
///
95
/// Macros that cannot be parsed will be silently discarded.
96
///
97
MacroExpander(const std::vector<std::string> &Macros,
98
SourceManager &SourceMgr, const FormatStyle &Style,
99
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
100
IdentifierTable &IdentTable);
101
~MacroExpander();
102
103
/// Returns whether any macro \p Name is defined, regardless of overloads.
104
bool defined(StringRef Name) const;
105
106
/// Returns whetherh there is an object-like overload, i.e. where the macro
107
/// has no arguments and should not consume subsequent parentheses.
108
bool objectLike(StringRef Name) const;
109
110
/// Returns whether macro \p Name provides an overload with the given arity.
111
bool hasArity(StringRef Name, unsigned Arity) const;
112
113
/// Returns the expanded stream of format tokens for \p ID, where
114
/// each element in \p Args is a positional argument to the macro call.
115
/// If \p Args is not set, the object-like overload is used.
116
/// If \p Args is set, the overload with the arity equal to \c Args.size() is
117
/// used.
118
SmallVector<FormatToken *, 8>
119
expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const;
120
121
private:
122
struct Definition;
123
class DefinitionParser;
124
125
void parseDefinition(const std::string &Macro);
126
127
SourceManager &SourceMgr;
128
const FormatStyle &Style;
129
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
130
IdentifierTable &IdentTable;
131
SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
132
llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike;
133
llvm::StringMap<Definition> ObjectLike;
134
};
135
136
/// Converts a sequence of UnwrappedLines containing expanded macros into a
137
/// single UnwrappedLine containing the macro calls. This UnwrappedLine may be
138
/// broken into child lines, in a way that best conveys the structure of the
139
/// expanded code.
140
///
141
/// In the simplest case, a spelled UnwrappedLine contains one macro, and after
142
/// expanding it we have one expanded UnwrappedLine. In general, macro
143
/// expansions can span UnwrappedLines, and multiple macros can contribute
144
/// tokens to the same line. We keep consuming expanded lines until:
145
/// * all expansions that started have finished (we're not chopping any macros
146
/// in half)
147
/// * *and* we've reached the end of a *spelled* unwrapped line.
148
///
149
/// A single UnwrappedLine represents this chunk of code.
150
///
151
/// After this point, the state of the spelled/expanded stream is "in sync"
152
/// (both at the start of an UnwrappedLine, with no macros open), so the
153
/// Reconstructor can be thrown away and parsing can continue.
154
///
155
/// Given a mapping from the macro name identifier token in the macro call
156
/// to the tokens of the macro call, for example:
157
/// CLASSA -> CLASSA({public: void x();})
158
///
159
/// When getting the formatted lines of the expansion via the \c addLine method
160
/// (each '->' specifies a call to \c addLine ):
161
/// -> class A {
162
/// -> public:
163
/// -> void x();
164
/// -> };
165
///
166
/// Creates the tree of unwrapped lines containing the macro call tokens so that
167
/// the macro call tokens fit the semantic structure of the expanded formatted
168
/// lines:
169
/// -> CLASSA({
170
/// -> public:
171
/// -> void x();
172
/// -> })
173
class MacroCallReconstructor {
174
public:
175
/// Create an Reconstructor whose resulting \p UnwrappedLine will start at
176
/// \p Level, using the map from name identifier token to the corresponding
177
/// tokens of the spelled macro call.
178
MacroCallReconstructor(
179
unsigned Level,
180
const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
181
&ActiveExpansions);
182
183
/// For the given \p Line, match all occurences of tokens expanded from a
184
/// macro to unwrapped lines in the spelled macro call so that the resulting
185
/// tree of unwrapped lines best resembles the structure of unwrapped lines
186
/// passed in via \c addLine.
187
void addLine(const UnwrappedLine &Line);
188
189
/// Check whether at the current state there is no open macro expansion
190
/// that needs to be processed to finish an macro call.
191
/// Only when \c finished() is true, \c takeResult() can be called to retrieve
192
/// the resulting \c UnwrappedLine.
193
/// If there are multiple subsequent macro calls within an unwrapped line in
194
/// the spelled token stream, the calling code may also continue to call
195
/// \c addLine() when \c finished() is true.
196
bool finished() const { return ActiveExpansions.empty(); }
197
198
/// Retrieve the formatted \c UnwrappedLine containing the orginal
199
/// macro calls, formatted according to the expanded token stream received
200
/// via \c addLine().
201
/// Generally, this line tries to have the same structure as the expanded,
202
/// formatted unwrapped lines handed in via \c addLine(), with the exception
203
/// that for multiple top-level lines, each subsequent line will be the
204
/// child of the last token in its predecessor. This representation is chosen
205
/// because it is a precondition to the formatter that we get what looks like
206
/// a single statement in a single \c UnwrappedLine (i.e. matching parens).
207
///
208
/// If a token in a macro argument is a child of a token in the expansion,
209
/// the parent will be the corresponding token in the macro call.
210
/// For example:
211
/// #define C(a, b) class C { a b
212
/// C(int x;, int y;)
213
/// would expand to
214
/// class C { int x; int y;
215
/// where in a formatted line "int x;" and "int y;" would both be new separate
216
/// lines.
217
///
218
/// In the result, "int x;" will be a child of the opening parenthesis in "C("
219
/// and "int y;" will be a child of the "," token:
220
/// C (
221
/// \- int x;
222
/// ,
223
/// \- int y;
224
/// )
225
UnwrappedLine takeResult() &&;
226
227
private:
228
void add(FormatToken *Token, FormatToken *ExpandedParent, bool First,
229
unsigned Level);
230
void prepareParent(FormatToken *ExpandedParent, bool First, unsigned Level);
231
FormatToken *getParentInResult(FormatToken *Parent);
232
void reconstruct(FormatToken *Token);
233
void startReconstruction(FormatToken *Token);
234
bool reconstructActiveCallUntil(FormatToken *Token);
235
void endReconstruction(FormatToken *Token);
236
bool processNextReconstructed();
237
void finalize();
238
239
struct ReconstructedLine;
240
241
void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr);
242
UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level);
243
void debug(const ReconstructedLine &Line, int Level);
244
ReconstructedLine &parentLine();
245
ReconstructedLine *currentLine();
246
void debugParentMap() const;
247
248
#ifndef NDEBUG
249
enum ReconstructorState {
250
Start, // No macro expansion was found in the input yet.
251
InProgress, // During a macro reconstruction.
252
Finalized, // Past macro reconstruction, the result is finalized.
253
};
254
ReconstructorState State = Start;
255
#endif
256
257
// Node in which we build up the resulting unwrapped line; this type is
258
// analogous to UnwrappedLineNode.
259
struct LineNode {
260
LineNode() = default;
261
LineNode(FormatToken *Tok) : Tok(Tok) {}
262
FormatToken *Tok = nullptr;
263
SmallVector<std::unique_ptr<ReconstructedLine>> Children;
264
};
265
266
// Line in which we build up the resulting unwrapped line.
267
// FIXME: Investigate changing UnwrappedLine to a pointer type and using it
268
// instead of rolling our own type.
269
struct ReconstructedLine {
270
explicit ReconstructedLine(unsigned Level) : Level(Level) {}
271
unsigned Level;
272
SmallVector<std::unique_ptr<LineNode>> Tokens;
273
};
274
275
// The line in which we collect the resulting reconstructed output.
276
// To reduce special cases in the algorithm, the first level of the line
277
// contains a single null token that has the reconstructed incoming
278
// lines as children.
279
// In the end, we stich the lines together so that each subsequent line
280
// is a child of the last token of the previous line. This is necessary
281
// in order to format the overall expression as a single logical line -
282
// if we created separate lines, we'd format them with their own top-level
283
// indent depending on the semantic structure, which is not desired.
284
ReconstructedLine Result;
285
286
// Stack of currently "open" lines, where each line's predecessor's last
287
// token is the parent token for that line.
288
SmallVector<ReconstructedLine *> ActiveReconstructedLines;
289
290
// Maps from the expanded token to the token that takes its place in the
291
// reconstructed token stream in terms of parent-child relationships.
292
// Note that it might take multiple steps to arrive at the correct
293
// parent in the output.
294
// Given: #define C(a, b) []() { a; b; }
295
// And a call: C(f(), g())
296
// The structure in the incoming formatted unwrapped line will be:
297
// []() {
298
// |- f();
299
// \- g();
300
// }
301
// with f and g being children of the opening brace.
302
// In the reconstructed call:
303
// C(f(), g())
304
// \- f()
305
// \- g()
306
// We want f to be a child of the opening parenthesis and g to be a child
307
// of the comma token in the macro call.
308
// Thus, we map
309
// { -> (
310
// and add
311
// ( -> ,
312
// once we're past the comma in the reconstruction.
313
llvm::DenseMap<FormatToken *, FormatToken *>
314
SpelledParentToReconstructedParent;
315
316
// Keeps track of a single expansion while we're reconstructing tokens it
317
// generated.
318
struct Expansion {
319
// The identifier token of the macro call.
320
FormatToken *ID;
321
// Our current position in the reconstruction.
322
std::list<UnwrappedLineNode>::iterator SpelledI;
323
// The end of the reconstructed token sequence.
324
std::list<UnwrappedLineNode>::iterator SpelledE;
325
};
326
327
// Stack of macro calls for which we're in the middle of an expansion.
328
SmallVector<Expansion> ActiveExpansions;
329
330
struct MacroCallState {
331
MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken,
332
FormatToken *MacroCallLParen);
333
334
ReconstructedLine *Line;
335
336
// The last token in the parent line or expansion, or nullptr if the macro
337
// expansion is on a top-level line.
338
//
339
// For example, in the macro call:
340
// auto f = []() { ID(1); };
341
// The MacroCallState for ID will have '{' as ParentLastToken.
342
//
343
// In the macro call:
344
// ID(ID(void f()));
345
// The MacroCallState of the outer ID will have nullptr as ParentLastToken,
346
// while the MacroCallState for the inner ID will have the '(' of the outer
347
// ID as ParentLastToken.
348
//
349
// In the macro call:
350
// ID2(a, ID(b));
351
// The MacroCallState of ID will have ',' as ParentLastToken.
352
FormatToken *ParentLastToken;
353
354
// The l_paren of this MacroCallState's macro call.
355
FormatToken *MacroCallLParen;
356
};
357
358
// Keeps track of the lines into which the opening brace/parenthesis &
359
// argument separating commas for each level in the macro call go in order to
360
// put the corresponding closing brace/parenthesis into the same line in the
361
// output and keep track of which parents in the expanded token stream map to
362
// which tokens in the reconstructed stream.
363
// When an opening brace/parenthesis has children, we want the structure of
364
// the output line to be:
365
// |- MACRO
366
// |- (
367
// | \- <argument>
368
// |- ,
369
// | \- <argument>
370
// \- )
371
SmallVector<MacroCallState> MacroCallStructure;
372
373
// Maps from identifier of the macro call to an unwrapped line containing
374
// all tokens of the macro call.
375
const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
376
&IdToReconstructed;
377
};
378
379
} // namespace format
380
} // namespace clang
381
382
#endif
383
384