Bug Summary

File:build/libcpp/lex.cc
Warning:line 2096, column 8
4th function call argument is an uninitialized value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-suse-linux -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name lex.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/buildworker/marxinbox-gcc-clang-static-analyzer/objdir/build-x86_64-pc-linux-gnu/libcpp -resource-dir /usr/lib64/clang/15.0.7 -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp -I . -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/../include -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/include -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp -I . 
-I /buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/../include -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/include -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13 -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13/x86_64-suse-linux -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13/backward -internal-isystem /usr/lib64/clang/15.0.7/include -internal-isystem /usr/local/include -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/13/../../../../x86_64-suse-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-narrowing -Wwrite-strings -Wno-long-long -fdeprecated-macro -fdebug-compilation-dir=/buildworker/marxinbox-gcc-clang-static-analyzer/objdir/build-x86_64-pc-linux-gnu/libcpp -ferror-limit 19 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=plist-html -analyzer-config silence-checkers=core.NullDereference -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /buildworker/marxinbox-gcc-clang-static-analyzer/objdir/clang-static-analyzer/2023-03-27-141847-20772-1/report-r2aRrJ.plist -x c++ /buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000-2023 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 3, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "cpplib.h"
25#include "internal.h"
26
/* Spelling category of a token type; stored in the `category' field of
   struct token_spelling.  */
27enum spell_type
28{
29 SPELL_OPERATOR = 0,
30 SPELL_IDENT,
31 SPELL_LITERAL,
32 SPELL_NONE
33};
34
/* One entry of the token_spellings table: the spelling category of a
   token type together with its name string.  */
35struct token_spelling
36{
37 enum spell_type category;
38 const unsigned char *name;
39};
40
/* The six digraph spellings, in table order: %: %:%: <: :> <% %>.  */
41static const unsigned char *const digraph_spellings[] =
42{ UC(const uchar *)"%:", UC(const uchar *)"%:%:", UC(const uchar *)"<:", UC(const uchar *)":>", UC(const uchar *)"<%", UC(const uchar *)"%>" };
43
/* Table with one token_spelling entry per token type.  OP produces an
   entry of category SPELL_OPERATOR whose name is the string S; TK
   produces an entry of category SPELL_<S> whose name is the stringified
   E.  NOTE(review): the OP (...) / TK (...) text following TTYPE_TABLE
   below appears to be the analyzer's inline rendering of that macro's
   expansion rather than text from the original source.  */
44#define OP(e, s) { SPELL_OPERATOR, UC(const uchar *) s },
45#define TK(e, s) { SPELL_ ## s, UC(const uchar *) #e },
46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLEOP(EQ, "=") OP(NOT, "!") OP(GREATER, ">") OP(LESS, "<")
OP(PLUS, "+") OP(MINUS, "-") OP(MULT, "*") OP(DIV, "/") OP(MOD
, "%") OP(AND, "&") OP(OR, "|") OP(XOR, "^") OP(RSHIFT, ">>"
) OP(LSHIFT, "<<") OP(COMPL, "~") OP(AND_AND, "&&"
) OP(OR_OR, "||") OP(QUERY, "?") OP(COLON, ":") OP(COMMA, ","
) OP(OPEN_PAREN, "(") OP(CLOSE_PAREN, ")") TK((-1), NONE) OP(
EQ_EQ, "==") OP(NOT_EQ, "!=") OP(GREATER_EQ, ">=") OP(LESS_EQ
, "<=") OP(SPACESHIP, "<=>") OP(PLUS_EQ, "+=") OP(MINUS_EQ
, "-=") OP(MULT_EQ, "*=") OP(DIV_EQ, "/=") OP(MOD_EQ, "%=") OP
(AND_EQ, "&=") OP(OR_EQ, "|=") OP(XOR_EQ, "^=") OP(RSHIFT_EQ
, ">>=") OP(LSHIFT_EQ, "<<=") OP(HASH, "#") OP(PASTE
, "##") OP(OPEN_SQUARE, "[") OP(CLOSE_SQUARE, "]") OP(OPEN_BRACE
, "{") OP(CLOSE_BRACE, "}") OP(SEMICOLON, ";") OP(ELLIPSIS, "..."
) OP(PLUS_PLUS, "++") OP(MINUS_MINUS, "--") OP(DEREF, "->"
) OP(DOT, ".") OP(SCOPE, "::") OP(DEREF_STAR, "->*") OP(DOT_STAR
, ".*") OP(ATSIGN, "@") TK(NAME, IDENT) TK(AT_NAME, IDENT) TK
(NUMBER, LITERAL) TK(CHAR, LITERAL) TK(WCHAR, LITERAL) TK(CHAR16
, LITERAL) TK(CHAR32, LITERAL) TK(UTF8CHAR, LITERAL) TK(OTHER
, LITERAL) TK(STRING, LITERAL) TK(WSTRING, LITERAL) TK(STRING16
, LITERAL) TK(STRING32, LITERAL) TK(UTF8STRING, LITERAL) TK(OBJC_STRING
, LITERAL) TK(HEADER_NAME, LITERAL) TK(CHAR_USERDEF, LITERAL)
TK(WCHAR_USERDEF, LITERAL) TK(CHAR16_USERDEF, LITERAL) TK(CHAR32_USERDEF
, LITERAL) TK(UTF8CHAR_USERDEF, LITERAL) TK(STRING_USERDEF, LITERAL
) TK(WSTRING_USERDEF, LITERAL) TK(STRING16_USERDEF, LITERAL) TK
(STRING32_USERDEF, LITERAL) TK(UTF8STRING_USERDEF,LITERAL) TK
(COMMENT, LITERAL) TK(MACRO_ARG, NONE) TK(PRAGMA, NONE) TK(PRAGMA_EOL
, NONE) TK(PADDING, NONE)
};
47#undef OP
48#undef TK
49
/* Spelling category and name string of TOKEN, looked up in
   token_spellings using the token's type as the index.  */
50#define TOKEN_SPELL(token)(token_spellings[(token)->type].category) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token)(token_spellings[(token)->type].name) (token_spellings[(token)->type].name)
52
53/* ISO 10646 defines the UCS codespace as the range 0-0x10FFFF inclusive. */
54#define UCS_LIMIT0x10FFFF 0x10FFFF
55
/* Forward declarations of helpers that are private to this file (all
   have static linkage).  */
56static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
57static int skip_line_comment (cpp_reader *);
58static void skip_whitespace (cpp_reader *, cppchar_t);
59static void lex_string (cpp_reader *, cpp_token *, const uchar *);
60static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
61static void store_comment (cpp_reader *, cpp_token *);
62static void create_literal (cpp_reader *, cpp_token *, const uchar *,
63 unsigned int, enum cpp_ttype);
64static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
65static int name_p (cpp_reader *, const cpp_string *);
66static tokenrun *next_tokenrun (tokenrun *);
67
68static _cpp_buff *new_buff (size_t);
69
70
71/* Utility routine:
72
73 Compares the token TOKEN to the NUL-terminated string STRING.
74 Returns 1 for equal, 0 for unequal.  A TOKEN whose type is not
   CPP_NAME is never compared and always yields 0. */
75int
76cpp_ideq (const cpp_token *token, const char *string)
77{
78 if (token->type != CPP_NAME)
79 return 0;
80
   /* The result is 1 exactly when ustrcmp returns 0 for the identifier's
      name and STRING.  */
81 return !ustrcmp (NODE_NAME (token->val.node.node)(((&(token->val.node.node)->ident))->str), (const uchar *) string);
82}
83
84/* Record a note TYPE at byte POS into the current cleaned logical
85 line.  The note is appended to BUFFER->notes, which is grown first
   if it is already full. */
86static void
87add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
88{
89 if (buffer->notes_used == buffer->notes_cap)
90 {
   /* Array is full: the new capacity is twice the old one plus 200.  */
91 buffer->notes_cap = buffer->notes_cap * 2 + 200;
92 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,((_cpp_line_note *) xrealloc ((void *) (buffer->notes), sizeof
(_cpp_line_note) * (buffer->notes_cap)))
93 buffer->notes_cap)((_cpp_line_note *) xrealloc ((void *) (buffer->notes), sizeof
(_cpp_line_note) * (buffer->notes_cap)))
;
94 }
95
96 buffer->notes[buffer->notes_used].pos = pos;
97 buffer->notes[buffer->notes_used].type = type;
98 buffer->notes_used++;
99}
100
101
102/* Fast path to find line special characters using optimized character
103 scanning algorithms. Anything complicated falls back to the slow
104 path below. Since this loop is very hot it's worth doing these kinds
105 of optimizations.
106
107 One of the paths through the ifdefs should provide
108
109 const uchar *search_line_fast (const uchar *s, const uchar *end);
110
111 Between S and END, search for \n, \r, \\, ?. Return a pointer to
112 the found character.
113
114 Note that the last character of the buffer is *always* a newline,
115 as forced by _cpp_convert_input. This fact can be used to avoid
116 explicitly looking for the end of the buffer. */
117
118/* Configure gives us an ifdef test.  Turn it into a macro that always
   has a value so it can be used in ordinary `if' conditions. */
119#ifndef WORDS_BIGENDIAN0
120#define WORDS_BIGENDIAN0 0
121#endif
122
123/* We'd like the largest integer that fits into a register. There's nothing
124 in <stdint.h> that gives us that. For most hosts this is unsigned long,
125 but MS decided on an LLP64 model. Thankfully when building with GCC we
126 can get the "real" word size. */
127#ifdef __GNUC__4
128typedef unsigned int word_type __attribute__((__mode__(__word__)));
129#else
130typedef unsigned long word_type;
131#endif
132
133/* The code below is only expecting sizes 4 or 8.
134 Die at compile-time if this expectation is violated: the array bound
   below is 1 when the size is 4 or 8 and -1 (ill-formed) otherwise. */
135typedef char check_word_type_size
136 [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
137
138/* Return VAL with the first N bytes (in memory order) forced to zero,
139 a value that won't match one of the interesting characters. Note that
   NUL is not interesting.  N must be smaller than sizeof (word_type),
   otherwise the shift count would reach the width of the type; the
   caller visible in search_line_acc_char passes the misalignment of
   the start pointer, which satisfies this. */
140
141static inline word_type
142acc_char_mask_misalign (word_type val, unsigned int n)
143{
144 word_type mask = -1;
   /* The first bytes in memory are the high-order ones on a big-endian
      host and the low-order ones otherwise.  */
145 if (WORDS_BIGENDIAN0)
146 mask >>= n * 8;
147 else
148 mask <<= n * 8;
149 return val & mask;
150}
151
152/* Return X replicated to all byte positions within WORD_TYPE.
   NOTE(review): assuming uchar is an 8-bit unsigned type (its definition
   is not visible here), X is promoted to int before the shifts, so a
   value of 0x80 or above would shift into the sign bit in `x << 24'.
   The calls visible in search_line_acc_char only pass '\n', '\r', '\\'
   and '?', all below 0x80.  */
153
154static inline word_type
155acc_char_replicate (uchar x)
156{
157 word_type ret;
158
   /* Fill the low 32 bits first ...  */
159 ret = (x << 24) | (x << 16) | (x << 8) | x;
   /* ... then copy them into the high half of a 64-bit word.  The shift
      is written as two 16-bit steps twice over so that it stays valid
      when word_type is only 32 bits wide.  */
160 if (sizeof(word_type) == 8)
161 ret = (ret << 16 << 16) | ret;
162 return ret;
163}
164
165/* Return non-zero if some byte of VAL is (probably) C.  The callers
   visible in search_line_acc_char pass C as a character replicated into
   every byte position by acc_char_replicate.  */
166
167static inline word_type
168acc_char_cmp (word_type val, word_type c)
169{
170#if defined(__GNUC__4) && defined(__alpha__)
171 /* We can get exact results using a compare-bytes instruction.
172 Get (val == c) via (0 >= (val ^ c)). */
173 return __builtin_alpha_cmpbge (0, val ^ c);
174#else
   /* MAGIC is 0x7efefeff for 32-bit words and 0x7efefefefefefeff for
      64-bit words.  */
175 word_type magic = 0x7efefefeU;
176 if (sizeof(word_type) == 8)
177 magic = (magic << 16 << 16) | 0xfefefefeU;
178 magic |= 1;
179
   /* Bytes of VAL equal to the corresponding byte of C become zero.  */
180 val ^= c;
   /* NOTE(review): this looks like the usual carry-based test for a zero
      byte within a word; it may report false positives, which
      acc_char_index filters out by returning -1.  */
181 return ((val + magic) ^ ~val) & ~magic;
182#endif
183}
184
185/* Given the result of acc_char_cmp is non-zero, return the index of
186 the found character. If this was a false positive, return -1.
   CMP is consulted only on the Alpha path; the generic path ignores it
   and re-examines the bytes of VAL one at a time in memory order. */
187
188static inline int
189acc_char_index (word_type cmp ATTRIBUTE_UNUSED__attribute__ ((__unused__)),
190 word_type val ATTRIBUTE_UNUSED__attribute__ ((__unused__)))
191{
192#if defined(__GNUC__4) && defined(__alpha__) && !WORDS_BIGENDIAN0
193 /* The cmpbge instruction sets *bits* of the result corresponding to
194 matches in the bytes with no false positives. */
195 return __builtin_ctzl (cmp);
196#else
197 unsigned int i;
198
199 /* ??? It would be nice to force unrolling here,
200 and have all of these constants folded. */
201 for (i = 0; i < sizeof(word_type); ++i)
202 {
203 uchar c;
   /* Extract byte I in memory order: counted from the high-order end on
      a big-endian host, from the low-order end otherwise.  */
204 if (WORDS_BIGENDIAN0)
205 c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
206 else
207 c = (val >> i * 8) & 0xff;
208
209 if (c == '\n' || c == '\r' || c == '\\' || c == '?')
210 return i;
211 }
212
   /* None of the bytes is one of the four characters.  */
213 return -1;
214#endif
215}
216
217/* A version of the fast scanner using bit fiddling techniques.
218
219 For 32-bit words, one would normally perform 16 comparisons and
220 16 branches. With this algorithm one performs 24 arithmetic
221 operations and one branch. Whether this is faster with a 32-bit
222 word size is going to be somewhat system dependent.
223
224 For 64-bit words, we eliminate twice the number of comparisons
225 and branches without increasing the number of arithmetic operations.
226 It's almost certainly going to be a win with 64-bit word size.

   Returns a pointer to the first '\n', '\r', '\\' or '?' at or after S.
   END is not used.  */
227
228static const uchar * search_line_acc_char (const uchar *, const uchar *)
229 ATTRIBUTE_UNUSED__attribute__ ((__unused__));
230
231static const uchar *
232search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED__attribute__ ((__unused__)))
233{
234 const word_type repl_nl = acc_char_replicate ('\n');
235 const word_type repl_cr = acc_char_replicate ('\r');
236 const word_type repl_bs = acc_char_replicate ('\\');
237 const word_type repl_qm = acc_char_replicate ('?');
238
239 unsigned int misalign;
240 const word_type *p;
241 word_type val, t;
242
243 /* Align the buffer. Mask out any bytes from before the beginning.
   The first load is from the word-aligned address at or below S, so it
   may include bytes that precede S.  */
244 p = (word_type *)((uintptr_t)s & -sizeof(word_type));
245 val = *p;
246 misalign = (uintptr_t)s & (sizeof(word_type) - 1);
247 if (misalign)
248 val = acc_char_mask_misalign (val, misalign);
249
250 /* Main loop.  There is no end-of-buffer test: the only exit is the
   return taken once one of the four characters has been found.  */
251 while (1)
252 {
253 t = acc_char_cmp (val, repl_nl);
254 t |= acc_char_cmp (val, repl_cr);
255 t |= acc_char_cmp (val, repl_bs);
256 t |= acc_char_cmp (val, repl_qm);
257
258 if (__builtin_expect (t != 0, 0))
259 {
   /* A negative index means acc_char_cmp reported a false positive;
      keep scanning in that case.  */
260 int i = acc_char_index (t, val);
261 if (i >= 0)
262 return (const uchar *)p + i;
263 }
264
265 val = *++p;
266 }
267}
268
269/* Disable on Solaris 2/x86 until the following problem can be properly
270 autoconfed:
271
272 The Solaris 10+ assembler tags objects with the instruction set
273 extensions used, so SSE4.2 executables cannot run on machines that
274 don't support that extension. */
275
276#if (GCC_VERSION(4 * 1000 + 2) >= 4005) && (__GNUC__4 >= 5 || !defined(__PIC__2)) && (defined(__i386__) || defined(__x86_64__1)) && !(defined(__sun__) && defined(__svr4__))
277
278/* Replicated character data to be shared between implementations.
279 Recall that outside of a context with vector support we can't
280 define compatible vector types, therefore these are all defined
281 in terms of raw characters.  Row order: newline, carriage return,
   backslash, question mark; each row is 16 bytes long. */
282static const char repl_chars[4][16] __attribute__((aligned(16))) = {
283 { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
284 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
285 { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
286 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
287 { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
288 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
289 { '?', '?', '?', '?', '?', '?', '?', '?',
290 '?', '?', '?', '?', '?', '?', '?', '?' },
291};
292
293/* A version of the fast scanner using MMX vectorized byte compare insns.
294
295 This uses the PMOVMSKB instruction which was introduced with "MMX2",
296 which was packaged into SSE1; it is also present in the AMD MMX
297 extension. Mark the function as using "sse" so that we emit a real
298 "emms" instruction, rather than the 3dNOW "femms" instruction.

   Returns a pointer to the first '\n', '\r', '\\' or '?' at or after S.
   END is not used.  */
299
300static const uchar *
301#ifndef __SSE__1
302__attribute__((__target__("sse")))
303#endif
304search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED__attribute__ ((__unused__)))
305{
306 typedef char v8qi __attribute__ ((__vector_size__ (8)));
307 typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
308
   /* Only the first 8 bytes of each 16-byte repl_chars row are used.  */
309 const v8qi repl_nl = *(const v8qi *)repl_chars[0];
310 const v8qi repl_cr = *(const v8qi *)repl_chars[1];
311 const v8qi repl_bs = *(const v8qi *)repl_chars[2];
312 const v8qi repl_qm = *(const v8qi *)repl_chars[3];
313
314 unsigned int misalign, found, mask;
315 const v8qi *p;
316 v8qi data, t, c;
317
318 /* Align the source pointer. While MMX doesn't generate unaligned data
319 faults, this allows us to safely scan to the end of the buffer without
320 reading beyond the end of the last page. */
321 misalign = (uintptr_t)s & 7;
322 p = (const v8qi *)((uintptr_t)s & -8);
323 data = *p;
324
325 /* Create a mask for the bytes that are valid within the first
326 8-byte block. The idea here is that the AND with the mask
327 within the loop is "free", since we need some AND or TEST
328 insn in order to set the flags for the branch anyway. */
329 mask = -1u << misalign;
330
331 /* Main loop processing 8 bytes at a time.  The first pass enters at
   `start' so that the block already loaded is tested with the partial
   mask; later passes load the next block and use a full mask.  */
332 goto start;
333 do
334 {
335 data = *++p;
336 mask = -1;
337
338 start:
339 t = __builtin_ia32_pcmpeqb(data, repl_nl);
340 c = __builtin_ia32_pcmpeqb(data, repl_cr);
341 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
342 c = __builtin_ia32_pcmpeqb(data, repl_bs);
343 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
344 c = __builtin_ia32_pcmpeqb(data, repl_qm);
345 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
346 found = __builtin_ia32_pmovmskb (t);
347 found &= mask;
348 }
349 while (!found);
350
351 __builtin_ia32_emms ();
352
353 /* FOUND contains 1 in bits for which we matched a relevant
354 character. Conversion to the byte index is trivial. */
355 found = __builtin_ctz(found);
356 return (const uchar *)p + found;
357}
358
359/* A version of the fast scanner using SSE2 vectorized byte compare insns.
   Returns a pointer to the first '\n', '\r', '\\' or '?' at or after S.
   END is not used.  */
360
361static const uchar *
362#ifndef __SSE2__1
363__attribute__((__target__("sse2")))
364#endif
365search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED__attribute__ ((__unused__)))
366{
367 typedef char v16qi __attribute__ ((__vector_size__ (16)));
368
369 const v16qi repl_nl = *(const v16qi *)repl_chars[0];
370 const v16qi repl_cr = *(const v16qi *)repl_chars[1];
371 const v16qi repl_bs = *(const v16qi *)repl_chars[2];
372 const v16qi repl_qm = *(const v16qi *)repl_chars[3];
373
374 unsigned int misalign, found, mask;
375 const v16qi *p;
376 v16qi data, t;
377
378 /* Align the source pointer.  The first load therefore starts at the
   16-byte boundary at or below S.  */
379 misalign = (uintptr_t)s & 15;
380 p = (const v16qi *)((uintptr_t)s & -16);
381 data = *p;
382
383 /* Create a mask for the bytes that are valid within the first
384 16-byte block. The idea here is that the AND with the mask
385 within the loop is "free", since we need some AND or TEST
386 insn in order to set the flags for the branch anyway. */
387 mask = -1u << misalign;
388
389 /* Main loop processing 16 bytes at a time.  The first pass enters at
   `start' so that the block already loaded is tested with the partial
   mask; later passes load the next block and use a full mask.  */
390 goto start;
391 do
392 {
393 data = *++p;
394 mask = -1;
395
396 start:
397 t = data == repl_nl;
398 t |= data == repl_cr;
399 t |= data == repl_bs;
400 t |= data == repl_qm;
401 found = __builtin_ia32_pmovmskb128 (t);
402 found &= mask;
403 }
404 while (!found);
405
406 /* FOUND contains 1 in bits for which we matched a relevant
407 character. Conversion to the byte index is trivial. */
408 found = __builtin_ctz(found);
409 return (const uchar *)p + found;
410}
411
412#ifdef HAVE_SSE41
413/* A version of the fast scanner using SSE 4.2 vectorized string insns.
   Returns a pointer to the first '\n', '\r', '?' or '\\' at or after S.
   END is used only to decide whether an unaligned 16-byte read at S is
   safe.  */
414
415static const uchar *
416#ifndef __SSE4_2__
417__attribute__((__target__("sse4.2")))
418#endif
419search_line_sse42 (const uchar *s, const uchar *end)
420{
421 typedef char v16qi __attribute__ ((__vector_size__ (16)));
   /* Only the first four elements are meaningful; the PCMPESTRI uses
      below pass 4 as the length of this operand.  */
422 static const v16qi search = { '\n', '\r', '?', '\\' };
423
424 uintptr_t si = (uintptr_t)s;
425 uintptr_t index;
426
427 /* Check for unaligned input. */
428 if (si & 15)
429 {
430 v16qi sv;
431
432 if (__builtin_expect (end - s < 16, 0)
433 && __builtin_expect ((si & 0xfff) > 0xff0, 0))
434 {
435 /* There are fewer than 16 bytes left in the buffer, and fewer
436 than 16 bytes left on the page. Reading 16 bytes at this
437 point might generate a spurious page fault. Defer to the
438 SSE2 implementation, which already handles alignment. */
439 return search_line_sse2 (s, end);
440 }
441
442 /* ??? The builtin doesn't understand that the PCMPESTRI read from
443 memory need not be aligned. */
444 sv = __builtin_ia32_loaddqu ((const char *) s);
445 index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
446
   /* An index below 16 means a match inside the 16 bytes read at S;
      S is still the original pointer at this jump.  */
447 if (__builtin_expect (index < 16, 0))
448 goto found;
449
450 /* Advance the pointer to an aligned address. We will re-scan a
451 few bytes, but we no longer need care for reading past the
452 end of a page, since we're guaranteed a match. */
453 s = (const uchar *)((si + 15) & -16);
454 }
455
456 /* Main loop, processing 16 bytes at a time. */
457#ifdef __GCC_ASM_FLAG_OUTPUTS__1
458 while (1)
459 {
460 char f;
461
462 /* By using inline assembly instead of the builtin,
463 we can use the result, as well as the flags set. */
464 __asm ("%vpcmpestri\t$0, %2, %3"
465 : "=c"(index), "=@ccc"(f)
466 : "m"(*s), "x"(search), "a"(4), "d"(16));
467 if (f)
468 break;
469
470 s += 16;
471 }
472#else
   /* Pre-decrement S because the asm loop below adds 16 before each
      compare.  */
473 s -= 16;
474 /* By doing the whole loop in inline assembly,
475 we can make proper use of the flags set. */
476 __asm ( ".balign 16\n"
477 "0: add $16, %1\n"
478 " %vpcmpestri\t$0, (%1), %2\n"
479 " jnc 0b"
480 : "=&c"(index), "+r"(s)
481 : "x"(search), "a"(4), "d"(16));
482#endif
483
484 found:
485 return s + index;
486}
487
488#else
489/* Work around out-dated assemblers without sse4 support. */
490#define search_line_sse42 search_line_sse2
491#endif
492
493/* Check the CPU capabilities. */
494
495#include "../gcc/config/i386/cpuid.h"
496
/* Signature shared by the search_line_* implementations, and the
   file-scope pointer that init_vectorized_lexer sets to the chosen
   implementation.  */
497typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
498static search_line_fast_type search_line_fastsearch_line_acc_char;
499
/* Choose the line-scanner implementation for this host and store it in
   the search_line_fast pointer.  The default is search_line_acc_char;
   it is replaced by the SSE4.2, SSE2 or MMX variant when the compile
   flags or the CPUID results show that the instruction set is usable.  */
500#define HAVE_init_vectorized_lexer 1
501static inline void
502init_vectorized_lexer (void)
503{
504 unsigned dummy, ecx = 0, edx = 0;
505 search_line_fast_type impl = search_line_acc_char;
   /* MINIMUM is the instruction-set level guaranteed by the predefined
      macros tested below: 3 = SSE4.2, 2 = SSE2, 1 = SSE, 0 = none.  */
506 int minimum = 0;
507
508#if defined(__SSE4_2__)
509 minimum = 3;
510#elif defined(__SSE2__1)
511 minimum = 2;
512#elif defined(__SSE__1)
513 minimum = 1;
514#endif
515
   /* With SSE4.2 guaranteed no run-time query is made.  Otherwise CPUID
      leaf 1 is queried; ECX and EDX stay zero if that query fails, so a
      guaranteed level of 2 still selects at least the SSE2 variant.  */
516 if (minimum == 3)
517 impl = search_line_sse42;
518 else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
519 {
520 if (minimum == 3 || (ecx & bit_SSE4_2))
521 impl = search_line_sse42;
522 else if (minimum == 2 || (edx & bit_SSE2))
523 impl = search_line_sse2;
524 else if (minimum == 1 || (edx & bit_SSE))
525 impl = search_line_mmx;
526 }
   /* Leaf 1 was unavailable: fall back to extended leaf 0x80000001 and
      require both the MMXEXT and CMOV bits for the MMX variant.  */
527 else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
528 {
529 if (minimum == 1
530 || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
531 impl = search_line_mmx;
532 }
533
534 search_line_fastsearch_line_acc_char = impl;
535}
536
537#elif (GCC_VERSION(4 * 1000 + 2) >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
538
539/* A version of the fast scanner using AltiVec vectorized byte compares
540 and VSX unaligned loads (when VSX is available). This is otherwise
541 the same as the AltiVec version.  Returns a pointer to the first
   '\n', '\r', '\\' or '?' at or after S.  END is not used. */
542
543ATTRIBUTE_NO_SANITIZE_UNDEFINED
544static const uchar *
545search_line_fastsearch_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED__attribute__ ((__unused__)))
546{
547 typedef __attribute__((altivec(vector))) unsigned char vc;
548
549 const vc repl_nl = {
550 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
551 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
552 };
553 const vc repl_cr = {
554 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
555 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
556 };
557 const vc repl_bs = {
558 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
559 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
560 };
561 const vc repl_qm = {
562 '?', '?', '?', '?', '?', '?', '?', '?',
563 '?', '?', '?', '?', '?', '?', '?', '?',
564 };
565 const vc zero = { 0 };
566
567 vc data, t;
568
569 /* Main loop processing 16 bytes at a time. */
570 do
571 {
572 vc m_nl, m_cr, m_bs, m_qm;
573
   /* Load directly from S; unlike the other variants there is no
      alignment step before the load.  */
574 data = __builtin_vec_vsx_ld (0, s);
575 s += 16;
576
577 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
578 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
579 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
580 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
581 t = (m_nl | m_cr) | (m_bs | m_qm);
582
583 /* T now contains 0xff in bytes for which we matched one of the relevant
584 characters. We want to exit the loop if any byte in T is non-zero.
585 Below is the expansion of vec_any_ne(t, zero). */
586 }
587 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
588
589 /* Restore s to point to the 16 bytes we just processed. */
590 s -= 16;
591
592 {
593#define N (sizeof(vc) / sizeof(long))
594
595 union {
596 vc v;
597 /* Statically assert that N is 2 or 4. */
598 unsigned long l[(N == 2 || N == 4) ? N : -1];
599 } u;
600 unsigned long l, i = 0;
601
602 u.v = t;
603
604 /* Find the first word of T that is non-zero.  S is advanced past
   every all-zero word that is skipped.  */
605 switch (N)
606 {
607 case 4:
608 l = u.l[i++];
609 if (l != 0)
610 break;
611 s += sizeof(unsigned long);
612 l = u.l[i++];
613 if (l != 0)
614 break;
615 s += sizeof(unsigned long);
616 /* FALLTHRU */
617 case 2:
618 l = u.l[i++];
619 if (l != 0)
620 break;
621 s += sizeof(unsigned long);
622 l = u.l[i];
623 }
624
625 /* L now contains 0xff in bytes for which we matched one of the
626 relevant characters. We can find the byte index by finding
627 its bit index and dividing by 8. */
628#ifdef __BIG_ENDIAN__
629 l = __builtin_clzl(l) >> 3;
630#else
631 l = __builtin_ctzl(l) >> 3;
632#endif
633 return s + l;
634
635#undef N
636 }
637}
638
639#elif (GCC_VERSION(4 * 1000 + 2) >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
640
641/* A version of the fast scanner using AltiVec vectorized byte compares.
642 This cannot be used for little endian because vec_lvsl/lvsr are
643 deprecated for little endian and the code won't work properly.
   Returns a pointer to the first '\n', '\r', '\\' or '?' at or after S.
   END is not used. */
644/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
645 so we can't compile this function without -maltivec on the command line
646 (or implied by some other switch). */
647
648static const uchar *
649search_line_fastsearch_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED__attribute__ ((__unused__)))
650{
651 typedef __attribute__((altivec(vector))) unsigned char vc;
652
653 const vc repl_nl = {
654 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
655 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
656 };
657 const vc repl_cr = {
658 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
659 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
660 };
661 const vc repl_bs = {
662 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
663 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
664 };
665 const vc repl_qm = {
666 '?', '?', '?', '?', '?', '?', '?', '?',
667 '?', '?', '?', '?', '?', '?', '?', '?',
668 };
669 const vc ones = {
670 -1, -1, -1, -1, -1, -1, -1, -1,
671 -1, -1, -1, -1, -1, -1, -1, -1,
672 };
673 const vc zero = { 0 };
674
675 vc data, mask, t;
676
677 /* Altivec loads automatically mask addresses with -16. This lets us
678 issue the first load as early as possible. */
679 data = __builtin_vec_ld(0, (const vc *)s);
680
681 /* Discard bytes before the beginning of the buffer. Do this by
682 beginning with all ones and shifting in zeros according to the
683 mis-alignment. The LVSR instruction pulls the exact shift we
684 want from the address. */
685 mask = __builtin_vec_lvsr(0, s);
686 mask = __builtin_vec_perm(zero, ones, mask);
687 data &= mask;
688
689 /* While altivec loads mask addresses, we still need to align S so
690 that the offset we compute at the end is correct. */
691 s = (const uchar *)((uintptr_t)s & -16);
692
693 /* Main loop processing 16 bytes at a time.  The first pass enters at
   `start' so that the masked block loaded above is tested before any
   further load.  */
694 goto start;
695 do
696 {
697 vc m_nl, m_cr, m_bs, m_qm;
698
699 s += 16;
700 data = __builtin_vec_ld(0, (const vc *)s);
701
702 start:
703 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
704 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
705 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
706 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
707 t = (m_nl | m_cr) | (m_bs | m_qm);
708
709 /* T now contains 0xff in bytes for which we matched one of the relevant
710 characters. We want to exit the loop if any byte in T is non-zero.
711 Below is the expansion of vec_any_ne(t, zero). */
712 }
713 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
714
715 {
716#define N (sizeof(vc) / sizeof(long))
717
718 union {
719 vc v;
720 /* Statically assert that N is 2 or 4. */
721 unsigned long l[(N == 2 || N == 4) ? N : -1];
722 } u;
723 unsigned long l, i = 0;
724
725 u.v = t;
726
727 /* Find the first word of T that is non-zero.  S is advanced past
   every all-zero word that is skipped.  */
728 switch (N)
729 {
730 case 4:
731 l = u.l[i++];
732 if (l != 0)
733 break;
734 s += sizeof(unsigned long);
735 l = u.l[i++];
736 if (l != 0)
737 break;
738 s += sizeof(unsigned long);
739 /* FALLTHROUGH */
740 case 2:
741 l = u.l[i++];
742 if (l != 0)
743 break;
744 s += sizeof(unsigned long);
745 l = u.l[i];
746 }
747
748 /* L now contains 0xff in bytes for which we matched one of the
749 relevant characters. We can find the byte index by finding
750 its bit index and dividing by 8. */
751 l = __builtin_clzl(l) >> 3;
752 return s + l;
753
754#undef N
755 }
756}
757
758#elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
759#include "arm_neon.h"
760
761/* This doesn't have to be the exact page size, but no system may use
762 a size smaller than this. ARMv8 requires a minimum page size of
763 4k. The impact of being conservative here is a small number of
764 cases will take the slightly slower entry path into the main
765 loop. */
766
767#define AARCH64_MIN_PAGE_SIZE 4096
768
/* AArch64 NEON version of the fast scanner.  Searches from S, 16 bytes
   per step, and returns a pointer to the first '\n', '\r', '\\' or '?'
   found.  END is not used.  */
769static const uchar *
770search_line_fastsearch_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED__attribute__ ((__unused__)))
771{
772 const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
773 const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
774 const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
775 const uint8x16_t repl_qm = vdupq_n_u8 ('?');
   /* One distinct bit per byte within each 8-byte half; used together
      with SHIFT to collapse the byte-wise compare result into a 16-bit
      mask in FOUND.  */
776 const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
777
778#ifdef __ARM_BIG_ENDIAN
779 const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
780#else
781 const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
782#endif
783
784 unsigned int found;
785 const uint8_t *p;
786 uint8x16_t data;
787 uint8x16_t t;
788 uint16x8_t m;
789 uint8x16_t u, v, w;
790
791 /* Align the source pointer. */
792 p = (const uint8_t *)((uintptr_t)s & -16);
793
794 /* Assuming random string start positions, with a 4k page size we'll take
795 the slow path about 0.37% of the time. */
796 if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
797 - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
798 < 16, 0))
799 {
800 /* Slow path: the string starts near a possible page boundary.
   Load the aligned block at P instead and mask off the lanes that lie
   before S.  */
801 uint32_t misalign, mask;
802
803 misalign = (uintptr_t)s & 15;
804 mask = (-1u << misalign) & 0xffff;
805 data = vld1q_u8 (p);
806 t = vceqq_u8 (data, repl_nl);
807 u = vceqq_u8 (data, repl_cr);
808 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
809 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
810 t = vorrq_u8 (v, w);
811 t = vandq_u8 (t, xmask);
812 m = vpaddlq_u8 (t);
813 m = vshlq_u16 (m, shift);
814 found = vaddvq_u16 (m);
815 found &= mask;
816 if (found)
817 return (const uchar*)p + __builtin_ctz (found);
818 }
819 else
820 {
   /* Fast path: an unaligned 16-byte load starting exactly at S.  */
821 data = vld1q_u8 ((const uint8_t *) s);
822 t = vceqq_u8 (data, repl_nl);
823 u = vceqq_u8 (data, repl_cr);
824 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
825 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
826 t = vorrq_u8 (v, w);
827 if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != 0, 0))
828 goto done;
829 }
830
   /* Nothing in the first block: continue with aligned loads, starting
      at the 16-byte block that follows P.  */
831 do
832 {
833 p += 16;
834 data = vld1q_u8 (p);
835 t = vceqq_u8 (data, repl_nl);
836 u = vceqq_u8 (data, repl_cr);
837 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
838 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
839 t = vorrq_u8 (v, w);
840 } while (!vpaddd_u64 ((uint64x2_t)t));
841
842done:
843 /* Now that we've found the terminating substring, work out precisely where
844 we need to stop. */
845 t = vandq_u8 (t, xmask);
846 m = vpaddlq_u8 (t);
847 m = vshlq_u16 (m, shift);
848 found = vaddvq_u16 (m);
   /* P is below S only when the match came from the unaligned load at S
      on the fast path, so the lane index is then relative to S; after
      the loop P has moved past S and the index is relative to P.  */
849 return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
850 + __builtin_ctz (found));
851}
852
853#elif defined (__ARM_NEON)
854#include "arm_neon.h"
855
856static const uchar *
857search_line_fastsearch_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED__attribute__ ((__unused__)))
858{
859 const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
860 const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
861 const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
862 const uint8x16_t repl_qm = vdupq_n_u8 ('?');
863 const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
864
865 unsigned int misalign, found, mask;
866 const uint8_t *p;
867 uint8x16_t data;
868
869 /* Align the source pointer. */
870 misalign = (uintptr_t)s & 15;
871 p = (const uint8_t *)((uintptr_t)s & -16);
872 data = vld1q_u8 (p);
873
874 /* Create a mask for the bytes that are valid within the first
875 16-byte block. The Idea here is that the AND with the mask
876 within the loop is "free", since we need some AND or TEST
877 insn in order to set the flags for the branch anyway. */
878 mask = (-1u << misalign) & 0xffff;
879
880 /* Main loop, processing 16 bytes at a time. */
881 goto start;
882
883 do
884 {
885 uint8x8_t l;
886 uint16x4_t m;
887 uint32x2_t n;
888 uint8x16_t t, u, v, w;
889
890 p += 16;
891 data = vld1q_u8 (p);
892 mask = 0xffff;
893
894 start:
895 t = vceqq_u8 (data, repl_nl);
896 u = vceqq_u8 (data, repl_cr);
897 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
898 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
899 t = vandq_u8 (vorrq_u8 (v, w), xmask);
900 l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
901 m = vpaddl_u8 (l);
902 n = vpaddl_u16 (m);
903
904 found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
905 vshr_n_u64 ((uint64x1_t) n, 24)), 0);
906 found &= mask;
907 }
908 while (!found);
909
910 /* FOUND contains 1 in bits for which we matched a relevant
911 character. Conversion to the byte index is trivial. */
912 found = __builtin_ctz (found);
913 return (const uchar *)p + found;
914}
915
916#else
917
918/* We only have one accelerated alternative. Use a direct call so that
919 we encourage inlining. */
920
921#define search_line_fastsearch_line_acc_char search_line_acc_char
922
923#endif
924
/* One-time lexer initialization.  When HAVE_init_vectorized_lexer is
   defined, run init_vectorized_lexer; otherwise this does nothing.  */

void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
934
935 /* Returns with a logical line that contains no escaped newlines or
936 trigraphs. This is a time-critical inner loop. */
/* On entry the line to clean starts at pfile->buffer->next_line.  The
   function resets the buffer's note counters, points cur and line_base
   at the start of the line and clears need_line.  On exit the byte at
   the end of the cleaned line has been overwritten with '\n', a
   sentinel note has been added just past it, and next_line points one
   past the last source byte consumed.  When buffer->from_stage3 is set
   no trigraph or backslash-newline processing is done; the code only
   looks for the terminating '\n' or '\r'.  */
937 void
938 _cpp_clean_line (cpp_reader *pfile)
939 {
940 cpp_buffer *buffer;
941 const uchar *s;
942 uchar c, *d, *p;
943
944 buffer = pfile->buffer;
945 buffer->cur_note = buffer->notes_used = 0;
946 buffer->cur = buffer->line_base = buffer->next_line;
947 buffer->need_line = false;
948 s = buffer->next_line;
949
950 if (!buffer->from_stage3)
951 {
/* Most recent backslash seen by the fast path, or NULL if none.  */
952 const uchar *pbackslash = NULL__null;
953
954 /* Fast path. This is the common case of an un-escaped line with
955 no trigraphs. The primary win here is by not writing any
956 data back to memory until we have to. */
957 while (1)
958 {
959 /* Perform an optimized search for \n, \r, \\, ?. */
960 s = search_line_fastsearch_line_acc_char (s, buffer->rlimit);
961
962 c = *s;
963 if (c == '\\')
964 {
965 /* Record the location of the backslash and continue. */
966 pbackslash = s++;
967 }
968 else if (__builtin_expect (c == '?', 0))
969 {
970 if (__builtin_expect (s[1] == '?', false)
971 && _cpp_trigraph_map[s[2]])
972 {
973 /* Have a trigraph. We may or may not have to convert
974 it. Add a line note regardless, for -Wtrigraphs. */
975 add_line_note (buffer, s, s[2]);
976 if (CPP_OPTION (pfile, trigraphs)((pfile)->opts.trigraphs))
977 {
978 /* We do, and that means we have to switch to the
979 slow path. */
/* Overwrite the first '?' in place with the mapped character and skip
   the other two trigraph bytes; the slow path closes the gap as it
   copies the rest of the line down.  */
980 d = (uchar *) s;
981 *d = _cpp_trigraph_map[s[2]];
982 s += 2;
983 goto slow_path;
984 }
985 }
986 /* Not a trigraph. Continue on fast-path. */
987 s++;
988 }
989 else
990 break;
991 }
992
993 /* This must be \r or \n. We're either done, or we'll be forced
994 to write back to the buffer and continue on the slow path. */
995 d = (uchar *) s;
996
/* The terminator sits at the buffer limit: nothing follows it.  */
997 if (__builtin_expect (s == buffer->rlimit, false))
998 goto done;
999
1000 /* DOS line ending? */
1001 if (__builtin_expect (c == '\r', false) && s[1] == '\n')
1002 {
1003 s++;
1004 if (s == buffer->rlimit)
1005 goto done;
1006 }
1007
/* No backslash was seen on this line, so the newline cannot be
   escaped.  */
1008 if (__builtin_expect (pbackslash == NULL__null, true))
1009 goto done;
1010
1011 /* Check for escaped newline. */
/* Step back over non-vertical whitespace before the newline; the
   newline is escaped only if the character before that whitespace is
   the last backslash recorded above.  */
1012 p = d;
1013 while (is_nvspace (p[-1])(_sch_istable[(p[-1]) & 0xff] & (unsigned short)(_sch_isnvsp
))
)
1014 p--;
1015 if (p - 1 != pbackslash)
1016 goto done;
1017
1018 /* Have an escaped newline; process it and proceed to
1019 the slow path. */
/* The note type is ' ' when whitespace separated the backslash from
   the newline and '\\' otherwise.  D is backed up so that the slow
   path's first store lands on the backslash, and next_line temporarily
   records the backslash position, which the slow path uses as the lower
   bound of its own backward whitespace scan.  */
1020 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
1021 d = p - 2;
1022 buffer->next_line = p - 1;
1023
1024 slow_path:
1025 while (1)
1026 {
/* Copy the next source byte down to the write position.  */
1027 c = *++s;
1028 *++d = c;
1029
1030 if (c == '\n' || c == '\r')
1031 {
1032 /* Handle DOS line endings. */
1033 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
1034 s++;
1035 if (s == buffer->rlimit)
1036 break;
1037
1038 /* Escaped? */
1039 p = d;
1040 while (p != buffer->next_line && is_nvspace (p[-1])(_sch_istable[(p[-1]) & 0xff] & (unsigned short)(_sch_isnvsp
))
)
1041 p--;
1042 if (p == buffer->next_line || p[-1] != '\\')
1043 break;
1044
1045 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
1046 d = p - 2;
1047 buffer->next_line = p - 1;
1048 }
1049 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
1050 {
1051 /* Add a note regardless, for the benefit of -Wtrigraphs. */
1052 add_line_note (buffer, d, s[2]);
1053 if (CPP_OPTION (pfile, trigraphs)((pfile)->opts.trigraphs))
1054 {
1055 *d = _cpp_trigraph_map[s[2]];
1056 s += 2;
1057 }
1058 }
1059 }
1060 }
1061 else
1062 {
/* from_stage3 buffer: only locate the line terminator.  */
1063 while (*s != '\n' && *s != '\r')
1064 s++;
1065 d = (uchar *) s;
1066
1067 /* Handle DOS line endings. */
1068 if (*s == '\r' && s + 1 != buffer->rlimit && s[1] == '\n')
1069 s++;
1070 }
1071
1072 done:
/* D is the end of the cleaned line, S the last source byte consumed.  */
1073 *d = '\n';
1074 /* A sentinel note that should never be processed. */
1075 add_line_note (buffer, d + 1, '\n');
1076 buffer->next_line = s + 1;
1077 }
1078
1079template <bool lexing_raw_string>
1080static bool get_fresh_line_impl (cpp_reader *pfile);
1081
1082/* Return true if the trigraph indicated by NOTE should be warned
1083 about in a comment. */
1084static bool
1085warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
1086{
1087 const uchar *p;
1088
1089 /* Within comments we don't warn about trigraphs, unless the
1090 trigraph forms an escaped newline, as that may change
1091 behavior. */
1092 if (note->type != '/')
1093 return false;
1094
1095 /* If -trigraphs, then this was an escaped newline iff the next note
1096 is coincident. */
1097 if (CPP_OPTION (pfile, trigraphs)((pfile)->opts.trigraphs))
1098 return note[1].pos == note->pos;
1099
1100 /* Otherwise, see if this forms an escaped newline. */
1101 p = note->pos + 3;
1102 while (is_nvspace (*p)(_sch_istable[(*p) & 0xff] & (unsigned short)(_sch_isnvsp
))
)
1103 p++;
1104
1105 /* There might have been escaped newlines between the trigraph and the
1106 newline we found. Hence the position test. */
1107 return (*p == '\n' && p < note[1].pos);
1108}
1109
1110/* Process the notes created by add_line_note as far as the current
1111 location. */
1112void
1113_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
1114{
1115 cpp_buffer *buffer = pfile->buffer;
1116
1117 for (;;)
1118 {
1119 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
1120 unsigned int col;
1121
1122 if (note->pos > buffer->cur)
1123 break;
1124
1125 buffer->cur_note++;
1126 col = CPP_BUF_COLUMN (buffer, note->pos + 1)((note->pos + 1) - (buffer)->line_base);
1127
1128 if (note->type == '\\' || note->type == ' ')
1129 {
1130 if (note->type == ' ' && !in_comment)
1131 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1132 "backslash and newline separated by space");
1133
1134 if (buffer->next_line > buffer->rlimit)
1135 {
1136 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1137 "backslash-newline at end of file");
1138 /* Prevent "no newline at end of file" warning. */
1139 buffer->next_line = buffer->rlimit;
1140 }
1141
1142 buffer->line_base = note->pos;
1143 CPP_INCREMENT_LINE (pfile, 0)do { const class line_maps *line_table = pfile->line_table
; const struct line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP
(line_table); linenum_type line = SOURCE_LINE (map, line_table
->highest_line); linemap_line_start (pfile->line_table,
line + 1, 0); } while (0)
;
1144 }
1145 else if (_cpp_trigraph_map[note->type])
1146 {
1147 if (CPP_OPTION (pfile, warn_trigraphs)((pfile)->opts.warn_trigraphs)
1148 && (!in_comment || warn_in_comment (pfile, note)))
1149 {
1150 if (CPP_OPTION (pfile, trigraphs)((pfile)->opts.trigraphs))
1151 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1152 pfile->line_table->highest_line, col,
1153 "trigraph ??%c converted to %c",
1154 note->type,
1155 (int) _cpp_trigraph_map[note->type]);
1156 else
1157 {
1158 cpp_warning_with_line
1159 (pfile, CPP_W_TRIGRAPHS,
1160 pfile->line_table->highest_line, col,
1161 "trigraph ??%c ignored, use -trigraphs to enable",
1162 note->type);
1163 }
1164 }
1165 }
1166 else if (note->type == 0)
1167 /* Already processed in lex_raw_string. */;
1168 else
1169 abort ()fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 1169, __FUNCTION__)
;
1170 }
1171}
1172
1173namespace bidi {
1174 enum class kind {
1175 NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
1176 };
1177
1178 /* All the UTF-8 encodings of bidi characters start with E2. */
1179 constexpr uchar utf8_start = 0xe2;
1180
1181 struct context
1182 {
1183 context () {}
1184 context (location_t loc, kind k, bool pdf, bool ucn)
1185 : m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn)
1186 {
1187 }
1188
1189 kind get_pop_kind () const
1190 {
1191 return m_pdf ? kind::PDF : kind::PDI;
1192 }
1193 bool ucn_p () const
1194 {
1195 return m_ucn;
1196 }
1197
1198 location_t m_loc;
1199 kind m_kind;
1200 unsigned m_pdf : 1;
1201 unsigned m_ucn : 1;
1202 };
1203
1204 /* A vector holding currently open bidi contexts. We use a char for
1205 each context, its LSB is 1 if it represents a PDF context, 0 if it
1206 represents a PDI context. The next bit is 1 if this context was open
1207 by a bidi character written as a UCN, and 0 when it was UTF-8. */
1208 semi_embedded_vec <context, 16> vec;
1209
1210 /* Close the whole comment/identifier/string literal/character constant
1211 context. */
1212 void on_close ()
1213 {
1214 vec.truncate (0);
1215 }
1216
1217 /* Pop the last element in the vector. */
1218 void pop ()
1219 {
1220 unsigned int len = vec.count ();
1221 gcc_checking_assert (len > 0)((void)(!(len > 0) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 1221, __FUNCTION__), 0 : 0))
;
1222 vec.truncate (len - 1);
1223 }
1224
1225 /* Return the pop kind of the context of the Ith element. */
1226 kind pop_kind_at (unsigned int i)
1227 {
1228 return vec[i].get_pop_kind ();
1229 }
1230
1231 /* Return the pop kind of the context that is currently opened. */
1232 kind current_ctx ()
1233 {
1234 unsigned int len = vec.count ();
1235 if (len == 0)
1236 return kind::NONE;
1237 return vec[len - 1].get_pop_kind ();
1238 }
1239
1240 /* Return true if the current context comes from a UCN origin, that is,
1241 the bidi char which started this bidi context was written as a UCN. */
1242 bool current_ctx_ucn_p ()
1243 {
1244 unsigned int len = vec.count ();
1245 gcc_checking_assert (len > 0)((void)(!(len > 0) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 1245, __FUNCTION__), 0 : 0))
;
1246 return vec[len - 1].m_ucn;
1247 }
1248
1249 location_t current_ctx_loc ()
1250 {
1251 unsigned int len = vec.count ();
1252 gcc_checking_assert (len > 0)((void)(!(len > 0) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 1252, __FUNCTION__), 0 : 0))
;
1253 return vec[len - 1].m_loc;
1254 }
1255
1256 /* We've read a bidi char, update the current vector as necessary.
1257 LOC is only valid when K is not kind::NONE. */
1258 void on_char (kind k, bool ucn_p, location_t loc)
1259 {
1260 switch (k)
1261 {
1262 case kind::LRE:
1263 case kind::RLE:
1264 case kind::LRO:
1265 case kind::RLO:
1266 vec.push (context (loc, k, true, ucn_p));
1267 break;
1268 case kind::LRI:
1269 case kind::RLI:
1270 case kind::FSI:
1271 vec.push (context (loc, k, false, ucn_p));
1272 break;
1273 /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
1274 whose scope has not yet been terminated. */
1275 case kind::PDF:
1276 if (current_ctx () == kind::PDF)
1277 pop ();
1278 break;
1279 /* PDI terminates the scope of the last LRI, RLI, or FSI whose
1280 scope has not yet been terminated, as well as the scopes of
1281 any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
1282 yet been terminated. */
1283 case kind::PDI:
1284 for (int i = vec.count () - 1; i >= 0; --i)
1285 if (pop_kind_at (i) == kind::PDI)
1286 {
1287 vec.truncate (i);
1288 break;
1289 }
1290 break;
1291 case kind::LTR:
1292 case kind::RTL:
1293 /* These aren't popped by a PDF/PDI. */
1294 break;
1295 ATTR_LIKELY[[likely]] case kind::NONE:
1296 break;
1297 default:
1298 abort ()fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 1298, __FUNCTION__)
;
1299 }
1300 }
1301
1302 /* Return a descriptive string for K. */
1303 const char *to_str (kind k)
1304 {
1305 switch (k)
1306 {
1307 case kind::LRE:
1308 return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
1309 case kind::RLE:
1310 return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
1311 case kind::LRO:
1312 return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
1313 case kind::RLO:
1314 return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
1315 case kind::LRI:
1316 return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
1317 case kind::RLI:
1318 return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
1319 case kind::FSI:
1320 return "U+2068 (FIRST STRONG ISOLATE)";
1321 case kind::PDF:
1322 return "U+202C (POP DIRECTIONAL FORMATTING)";
1323 case kind::PDI:
1324 return "U+2069 (POP DIRECTIONAL ISOLATE)";
1325 case kind::LTR:
1326 return "U+200E (LEFT-TO-RIGHT MARK)";
1327 case kind::RTL:
1328 return "U+200F (RIGHT-TO-LEFT MARK)";
1329 default:
1330 abort ()fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 1330, __FUNCTION__)
;
1331 }
1332 }
1333}
1334
1335/* Get location_t for the range of bytes [START, START + NUM_BYTES)
1336 within the current line in FILE, with the caret at START. */
1337
1338static location_t
1339get_location_for_byte_range_in_cur_line (cpp_reader *pfile,
1340 const unsigned char *const start,
1341 size_t num_bytes)
1342{
1343 gcc_checking_assert (num_bytes > 0)((void)(!(num_bytes > 0) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 1343, __FUNCTION__), 0 : 0))
;
1344
1345 /* CPP_BUF_COLUMN and linemap_position_for_column both refer
1346 to offsets in bytes, but CPP_BUF_COLUMN is 0-based,
1347 whereas linemap_position_for_column is 1-based. */
1348
1349 /* Get 0-based offsets within the line. */
1350 size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start)((start) - (pfile->buffer)->line_base);
1351 size_t end_offset = start_offset + num_bytes - 1;
1352
1353 /* Now convert to location_t, where "columns" are 1-based byte offsets. */
1354 location_t start_loc = linemap_position_for_column (pfile->line_table,
1355 start_offset + 1);
1356 location_t end_loc = linemap_position_for_column (pfile->line_table,
1357 end_offset + 1);
1358
1359 if (start_loc == end_loc)
1360 return start_loc;
1361
1362 source_range src_range;
1363 src_range.m_start = start_loc;
1364 src_range.m_finish = end_loc;
1365 location_t combined_loc = COMBINE_LOCATION_DATA (pfile->line_table,
1366 start_loc,
1367 src_range,
1368 NULL__null,
1369 0);
1370 return combined_loc;
1371}
1372
1373/* Parse a sequence of 3 bytes starting with P and return its bidi code. */
1374
1375static bidi::kind
1376get_bidi_utf8_1 (const unsigned char *const p)
1377{
1378 gcc_checking_assert (p[0] == bidi::utf8_start)((void)(!(p[0] == bidi::utf8_start) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 1378, __FUNCTION__), 0 : 0))
;
1379
1380 if (p[1] == 0x80)
1381 switch (p[2])
1382 {
1383 case 0xaa:
1384 return bidi::kind::LRE;
1385 case 0xab:
1386 return bidi::kind::RLE;
1387 case 0xac:
1388 return bidi::kind::PDF;
1389 case 0xad:
1390 return bidi::kind::LRO;
1391 case 0xae:
1392 return bidi::kind::RLO;
1393 case 0x8e:
1394 return bidi::kind::LTR;
1395 case 0x8f:
1396 return bidi::kind::RTL;
1397 default:
1398 break;
1399 }
1400 else if (p[1] == 0x81)
1401 switch (p[2])
1402 {
1403 case 0xa6:
1404 return bidi::kind::LRI;
1405 case 0xa7:
1406 return bidi::kind::RLI;
1407 case 0xa8:
1408 return bidi::kind::FSI;
1409 case 0xa9:
1410 return bidi::kind::PDI;
1411 default:
1412 break;
1413 }
1414
1415 return bidi::kind::NONE;
1416}
1417
1418/* Parse a sequence of 3 bytes starting with P and return its bidi code.
1419 If the kind is not NONE, write the location to *OUT.*/
1420
1421static bidi::kind
1422get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out)
1423{
1424 bidi::kind result = get_bidi_utf8_1 (p);
1425 if (result
17.1
'result' is equal to NONE
!= bidi::kind::NONE)
18
Taking false branch
1426 {
1427 /* We have a sequence of 3 bytes starting at P. */
1428 *out = get_location_for_byte_range_in_cur_line (pfile, p, 3);
1429 }
1430 return result;
19
Returning without writing to '*out'
1431}
1432
1433/* Parse a UCN where P points just past \u or \U and return its bidi code. */
1434
1435static bidi::kind
1436get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end)
1437{
1438 /* 6.4.3 Universal Character Names
1439 \u hex-quad
1440 \U hex-quad hex-quad
1441 \u { simple-hexadecimal-digit-sequence }
1442 where \unnnn means \U0000nnnn. */
1443
1444 *end = p + 4;
1445 if (is_U)
1446 {
1447 if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
1448 return bidi::kind::NONE;
1449 /* Skip 4B so we can treat \u and \U the same below. */
1450 p += 4;
1451 *end += 4;
1452 }
1453 else if (p[0] == '{')
1454 {
1455 p++;
1456 while (*p == '0')
1457 p++;
1458 if (p[0] != '2'
1459 || p[1] != '0'
1460 || !ISXDIGIT (p[2])(_sch_istable[(p[2]) & 0xff] & (unsigned short)(_sch_isxdigit
))
1461 || !ISXDIGIT (p[3])(_sch_istable[(p[3]) & 0xff] & (unsigned short)(_sch_isxdigit
))
1462 || p[4] != '}')
1463 return bidi::kind::NONE;
1464 *end = p + 5;
1465 }
1466
1467 /* All code points we are looking for start with 20xx. */
1468 if (p[0] != '2' || p[1] != '0')
1469 return bidi::kind::NONE;
1470 else if (p[2] == '2')
1471 switch (p[3])
1472 {
1473 case 'a':
1474 case 'A':
1475 return bidi::kind::LRE;
1476 case 'b':
1477 case 'B':
1478 return bidi::kind::RLE;
1479 case 'c':
1480 case 'C':
1481 return bidi::kind::PDF;
1482 case 'd':
1483 case 'D':
1484 return bidi::kind::LRO;
1485 case 'e':
1486 case 'E':
1487 return bidi::kind::RLO;
1488 default:
1489 break;
1490 }
1491 else if (p[2] == '6')
1492 switch (p[3])
1493 {
1494 case '6':
1495 return bidi::kind::LRI;
1496 case '7':
1497 return bidi::kind::RLI;
1498 case '8':
1499 return bidi::kind::FSI;
1500 case '9':
1501 return bidi::kind::PDI;
1502 default:
1503 break;
1504 }
1505 else if (p[2] == '0')
1506 switch (p[3])
1507 {
1508 case 'e':
1509 case 'E':
1510 return bidi::kind::LTR;
1511 case 'f':
1512 case 'F':
1513 return bidi::kind::RTL;
1514 default:
1515 break;
1516 }
1517
1518 return bidi::kind::NONE;
1519}
1520
1521/* Parse a UCN where P points just past \u or \U and return its bidi code.
1522 If the kind is not NONE, write the location to *OUT. */
1523
1524static bidi::kind
1525get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
1526 location_t *out)
1527{
1528 const unsigned char *end;
1529 bidi::kind result = get_bidi_ucn_1 (p, is_U, &end);
1530 if (result != bidi::kind::NONE)
1531 {
1532 const unsigned char *start = p - 2;
1533 size_t num_bytes = end - start;
1534 *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
1535 }
1536 return result;
1537}
1538
1539/* Parse a named universal character escape where P points just past \N and
1540 return its bidi code. If the kind is not NONE, write the location to
1541 *OUT. */
1542
1543static bidi::kind
1544get_bidi_named (cpp_reader *pfile, const unsigned char *p, location_t *out)
1545{
1546 bidi::kind result = bidi::kind::NONE;
1547 if (*p != '{')
1548 return bidi::kind::NONE;
1549 if (strncmp ((const char *) (p + 1), "LEFT-TO-RIGHT ", 14) == 0)
1550 {
1551 if (strncmp ((const char *) (p + 15), "MARK}", 5) == 0)
1552 result = bidi::kind::LTR;
1553 else if (strncmp ((const char *) (p + 15), "EMBEDDING}", 10) == 0)
1554 result = bidi::kind::LRE;
1555 else if (strncmp ((const char *) (p + 15), "OVERRIDE}", 9) == 0)
1556 result = bidi::kind::LRO;
1557 else if (strncmp ((const char *) (p + 15), "ISOLATE}", 8) == 0)
1558 result = bidi::kind::LRI;
1559 }
1560 else if (strncmp ((const char *) (p + 1), "RIGHT-TO-LEFT ", 14) == 0)
1561 {
1562 if (strncmp ((const char *) (p + 15), "MARK}", 5) == 0)
1563 result = bidi::kind::RTL;
1564 else if (strncmp ((const char *) (p + 15), "EMBEDDING}", 10) == 0)
1565 result = bidi::kind::RLE;
1566 else if (strncmp ((const char *) (p + 15), "OVERRIDE}", 9) == 0)
1567 result = bidi::kind::RLO;
1568 else if (strncmp ((const char *) (p + 15), "ISOLATE}", 8) == 0)
1569 result = bidi::kind::RLI;
1570 }
1571 else if (strncmp ((const char *) (p + 1), "POP DIRECTIONAL ", 16) == 0)
1572 {
1573 if (strncmp ((const char *) (p + 16), "FORMATTING}", 11) == 0)
1574 result = bidi::kind::PDF;
1575 else if (strncmp ((const char *) (p + 16), "ISOLATE}", 8) == 0)
1576 result = bidi::kind::PDI;
1577 }
1578 else if (strncmp ((const char *) (p + 1), "FIRST STRONG ISOLATE}", 21) == 0)
1579 result = bidi::kind::FSI;
1580 if (result != bidi::kind::NONE)
1581 *out = get_location_for_byte_range_in_cur_line (pfile, p - 2,
1582 (strchr ((const char *)
1583 (p + 1), '}')
1584 - (const char *) p)
1585 + 3);
1586 return result;
1587}
1588
1589/* Subclass of rich_location for reporting on unpaired UTF-8
1590 bidirectional control character(s).
1591 Escape the source lines on output, and show all unclosed
1592 bidi context, labelling everything. */
1593
1594class unpaired_bidi_rich_location : public rich_location
1595{
1596 public:
1597 class custom_range_label : public range_label
1598 {
1599 public:
1600 label_text get_text (unsigned range_idx) const final override
1601 {
1602 /* range 0 is the primary location; each subsequent range i + 1
1603 is for bidi::vec[i]. */
1604 if (range_idx > 0)
1605 {
1606 const bidi::context &ctxt (bidi::vec[range_idx - 1]);
1607 return label_text::borrow (bidi::to_str (ctxt.m_kind));
1608 }
1609 else
1610 return label_text::borrow (_("end of bidirectional context")("end of bidirectional context"));
1611 }
1612 };
1613
1614 unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc)
1615 : rich_location (pfile->line_table, loc, &m_custom_label)
1616 {
1617 set_escape_on_output (true);
1618 for (unsigned i = 0; i < bidi::vec.count (); i++)
1619 add_range (bidi::vec[i].m_loc,
1620 SHOW_RANGE_WITHOUT_CARET,
1621 &m_custom_label);
1622 }
1623
1624 private:
1625 custom_range_label m_custom_label;
1626};
1627
1628/* We're closing a bidi context, that is, we've encountered a newline,
1629 are closing a C-style comment, or are at the end of a string literal,
1630 character constant, or identifier. Warn if this context was not
1631 properly terminated by a PDI or PDF. P points to the last character
1632 in this context. */
1633
1634static void
1635maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
1636{
1637 const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional)((pfile)->opts.cpp_warn_bidirectional);
1638 if (bidi::vec.count () > 0
1639 && (warn_bidi & bidirectional_unpaired
1640 && (!bidi::current_ctx_ucn_p ()
1641 || (warn_bidi & bidirectional_ucn))))
1642 {
1643 const location_t loc
1644 = linemap_position_for_column (pfile->line_table,
1645 CPP_BUF_COLUMN (pfile->buffer, p)((p) - (pfile->buffer)->line_base));
1646 unpaired_bidi_rich_location rich_loc (pfile, loc);
1647 /* cpp_callbacks doesn't yet have a way to handle singular vs plural
1648 forms of a diagnostic, so fake it for now. */
1649 if (bidi::vec.count () > 1)
1650 cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1651 "unpaired UTF-8 bidirectional control characters "
1652 "detected");
1653 else
1654 cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1655 "unpaired UTF-8 bidirectional control character "
1656 "detected");
1657 }
1658 /* We're done with this context. */
1659 bidi::on_close ();
1660}
1661
1662/* We're at the beginning or in the middle of an identifier/comment/string
1663 literal/character constant. Warn if we've encountered a bidi character.
1664 KIND says which bidi control character it was; UCN_P is true iff this bidi
1665 control character was written as a UCN. LOC is the location of the
1666 character, but is only valid if KIND != bidi::kind::NONE. */
1667
1668static void
1669maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind,
1670 bool ucn_p, location_t loc)
1671{
1672 if (__builtin_expect (kind == bidi::kind::NONE, 1))
1673 return;
1674
1675 const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional)((pfile)->opts.cpp_warn_bidirectional);
1676
1677 if (warn_bidi & (bidirectional_unpaired|bidirectional_any))
1678 {
1679 rich_location rich_loc (pfile->line_table, loc);
1680 rich_loc.set_escape_on_output (true);
1681
1682 /* It seems excessive to warn about a PDI/PDF that is closing
1683 an opened context because we've already warned about the
1684 opening character. Except warn when we have a UCN x UTF-8
1685 mismatch, if UCN checking is enabled. */
1686 if (kind == bidi::current_ctx ())
1687 {
1688 if (warn_bidi == (bidirectional_unpaired|bidirectional_ucn)
1689 && bidi::current_ctx_ucn_p () != ucn_p)
1690 {
1691 rich_loc.add_range (bidi::current_ctx_loc ());
1692 cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1693 "UTF-8 vs UCN mismatch when closing "
1694 "a context by \"%s\"", bidi::to_str (kind));
1695 }
1696 }
1697 else if (warn_bidi & bidirectional_any
1698 && (!ucn_p || (warn_bidi & bidirectional_ucn)))
1699 {
1700 if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
1701 cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1702 "\"%s\" is closing an unopened context",
1703 bidi::to_str (kind));
1704 else
1705 cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1706 "found problematic Unicode character \"%s\"",
1707 bidi::to_str (kind));
1708 }
1709 }
1710 /* We're done with this context. */
1711 bidi::on_char (kind, ucn_p, loc);
1712}
1713
/* Byte-value thresholds used when diagnosing UTF-8.  In
   _cpp_warn_invalid_utf8 a trailing byte is accepted only when it lies
   in the range [utf8_continuation, utf8_signifier), and a first byte
   below utf8_signifier is diagnosed on its own.  */
1714 static const cppchar_t utf8_continuation = 0x80;
1715 static const cppchar_t utf8_signifier = 0xC0;
1716
1717/* Emit -Winvalid-utf8 warning on invalid UTF-8 character starting
1718 at PFILE->buffer->cur. Return a pointer after the diagnosed
1719 invalid character. */
1720
1721static const uchar *
1722_cpp_warn_invalid_utf8 (cpp_reader *pfile)
1723{
1724 cpp_buffer *buffer = pfile->buffer;
1725 const uchar *cur = buffer->cur;
1726 bool pedantic = (CPP_PEDANTIC (pfile)((pfile)->opts.cpp_pedantic)
1727 && CPP_OPTION (pfile, cpp_warn_invalid_utf8)((pfile)->opts.cpp_warn_invalid_utf8) == 2);
1728
1729 if (cur[0] < utf8_signifier
1730 || cur[1] < utf8_continuation || cur[1] >= utf8_signifier)
1731 {
1732 if (pedantic)
1733 cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1734 pfile->line_table->highest_line,
1735 CPP_BUF_COL (buffer)(((buffer)->cur) - (buffer)->line_base),
1736 "invalid UTF-8 character <%x>",
1737 cur[0]);
1738 else
1739 cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
1740 pfile->line_table->highest_line,
1741 CPP_BUF_COL (buffer)(((buffer)->cur) - (buffer)->line_base),
1742 "invalid UTF-8 character <%x>",
1743 cur[0]);
1744 return cur + 1;
1745 }
1746 else if (cur[2] < utf8_continuation || cur[2] >= utf8_signifier)
1747 {
1748 if (pedantic)
1749 cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1750 pfile->line_table->highest_line,
1751 CPP_BUF_COL (buffer)(((buffer)->cur) - (buffer)->line_base),
1752 "invalid UTF-8 character <%x><%x>",
1753 cur[0], cur[1]);
1754 else
1755 cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
1756 pfile->line_table->highest_line,
1757 CPP_BUF_COL (buffer)(((buffer)->cur) - (buffer)->line_base),
1758 "invalid UTF-8 character <%x><%x>",
1759 cur[0], cur[1]);
1760 return cur + 2;
1761 }
1762 else if (cur[3] < utf8_continuation || cur[3] >= utf8_signifier)
1763 {
1764 if (pedantic)
1765 cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1766 pfile->line_table->highest_line,
1767 CPP_BUF_COL (buffer)(((buffer)->cur) - (buffer)->line_base),
1768 "invalid UTF-8 character <%x><%x><%x>",
1769 cur[0], cur[1], cur[2]);
1770 else
1771 cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
1772 pfile->line_table->highest_line,
1773 CPP_BUF_COL (buffer)(((buffer)->cur) - (buffer)->line_base),
1774 "invalid UTF-8 character <%x><%x><%x>",
1775 cur[0], cur[1], cur[2]);
1776 return cur + 3;
1777 }
1778 else
1779 {
1780 if (pedantic)
1781 cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1782 pfile->line_table->highest_line,
1783 CPP_BUF_COL (buffer)(((buffer)->cur) - (buffer)->line_base),
1784 "invalid UTF-8 character <%x><%x><%x><%x>",
1785 cur[0], cur[1], cur[2], cur[3]);
1786 else
1787 cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
1788 pfile->line_table->highest_line,
1789 CPP_BUF_COL (buffer)(((buffer)->cur) - (buffer)->line_base),
1790 "invalid UTF-8 character <%x><%x><%x><%x>",
1791 cur[0], cur[1], cur[2], cur[3]);
1792 return cur + 4;
1793 }
1794}
1795
1796/* Helper function of *skip_*_comment and lex*_string. For C,
1797 character at CUR[-1] with MSB set handle -Wbidi-chars* and
1798 -Winvalid-utf8 diagnostics and return pointer to first character
1799 that should be processed next. */
1800
1801static inline const uchar *
1802_cpp_handle_multibyte_utf8 (cpp_reader *pfile, uchar c,
1803 const uchar *cur, bool warn_bidi_p,
1804 bool warn_invalid_utf8_p)
1805{
1806 /* If this is a beginning of a UTF-8 encoding, it might be
1807 a bidirectional control character. */
1808 if (c == bidi::utf8_start && warn_bidi_p)
1809 {
1810 location_t loc;
1811 bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
1812 maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
1813 }
1814 if (!warn_invalid_utf8_p)
1815 return cur;
1816 if (c >= utf8_signifier)
1817 {
1818 cppchar_t s;
1819 const uchar *pstr = cur - 1;
1820 if (_cpp_valid_utf8 (pfile, &pstr, pfile->buffer->rlimit, 0, NULL__null, &s)
1821 && s <= UCS_LIMIT0x10FFFF)
1822 return pstr;
1823 }
1824 pfile->buffer->cur = cur - 1;
1825 return _cpp_warn_invalid_utf8 (pfile);
1826}
1827
1828/* Skip a C-style block comment. We find the end of the comment by
1829 seeing if an asterisk is before every '/' we encounter. Returns
1830 nonzero if comment terminated by EOF, zero otherwise.
1831
1832 Buffer->cur points to the initial asterisk of the comment. */
1833bool
1834_cpp_skip_block_comment (cpp_reader *pfile)
1835{
1836 cpp_buffer *buffer = pfile->buffer;
1837 const uchar *cur = buffer->cur;
1838 uchar c;
1839 const bool warn_bidi_p = pfile->warn_bidi_p ();
1840 const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8)((pfile)->opts.cpp_warn_invalid_utf8);
1841 const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
1842
1843 cur++;
1844 if (*cur == '/')
1845 cur++;
1846
1847 for (;;)
1848 {
1849 /* People like decorating comments with '*', so check for '/'
1850 instead for efficiency. */
1851 c = *cur++;
1852
1853 if (c == '/')
1854 {
1855 if (cur[-2] == '*')
1856 {
1857 if (warn_bidi_p)
1858 maybe_warn_bidi_on_close (pfile, cur);
1859 break;
1860 }
1861
1862 /* Warn about potential nested comments, but not if the '/'
1863 comes immediately before the true comment delimiter.
1864 Don't bother to get it right across escaped newlines. */
1865 if (CPP_OPTION (pfile, warn_comments)((pfile)->opts.warn_comments)
1866 && cur[0] == '*' && cur[1] != '/')
1867 {
1868 buffer->cur = cur;
1869 cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1870 pfile->line_table->highest_line,
1871 CPP_BUF_COL (buffer)(((buffer)->cur) - (buffer)->line_base),
1872 "\"/*\" within comment");
1873 }
1874 }
1875 else if (c == '\n')
1876 {
1877 unsigned int cols;
1878 buffer->cur = cur - 1;
1879 if (warn_bidi_p)
1880 maybe_warn_bidi_on_close (pfile, cur);
1881 _cpp_process_line_notes (pfile, true);
1882 if (buffer->next_line >= buffer->rlimit)
1883 return true;
1884 _cpp_clean_line (pfile);
1885
1886 cols = buffer->next_line - buffer->line_base;
1887 CPP_INCREMENT_LINE (pfile, cols)do { const class line_maps *line_table = pfile->line_table
; const struct line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP
(line_table); linenum_type line = SOURCE_LINE (map, line_table
->highest_line); linemap_line_start (pfile->line_table,
line + 1, cols); } while (0)
;
1888
1889 cur = buffer->cur;
1890 }
1891 else if (__builtin_expect (c >= utf8_continuation, 0)
1892 && warn_bidi_or_invalid_utf8_p)
1893 cur = _cpp_handle_multibyte_utf8 (pfile, c, cur, warn_bidi_p,
1894 warn_invalid_utf8_p);
1895 }
1896
1897 buffer->cur = cur;
1898 _cpp_process_line_notes (pfile, true);
1899 return false;
1900}
1901
1902/* Skip a C++ line comment, leaving buffer->cur pointing to the
1903 terminating newline. Handles escaped newlines. Returns nonzero
1904 if a multiline comment. */
1905static int
1906skip_line_comment (cpp_reader *pfile)
1907{
1908 cpp_buffer *buffer = pfile->buffer;
1909 location_t orig_line = pfile->line_table->highest_line;
1910 const bool warn_bidi_p = pfile->warn_bidi_p ();
1911 const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8)((pfile)->opts.cpp_warn_invalid_utf8);
1912 const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
1913
1914 if (!warn_bidi_or_invalid_utf8_p)
1915 while (*buffer->cur != '\n')
1916 buffer->cur++;
1917 else if (!warn_invalid_utf8_p)
1918 {
1919 while (*buffer->cur != '\n'
1920 && *buffer->cur != bidi::utf8_start)
1921 buffer->cur++;
1922 if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
1923 {
1924 while (*buffer->cur != '\n')
1925 {
1926 if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
1927 {
1928 location_t loc;
1929 bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
1930 maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
1931 }
1932 buffer->cur++;
1933 }
1934 maybe_warn_bidi_on_close (pfile, buffer->cur);
1935 }
1936 }
1937 else
1938 {
1939 while (*buffer->cur != '\n')
1940 {
1941 if (*buffer->cur < utf8_continuation)
1942 {
1943 buffer->cur++;
1944 continue;
1945 }
1946 buffer->cur
1947 = _cpp_handle_multibyte_utf8 (pfile, *buffer->cur, buffer->cur + 1,
1948 warn_bidi_p, warn_invalid_utf8_p);
1949 }
1950 if (warn_bidi_p)
1951 maybe_warn_bidi_on_close (pfile, buffer->cur);
1952 }
1953
1954 _cpp_process_line_notes (pfile, true);
1955 return orig_line != pfile->line_table->highest_line;
1956}
1957
1958/* Skips whitespace, saving the next non-whitespace character. */
1959static void
1960skip_whitespace (cpp_reader *pfile, cppchar_t c)
1961{
1962 cpp_buffer *buffer = pfile->buffer;
1963 bool saw_NUL = false;
1964
1965 do
1966 {
1967 /* Horizontal space always OK. */
1968 if (c == ' ' || c == '\t')
1969 ;
1970 /* Just \f \v or \0 left. */
1971 else if (c == '\0')
1972 saw_NUL = true;
1973 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)((pfile)->opts.cpp_pedantic))
1974 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1975 CPP_BUF_COL (buffer)(((buffer)->cur) - (buffer)->line_base),
1976 "%s in preprocessing directive",
1977 c == '\f' ? "form feed" : "vertical tab");
1978
1979 c = *buffer->cur++;
1980 }
1981 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1982 while (is_nvspace (c)(_sch_istable[(c) & 0xff] & (unsigned short)(_sch_isnvsp
))
);
1983
1984 if (saw_NUL)
1985 {
1986 encoding_rich_location rich_loc (pfile);
1987 cpp_error_at (pfile, CPP_DL_WARNING, &rich_loc,
1988 "null character(s) ignored");
1989 }
1990
1991 buffer->cur--;
1992}
1993
1994/* See if the characters of a number token are valid in a name (no
1995 '.', '+' or '-'). */
1996static int
1997name_p (cpp_reader *pfile, const cpp_string *string)
1998{
1999 unsigned int i;
2000
2001 for (i = 0; i < string->len; i++)
2002 if (!is_idchar (string->text[i])((_sch_istable[(string->text[i]) & 0xff] & (unsigned
short)(_sch_isidnum)) || ((string->text[i]) == '$' &&
((pfile)->opts.dollars_in_ident)))
)
2003 return 0;
2004
2005 return 1;
2006}
2007
2008/* After parsing an identifier or other sequence, produce a warning about
2009 sequences not in NFC/NFKC. */
2010static void
2011warn_about_normalization (cpp_reader *pfile,
2012 const cpp_token *token,
2013 const struct normalize_state *s,
2014 bool identifier)
2015{
2016 if (CPP_OPTION (pfile, warn_normalize)((pfile)->opts.warn_normalize) < NORMALIZE_STATE_RESULT (s)((s)->level)
2017 && !pfile->state.skipping)
2018 {
2019 location_t loc = token->src_loc;
2020
2021 /* If possible, create a location range for the token. */
2022 if (loc >= RESERVED_LOCATION_COUNT
2023 && token->type != CPP_EOF
2024 /* There must be no line notes to process. */
2025 && (!(pfile->buffer->cur
2026 >= pfile->buffer->notes[pfile->buffer->cur_note].pos
2027 && !pfile->overlaid_buffer)))
2028 {
2029 source_range tok_range;
2030 tok_range.m_start = loc;
2031 tok_range.m_finish
2032 = linemap_position_for_column (pfile->line_table,
2033 CPP_BUF_COLUMN (pfile->buffer,((pfile->buffer->cur) - (pfile->buffer)->line_base
)
2034 pfile->buffer->cur)((pfile->buffer->cur) - (pfile->buffer)->line_base
)
);
2035 loc = COMBINE_LOCATION_DATA (pfile->line_table,
2036 loc, tok_range, NULL__null, 0);
2037 }
2038
2039 encoding_rich_location rich_loc (pfile, loc);
2040
2041 /* Make sure that the token is printed using UCNs, even
2042 if we'd otherwise happily print UTF-8. */
2043 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token))((unsigned char *) xmalloc (sizeof (unsigned char) * (cpp_token_len
(token))))
;
2044 size_t sz;
2045
2046 sz = cpp_spell_token (pfile, token, buf, false) - buf;
2047 if (NORMALIZE_STATE_RESULT (s)((s)->level) == normalized_C)
2048 cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
2049 "`%.*s' is not in NFKC", (int) sz, buf);
2050 else if (identifier && CPP_OPTION (pfile, xid_identifiers)((pfile)->opts.xid_identifiers))
2051 cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
2052 "`%.*s' is not in NFC", (int) sz, buf);
2053 else
2054 cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
2055 "`%.*s' is not in NFC", (int) sz, buf);
2056 free (buf);
2057 }
2058}
2059
2060/* Returns TRUE if the sequence starting at buffer->cur is valid in
2061 an identifier. FIRST is TRUE if this starts an identifier. */
2062
2063static bool
2064forms_identifier_p (cpp_reader *pfile, int first,
2065 struct normalize_state *state)
2066{
2067 cpp_buffer *buffer = pfile->buffer;
2068 const bool warn_bidi_p = pfile->warn_bidi_p ();
2069
2070 if (*buffer->cur == '$')
8
Assuming the condition is false
9
Taking false branch
2071 {
2072 if (!CPP_OPTION (pfile, dollars_in_ident)((pfile)->opts.dollars_in_ident))
2073 return false;
2074
2075 buffer->cur++;
2076 if (CPP_OPTION (pfile, warn_dollars)((pfile)->opts.warn_dollars) && !pfile->state.skipping)
2077 {
2078 CPP_OPTION (pfile, warn_dollars)((pfile)->opts.warn_dollars) = 0;
2079 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
2080 }
2081
2082 return true;
2083 }
2084
2085 /* Is this a syntactically valid UCN or a valid UTF-8 char? */
2086 if (CPP_OPTION (pfile, extended_identifiers)((pfile)->opts.extended_identifiers))
10
Assuming field 'extended_identifiers' is not equal to 0
11
Taking true branch
2087 {
2088 cppchar_t s;
2089 if (*buffer->cur >= utf8_signifier)
12
Assuming the condition is true
2090 {
2091 if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
13
Assuming the condition is true
15
Taking true branch
2092 && warn_bidi_p)
14
Assuming 'warn_bidi_p' is true
2093 {
2094 location_t loc;
16
'loc' declared without an initial value
2095 bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
17
Calling 'get_bidi_utf8'
20
Returning from 'get_bidi_utf8'
2096 maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
21
4th function call argument is an uninitialized value
2097 }
2098 if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
2099 state, &s))
2100 return true;
2101 }
2102 else if (*buffer->cur == '\\'
2103 && (buffer->cur[1] == 'u'
2104 || buffer->cur[1] == 'U'
2105 || buffer->cur[1] == 'N'))
2106 {
2107 buffer->cur += 2;
2108 if (warn_bidi_p)
2109 {
2110 location_t loc;
2111 bidi::kind kind;
2112 if (buffer->cur[-1] == 'N')
2113 kind = get_bidi_named (pfile, buffer->cur, &loc);
2114 else
2115 kind = get_bidi_ucn (pfile, buffer->cur,
2116 buffer->cur[-1] == 'U', &loc);
2117 maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
2118 }
2119 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
2120 state, &s, NULL__null, NULL__null))
2121 return true;
2122 buffer->cur -= 2;
2123 }
2124 }
2125
2126 return false;
2127}
2128
2129/* Helper function to issue error about improper __VA_OPT__ use. */
2130static void
2131maybe_va_opt_error (cpp_reader *pfile)
2132{
2133 if (CPP_PEDANTIC (pfile)((pfile)->opts.cpp_pedantic) && !CPP_OPTION (pfile, va_opt)((pfile)->opts.va_opt))
2134 {
2135 /* __VA_OPT__ should not be accepted at all, but allow it in
2136 system headers. */
2137 if (!_cpp_in_system_header (pfile))
2138 {
2139 if (CPP_OPTION (pfile, cplusplus)((pfile)->opts.cplusplus))
2140 cpp_error (pfile, CPP_DL_PEDWARN,
2141 "__VA_OPT__ is not available until C++20");
2142 else
2143 cpp_error (pfile, CPP_DL_PEDWARN,
2144 "__VA_OPT__ is not available until C2X");
2145 }
2146 }
2147 else if (!pfile->state.va_args_ok)
2148 {
2149 /* __VA_OPT__ should only appear in the replacement list of a
2150 variadic macro. */
2151 cpp_error (pfile, CPP_DL_PEDWARN,
2152 "__VA_OPT__ can only appear in the expansion"
2153 " of a C++20 variadic macro");
2154 }
2155}
2156
2157/* Helper function to get the cpp_hashnode of the identifier BASE. */
2158static cpp_hashnode *
2159lex_identifier_intern (cpp_reader *pfile, const uchar *base)
2160{
2161 cpp_hashnode *result;
2162 const uchar *cur;
2163 unsigned int len;
2164 unsigned int hash = HT_HASHSTEP (0, *base)((0) * 67 + ((*base) - 113));;
2165
2166 cur = base + 1;
2167 while (ISIDNUM (*cur)(_sch_istable[(*cur) & 0xff] & (unsigned short)(_sch_isidnum
))
)
2168 {
2169 hash = HT_HASHSTEP (hash, *cur)((hash) * 67 + ((*cur) - 113));;
2170 cur++;
2171 }
2172 len = cur - base;
2173 hash = HT_HASHFINISH (hash, len)((hash) + (len));
2174 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,((cpp_hashnode *) (ht_lookup_with_hash (pfile->hash_table,
base, len, hash, HT_ALLOC)))
2175 base, len, hash, HT_ALLOC))((cpp_hashnode *) (ht_lookup_with_hash (pfile->hash_table,
base, len, hash, HT_ALLOC)))
;
2176
2177 /* Rarely, identifiers require diagnostics when lexed. */
2178 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC(1 << 2))
2179 && !pfile->state.skipping, 0))
2180 {
2181 /* It is allowed to poison the same identifier twice. */
2182 if ((result->flags & NODE_POISONED(1 << 1)) && !pfile->state.poisoned_ok)
2183 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
2184 NODE_NAME (result)(((&(result)->ident))->str));
2185
2186 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
2187 replacement list of a variadic macro. */
2188 if (result == pfile->spec_nodes.n__VA_ARGS__
2189 && !pfile->state.va_args_ok)
2190 {
2191 if (CPP_OPTION (pfile, cplusplus)((pfile)->opts.cplusplus))
2192 cpp_error (pfile, CPP_DL_PEDWARN,
2193 "__VA_ARGS__ can only appear in the expansion"
2194 " of a C++11 variadic macro");
2195 else
2196 cpp_error (pfile, CPP_DL_PEDWARN,
2197 "__VA_ARGS__ can only appear in the expansion"
2198 " of a C99 variadic macro");
2199 }
2200
2201 if (result == pfile->spec_nodes.n__VA_OPT__)
2202 maybe_va_opt_error (pfile);
2203
2204 /* For -Wc++-compat, warn about use of C++ named operators. */
2205 if (result->flags & NODE_WARN_OPERATOR(1 << 7))
2206 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
2207 "identifier \"%s\" is a special operator name in C++",
2208 NODE_NAME (result)(((&(result)->ident))->str));
2209 }
2210
2211 return result;
2212}
2213
2214/* Get the cpp_hashnode of an identifier specified by NAME in
2215 the current cpp_reader object. If none is found, NULL is returned. */
2216cpp_hashnode *
2217_cpp_lex_identifier (cpp_reader *pfile, const char *name)
2218{
2219 cpp_hashnode *result;
2220 result = lex_identifier_intern (pfile, (uchar *) name);
2221 return result;
2222}
2223
2224/* Lex an identifier starting at BUFFER->CUR - 1. */
2225static cpp_hashnode *
2226lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
2227 struct normalize_state *nst, cpp_hashnode **spelling)
2228{
2229 cpp_hashnode *result;
2230 const uchar *cur;
2231 unsigned int len;
2232 unsigned int hash = HT_HASHSTEP (0, *base)((0) * 67 + ((*base) - 113));;
2233 const bool warn_bidi_p = pfile->warn_bidi_p ();
2234
2235 cur = pfile->buffer->cur;
2236 if (! starts_ucn)
2237 {
2238 while (ISIDNUM (*cur)(_sch_istable[(*cur) & 0xff] & (unsigned short)(_sch_isidnum
))
)
2239 {
2240 hash = HT_HASHSTEP (hash, *cur)((hash) * 67 + ((*cur) - 113));;
2241 cur++;
2242 }
2243 NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1))((nst)->previous = (*(cur - 1)), (nst)->prev_class = 0);
2244 }
2245 pfile->buffer->cur = cur;
2246 if (starts_ucn || forms_identifier_p (pfile, false, nst))
2247 {
2248 /* Slower version for identifiers containing UCNs
2249 or extended chars (including $). */
2250 do {
2251 while (ISIDNUM (*pfile->buffer->cur)(_sch_istable[(*pfile->buffer->cur) & 0xff] & (
unsigned short)(_sch_isidnum))
)
2252 {
2253 NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur)((nst)->previous = (*pfile->buffer->cur), (nst)->
prev_class = 0)
;
2254 pfile->buffer->cur++;
2255 }
2256 } while (forms_identifier_p (pfile, false, nst));
2257 if (warn_bidi_p)
2258 maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
2259 result = _cpp_interpret_identifier (pfile, base,
2260 pfile->buffer->cur - base);
2261 *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
2262 }
2263 else
2264 {
2265 len = cur - base;
2266 hash = HT_HASHFINISH (hash, len)((hash) + (len));
2267
2268 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,((cpp_hashnode *) (ht_lookup_with_hash (pfile->hash_table,
base, len, hash, HT_ALLOC)))
2269 base, len, hash, HT_ALLOC))((cpp_hashnode *) (ht_lookup_with_hash (pfile->hash_table,
base, len, hash, HT_ALLOC)))
;
2270 *spelling = result;
2271 }
2272
2273 /* Rarely, identifiers require diagnostics when lexed. */
2274 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC(1 << 2))
2275 && !pfile->state.skipping, 0))
2276 {
2277 /* It is allowed to poison the same identifier twice. */
2278 if ((result->flags & NODE_POISONED(1 << 1)) && !pfile->state.poisoned_ok)
2279 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
2280 NODE_NAME (result)(((&(result)->ident))->str));
2281
2282 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
2283 replacement list of a variadic macro. */
2284 if (result == pfile->spec_nodes.n__VA_ARGS__
2285 && !pfile->state.va_args_ok)
2286 {
2287 if (CPP_OPTION (pfile, cplusplus)((pfile)->opts.cplusplus))
2288 cpp_error (pfile, CPP_DL_PEDWARN,
2289 "__VA_ARGS__ can only appear in the expansion"
2290 " of a C++11 variadic macro");
2291 else
2292 cpp_error (pfile, CPP_DL_PEDWARN,
2293 "__VA_ARGS__ can only appear in the expansion"
2294 " of a C99 variadic macro");
2295 }
2296
2297 /* __VA_OPT__ should only appear in the replacement list of a
2298 variadic macro. */
2299 if (result == pfile->spec_nodes.n__VA_OPT__)
2300 maybe_va_opt_error (pfile);
2301
2302 /* For -Wc++-compat, warn about use of C++ named operators. */
2303 if (result->flags & NODE_WARN_OPERATOR(1 << 7))
2304 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
2305 "identifier \"%s\" is a special operator name in C++",
2306 NODE_NAME (result)(((&(result)->ident))->str));
2307 }
2308
2309 return result;
2310}
2311
2312/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
2313static void
2314lex_number (cpp_reader *pfile, cpp_string *number,
2315 struct normalize_state *nst)
2316{
2317 const uchar *cur;
2318 const uchar *base;
2319 uchar *dest;
2320
2321 base = pfile->buffer->cur - 1;
2322 do
2323 {
2324 const uchar *adj_digit_sep = NULL__null;
2325 cur = pfile->buffer->cur;
2326
2327 /* N.B. ISIDNUM does not include $. */
2328 while (ISIDNUM (*cur)(_sch_istable[(*cur) & 0xff] & (unsigned short)(_sch_isidnum
))
2329 || (*cur == '.' && !DIGIT_SEP (cur[-1])((cur[-1]) == '\'' && ((pfile)->opts.digit_separators
))
)
2330 || DIGIT_SEP (*cur)((*cur) == '\'' && ((pfile)->opts.digit_separators
))
2331 || (VALID_SIGN (*cur, cur[-1])(((*cur) == '+' || (*cur) == '-') && ((cur[-1]) == 'e'
|| (cur[-1]) == 'E' || (((cur[-1]) == 'p' || (cur[-1]) == 'P'
) && ((pfile)->opts.extended_numbers))))
&& !DIGIT_SEP (cur[-2])((cur[-2]) == '\'' && ((pfile)->opts.digit_separators
))
))
2332 {
2333 NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur)((nst)->previous = (*cur), (nst)->prev_class = 0);
2334 /* Adjacent digit separators do not form part of the pp-number syntax.
2335 However, they can safely be diagnosed here as an error, since '' is
2336 not a valid preprocessing token. */
2337 if (DIGIT_SEP (*cur)((*cur) == '\'' && ((pfile)->opts.digit_separators
))
&& DIGIT_SEP (cur[-1])((cur[-1]) == '\'' && ((pfile)->opts.digit_separators
))
&& !adj_digit_sep)
2338 adj_digit_sep = cur;
2339 cur++;
2340 }
2341 /* A number can't end with a digit separator. */
2342 while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1])((cur[-1]) == '\'' && ((pfile)->opts.digit_separators
))
)
2343 --cur;
2344 if (adj_digit_sep && adj_digit_sep < cur)
2345 cpp_error (pfile, CPP_DL_ERROR, "adjacent digit separators");
2346
2347 pfile->buffer->cur = cur;
2348 }
2349 while (forms_identifier_p (pfile, false, nst));
2350
2351 number->len = cur - base;
2352 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
2353 memcpy (dest, base, number->len);
2354 dest[number->len] = '\0';
2355 number->text = dest;
2356}
2357
2358/* Create a token of type TYPE with a literal spelling. */
2359static void
2360create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
2361 unsigned int len, enum cpp_ttype type)
2362{
2363 token->type = type;
2364 token->val.str.len = len;
2365 token->val.str.text = cpp_alloc_token_string (pfile, base, len);
2366}
2367
2368const uchar *
2369cpp_alloc_token_string (cpp_reader *pfile,
2370 const unsigned char *ptr, unsigned len)
2371{
2372 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
2373
2374 dest[len] = 0;
2375 memcpy (dest, ptr, len);
2376 return dest;
2377}
2378
2379/* A pair of raw buffer pointers. The currently open one is [1], the
2380 first one is [0]. Used for string literal lexing. */
2381struct lit_accum {
2382 _cpp_buff *first;
2383 _cpp_buff *last;
2384 const uchar *rpos;
2385 size_t accum;
2386
2387 lit_accum ()
2388 : first (NULL__null), last (NULL__null), rpos (0), accum (0)
2389 {
2390 }
2391
2392 void append (cpp_reader *, const uchar *, size_t);
2393
2394 void read_begin (cpp_reader *);
2395 bool reading_p () const
2396 {
2397 return rpos != NULL__null;
2398 }
2399 char read_char ()
2400 {
2401 char c = *rpos++;
2402 if (rpos == BUFF_FRONT (last)((last)->cur))
2403 rpos = NULL__null;
2404 return c;
2405 }
2406};
2407
2408/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
2409 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
2410
2411void
2412lit_accum::append (cpp_reader *pfile, const uchar *base, size_t len)
2413{
2414 if (!last)
2415 /* Starting. */
2416 first = last = _cpp_get_buff (pfile, len);
2417 else if (len > BUFF_ROOM (last)(size_t) ((last)->limit - (last)->cur))
2418 {
2419 /* There is insufficient room in the buffer. Copy what we can,
2420 and then either extend or create a new one. */
2421 size_t room = BUFF_ROOM (last)(size_t) ((last)->limit - (last)->cur);
2422 memcpy (BUFF_FRONT (last)((last)->cur), base, room);
2423 BUFF_FRONT (last)((last)->cur) += room;
2424 base += room;
2425 len -= room;
2426 accum += room;
2427
2428 gcc_checking_assert (!rpos)((void)(!(!rpos) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 2428, __FUNCTION__), 0 : 0))
;
2429
2430 last = _cpp_append_extend_buff (pfile, last, len);
2431 }
2432
2433 memcpy (BUFF_FRONT (last)((last)->cur), base, len);
2434 BUFF_FRONT (last)((last)->cur) += len;
2435 accum += len;
2436}
2437
2438void
2439lit_accum::read_begin (cpp_reader *pfile)
2440{
2441 /* We never accumulate more than 4 chars to read. */
2442 if (BUFF_ROOM (last)(size_t) ((last)->limit - (last)->cur) < 4)
2443
2444 last = _cpp_append_extend_buff (pfile, last, 4);
2445 rpos = BUFF_FRONT (last)((last)->cur);
2446}
2447
2448/* Returns true if a macro has been defined.
2449 This might not work if compile with -save-temps,
2450 or preprocess separately from compilation. */
2451
2452static bool
2453is_macro(cpp_reader *pfile, const uchar *base)
2454{
2455 const uchar *cur = base;
2456 if (! ISIDST (*cur)(_sch_istable[(*cur) & 0xff] & (unsigned short)(_sch_isidst
))
)
2457 return false;
2458 unsigned int hash = HT_HASHSTEP (0, *cur)((0) * 67 + ((*cur) - 113));;
2459 ++cur;
2460 while (ISIDNUM (*cur)(_sch_istable[(*cur) & 0xff] & (unsigned short)(_sch_isidnum
))
)
2461 {
2462 hash = HT_HASHSTEP (hash, *cur)((hash) * 67 + ((*cur) - 113));;
2463 ++cur;
2464 }
2465 hash = HT_HASHFINISH (hash, cur - base)((hash) + (cur - base));
2466
2467 cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,((cpp_hashnode *) (ht_lookup_with_hash (pfile->hash_table,
base, cur - base, hash, HT_NO_INSERT)))
2468 base, cur - base, hash, HT_NO_INSERT))((cpp_hashnode *) (ht_lookup_with_hash (pfile->hash_table,
base, cur - base, hash, HT_NO_INSERT)))
;
2469
2470 return result && cpp_macro_p (result);
2471}
2472
2473/* Returns true if a literal suffix does not have the expected form
2474 and is defined as a macro. */
2475
2476static bool
2477is_macro_not_literal_suffix(cpp_reader *pfile, const uchar *base)
2478{
2479 /* User-defined literals outside of namespace std must start with a single
2480 underscore, so assume anything of that form really is a UDL suffix.
2481 We don't need to worry about UDLs defined inside namespace std because
2482 their names are reserved, so cannot be used as macro names in valid
2483 programs. */
2484 if (base[0] == '_' && base[1] != '_')
2485 return false;
2486 return is_macro (pfile, base);
2487}
2488
2489/* Lexes a raw string. The stored string contains the spelling,
2490 including double quotes, delimiter string, '(' and ')', any leading
2491 'L', 'u', 'U' or 'u8' and 'R' modifier. The created token contains
2492 the type of the literal, or CPP_OTHER if it was not properly
2493 terminated.
2494
2495 BASE is the start of the token. Updates pfile->buffer->cur to just
2496 after the lexed string.
2497
2498 The spelling is NUL-terminated, but it is not guaranteed that this
2499 is the first NUL since embedded NULs are preserved. */
2500
2501static void
2502lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
2503{
2504 const uchar *pos = base;
2505 const bool warn_bidi_p = pfile->warn_bidi_p ();
2506 const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8)((pfile)->opts.cpp_warn_invalid_utf8);
2507 const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
2508
2509 /* 'tis a pity this information isn't passed down from the lexer's
2510 initial categorization of the token. */
2511 enum cpp_ttype type = CPP_STRING;
2512
2513 if (*pos == 'L')
2514 {
2515 type = CPP_WSTRING;
2516 pos++;
2517 }
2518 else if (*pos == 'U')
2519 {
2520 type = CPP_STRING32;
2521 pos++;
2522 }
2523 else if (*pos == 'u')
2524 {
2525 if (pos[1] == '8')
2526 {
2527 type = CPP_UTF8STRING;
2528 pos++;
2529 }
2530 else
2531 type = CPP_STRING16;
2532 pos++;
2533 }
2534
2535 gcc_checking_assert (pos[0] == 'R' && pos[1] == '"')((void)(!(pos[0] == 'R' && pos[1] == '"') ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 2535, __FUNCTION__), 0 : 0))
;
2536 pos += 2;
2537
2538 _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
2539
2540 /* Skip notes before the ". */
2541 while (note->pos < pos)
2542 ++note;
2543
2544 lit_accum accum;
2545
2546 uchar prefix[17];
2547 unsigned prefix_len = 0;
2548 enum Phase
2549 {
2550 PHASE_PREFIX = -2,
2551 PHASE_NONE = -1,
2552 PHASE_SUFFIX = 0
2553 } phase = PHASE_PREFIX;
2554
2555 for (;;)
2556 {
2557 gcc_checking_assert (note->pos >= pos)((void)(!(note->pos >= pos) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 2557, __FUNCTION__), 0 : 0))
;
2558
2559 /* Undo any escaped newlines and trigraphs. */
2560 if (!accum.reading_p () && note->pos == pos)
2561 switch (note->type)
2562 {
2563 case '\\':
2564 case ' ':
2565 /* Restore backslash followed by newline. */
2566 accum.append (pfile, base, pos - base);
2567 base = pos;
2568 accum.read_begin (pfile);
2569 accum.append (pfile, UC(const uchar *)"\\", 1);
2570
2571 after_backslash:
2572 if (note->type == ' ')
2573 /* GNU backslash whitespace newline extension. FIXME
2574 could be any sequence of non-vertical space. When we
2575 can properly restore any such sequence, we should
2576 mark this note as handled so _cpp_process_line_notes
2577 doesn't warn. */
2578 accum.append (pfile, UC(const uchar *)" ", 1);
2579
2580 accum.append (pfile, UC(const uchar *)"\n", 1);
2581 note++;
2582 break;
2583
2584 case '\n':
2585 /* This can happen for ??/<NEWLINE> when trigraphs are not
2586 being interpretted. */
2587 gcc_checking_assert (!CPP_OPTION (pfile, trigraphs))((void)(!(!((pfile)->opts.trigraphs)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 2587, __FUNCTION__), 0 : 0))
;
2588 note->type = 0;
2589 note++;
2590 break;
2591
2592 default:
2593 gcc_checking_assert (_cpp_trigraph_map[note->type])((void)(!(_cpp_trigraph_map[note->type]) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 2593, __FUNCTION__), 0 : 0))
;
2594
2595 /* Don't warn about this trigraph in
2596 _cpp_process_line_notes, since trigraphs show up as
2597 trigraphs in raw strings. */
2598 uchar type = note->type;
2599 note->type = 0;
2600
2601 if (CPP_OPTION (pfile, trigraphs)((pfile)->opts.trigraphs))
2602 {
2603 accum.append (pfile, base, pos - base);
2604 base = pos;
2605 accum.read_begin (pfile);
2606 accum.append (pfile, UC(const uchar *)"??", 2);
2607 accum.append (pfile, &type, 1);
2608
2609 /* ??/ followed by newline gets two line notes, one for
2610 the trigraph and one for the backslash/newline. */
2611 if (type == '/' && note[1].pos == pos)
2612 {
2613 note++;
2614 gcc_assert (note->type == '\\' || note->type == ' ')((void)(!(note->type == '\\' || note->type == ' ') ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 2614, __FUNCTION__), 0 : 0))
;
2615 goto after_backslash;
2616 }
2617 /* Skip the replacement character. */
2618 base = ++pos;
2619 }
2620
2621 note++;
2622 break;
2623 }
2624
2625 /* Now get a char to process. Either from an expanded note, or
2626 from the line buffer. */
2627 bool read_note = accum.reading_p ();
2628 char c = read_note ? accum.read_char () : *pos++;
2629
2630 if (phase == PHASE_PREFIX)
2631 {
2632 if (c == '(')
2633 {
2634 /* Done. */
2635 phase = PHASE_NONE;
2636 prefix[prefix_len++] = '"';
2637 }
2638 else if (prefix_len < 16
2639 /* Prefix chars are any of the basic character set,
2640 [lex.charset] except for '
2641 ()\\\t\v\f\n'. Optimized for a contiguous
2642 alphabet. */
2643 /* Unlike a switch, this collapses down to one or
2644 two shift and bitmask operations on an ASCII
2645 system, with an outlier or two. */
2646 && (('Z' - 'A' == 25
2647 ? ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
2648 : ISIDST (c)(_sch_istable[(c) & 0xff] & (unsigned short)(_sch_isidst
))
)
2649 || (c >= '0' && c <= '9')
2650 || c == '_' || c == '{' || c == '}'
2651 || c == '[' || c == ']' || c == '#'
2652 || c == '<' || c == '>' || c == '%'
2653 || c == ':' || c == ';' || c == '.' || c == '?'
2654 || c == '*' || c == '+' || c == '-' || c == '/'
2655 || c == '^' || c == '&' || c == '|' || c == '~'
2656 || c == '!' || c == '=' || c == ','
2657 || c == '"' || c == '\''))
2658 prefix[prefix_len++] = c;
2659 else
2660 {
2661 /* Something is wrong. */
2662 int col = CPP_BUF_COLUMN (pfile->buffer, pos)((pos) - (pfile->buffer)->line_base) + read_note;
2663 if (prefix_len == 16)
2664 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
2665 col, "raw string delimiter longer "
2666 "than 16 characters");
2667 else if (c == '\n')
2668 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
2669 col, "invalid new-line in raw "
2670 "string delimiter");
2671 else
2672 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
2673 col, "invalid character '%c' in "
2674 "raw string delimiter", c);
2675 type = CPP_OTHER;
2676 phase = PHASE_NONE;
2677 /* Continue until we get a close quote, that's probably
2678 the best failure mode. */
2679 prefix_len = 0;
2680 }
2681 if (c != '\n')
2682 continue;
2683 }
2684
2685 if (phase != PHASE_NONE)
2686 {
2687 if (prefix[phase] != c)
2688 phase = PHASE_NONE;
2689 else if (unsigned (phase + 1) == prefix_len)
2690 break;
2691 else
2692 {
2693 phase = Phase (phase + 1);
2694 continue;
2695 }
2696 }
2697
2698 if (!prefix_len && c == '"')
2699 /* Failure mode lexing. */
2700 goto out;
2701 else if (prefix_len && c == ')')
2702 phase = PHASE_SUFFIX;
2703 else if (!read_note && c == '\n')
2704 {
2705 pos--;
2706 pfile->buffer->cur = pos;
2707 if ((pfile->state.in_directive || pfile->state.parsing_args)
2708 && pfile->buffer->next_line >= pfile->buffer->rlimit)
2709 {
2710 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
2711 "unterminated raw string");
2712 type = CPP_OTHER;
2713 goto out;
2714 }
2715
2716 accum.append (pfile, base, pos - base + 1);
2717 _cpp_process_line_notes (pfile, false);
2718
2719 if (pfile->buffer->next_line < pfile->buffer->rlimit)
2720 CPP_INCREMENT_LINE (pfile, 0)do { const class line_maps *line_table = pfile->line_table
; const struct line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP
(line_table); linenum_type line = SOURCE_LINE (map, line_table
->highest_line); linemap_line_start (pfile->line_table,
line + 1, 0); } while (0)
;
2721 pfile->buffer->need_line = true;
2722
2723 if (!get_fresh_line_impl<true> (pfile))
2724 {
2725 /* We ran out of file and failed to get a line. */
2726 location_t src_loc = token->src_loc;
2727 token->type = CPP_EOF;
2728 /* Tell the compiler the line number of the EOF token. */
2729 token->src_loc = pfile->line_table->highest_line;
2730 token->flags = BOL(1 << 12);
2731 if (accum.first)
2732 _cpp_release_buff (pfile, accum.first);
2733 cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
2734 "unterminated raw string");
2735
2736 /* Now pop the buffer that get_fresh_line_impl() did not. Popping
2737 is not safe if processing a directive, however this cannot
2738 happen as we already checked above that a line would be
2739 available, and get_fresh_line_impl() can't fail in this
2740 case. */
2741 gcc_assert (!pfile->state.in_directive)((void)(!(!pfile->state.in_directive) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 2741, __FUNCTION__), 0 : 0))
;
2742 _cpp_pop_buffer (pfile);
2743
2744 return;
2745 }
2746
2747 pos = base = pfile->buffer->cur;
2748 note = &pfile->buffer->notes[pfile->buffer->cur_note];
2749 }
2750 else if (__builtin_expect ((unsigned char) c >= utf8_continuation, 0)
2751 && warn_bidi_or_invalid_utf8_p)
2752 pos = _cpp_handle_multibyte_utf8 (pfile, c, pos, warn_bidi_p,
2753 warn_invalid_utf8_p);
2754 }
2755
2756 if (warn_bidi_p)
2757 maybe_warn_bidi_on_close (pfile, pos);
2758
2759 if (CPP_OPTION (pfile, user_literals)((pfile)->opts.user_literals))
2760 {
2761 /* If a string format macro, say from inttypes.h, is placed touching
2762 a string literal it could be parsed as a C++11 user-defined string
2763 literal thus breaking the program. */
2764 if (is_macro_not_literal_suffix (pfile, pos))
2765 {
2766 /* Raise a warning, but do not consume subsequent tokens. */
2767 if (CPP_OPTION (pfile, warn_literal_suffix)((pfile)->opts.warn_literal_suffix) && !pfile->state.skipping)
2768 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
2769 token->src_loc, 0,
2770 "invalid suffix on literal; C++11 requires "
2771 "a space between literal and string macro");
2772 }
2773 /* Grab user defined literal suffix. */
2774 else if (ISIDST (*pos)(_sch_istable[(*pos) & 0xff] & (unsigned short)(_sch_isidst
))
)
2775 {
2776 type = cpp_userdef_string_add_type (type);
2777 ++pos;
2778
2779 while (ISIDNUM (*pos)(_sch_istable[(*pos) & 0xff] & (unsigned short)(_sch_isidnum
))
)
2780 ++pos;
2781 }
2782 }
2783
2784 out:
2785 pfile->buffer->cur = pos;
2786 if (!accum.accum)
2787 create_literal (pfile, token, base, pos - base, type);
2788 else
2789 {
2790 size_t extra_len = pos - base;
2791 uchar *dest = _cpp_unaligned_alloc (pfile, accum.accum + extra_len + 1);
2792
2793 token->type = type;
2794 token->val.str.len = accum.accum + extra_len;
2795 token->val.str.text = dest;
2796 for (_cpp_buff *buf = accum.first; buf; buf = buf->next)
2797 {
2798 size_t len = BUFF_FRONT (buf)((buf)->cur) - buf->base;
2799 memcpy (dest, buf->base, len);
2800 dest += len;
2801 }
2802 _cpp_release_buff (pfile, accum.first);
2803 memcpy (dest, base, extra_len);
2804 dest[extra_len] = '\0';
2805 }
2806}
2807
2808/* Lexes a string, character constant, or angle-bracketed header file
2809 name. The stored string contains the spelling, including opening
2810 quote and any leading 'L', 'u', 'U' or 'u8' and optional
2811 'R' modifier. It returns the type of the literal, or CPP_OTHER
2812 if it was not properly terminated, or CPP_LESS for an unterminated
2813 header name which must be relexed as normal tokens.
2814
2815 The spelling is NUL-terminated, but it is not guaranteed that this
2816 is the first NUL since embedded NULs are preserved. */
2817static void
2818lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
2819{
2820 bool saw_NUL = false;
2821 const uchar *cur;
2822 cppchar_t terminator;
2823 enum cpp_ttype type;
2824
2825 cur = base;
2826 terminator = *cur++;
2827 if (terminator == 'L' || terminator == 'U')
2828 terminator = *cur++;
2829 else if (terminator == 'u')
2830 {
2831 terminator = *cur++;
2832 if (terminator == '8')
2833 terminator = *cur++;
2834 }
2835 if (terminator == 'R')
2836 {
2837 lex_raw_string (pfile, token, base);
2838 return;
2839 }
2840 if (terminator == '"')
2841 type = (*base == 'L' ? CPP_WSTRING :
2842 *base == 'U' ? CPP_STRING32 :
2843 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
2844 : CPP_STRING);
2845 else if (terminator == '\'')
2846 type = (*base == 'L' ? CPP_WCHAR :
2847 *base == 'U' ? CPP_CHAR32 :
2848 *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
2849 : CPP_CHAR);
2850 else
2851 terminator = '>', type = CPP_HEADER_NAME;
2852
2853 const bool warn_bidi_p = pfile->warn_bidi_p ();
2854 const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8)((pfile)->opts.cpp_warn_invalid_utf8);
2855 const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
2856 for (;;)
2857 {
2858 cppchar_t c = *cur++;
2859
2860 /* In #include-style directives, terminators are not escapable. */
2861 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
2862 {
2863 if ((cur[0] == 'u' || cur[0] == 'U' || cur[0] == 'N') && warn_bidi_p)
2864 {
2865 location_t loc;
2866 bidi::kind kind;
2867 if (cur[0] == 'N')
2868 kind = get_bidi_named (pfile, cur + 1, &loc);
2869 else
2870 kind = get_bidi_ucn (pfile, cur + 1, cur[0] == 'U', &loc);
2871 maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
2872 }
2873 cur++;
2874 }
2875 else if (c == terminator)
2876 {
2877 if (warn_bidi_p)
2878 maybe_warn_bidi_on_close (pfile, cur - 1);
2879 break;
2880 }
2881 else if (c == '\n')
2882 {
2883 cur--;
2884 /* Unmatched quotes always yield undefined behavior, but
2885 greedy lexing means that what appears to be an unterminated
2886 header name may actually be a legitimate sequence of tokens. */
2887 if (terminator == '>')
2888 {
2889 token->type = CPP_LESS;
2890 return;
2891 }
2892 type = CPP_OTHER;
2893 break;
2894 }
2895 else if (c == '\0')
2896 saw_NUL = true;
2897 else if (__builtin_expect (c >= utf8_continuation, 0)
2898 && warn_bidi_or_invalid_utf8_p)
2899 cur = _cpp_handle_multibyte_utf8 (pfile, c, cur, warn_bidi_p,
2900 warn_invalid_utf8_p);
2901 }
2902
2903 if (saw_NUL && !pfile->state.skipping)
2904 cpp_error (pfile, CPP_DL_WARNING,
2905 "null character(s) preserved in literal");
2906
2907 if (type == CPP_OTHER && CPP_OPTION (pfile, lang)((pfile)->opts.lang) != CLK_ASM)
2908 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
2909 (int) terminator);
2910
2911 if (CPP_OPTION (pfile, user_literals)((pfile)->opts.user_literals))
2912 {
2913 /* If a string format macro, say from inttypes.h, is placed touching
2914 a string literal it could be parsed as a C++11 user-defined string
2915 literal thus breaking the program. */
2916 if (is_macro_not_literal_suffix (pfile, cur))
2917 {
2918 /* Raise a warning, but do not consume subsequent tokens. */
2919 if (CPP_OPTION (pfile, warn_literal_suffix)((pfile)->opts.warn_literal_suffix) && !pfile->state.skipping)
2920 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
2921 token->src_loc, 0,
2922 "invalid suffix on literal; C++11 requires "
2923 "a space between literal and string macro");
2924 }
2925 /* Grab user defined literal suffix. */
2926 else if (ISIDST (*cur)(_sch_istable[(*cur) & 0xff] & (unsigned short)(_sch_isidst
))
)
2927 {
2928 type = cpp_userdef_char_add_type (type);
2929 type = cpp_userdef_string_add_type (type);
2930 ++cur;
2931
2932 while (ISIDNUM (*cur)(_sch_istable[(*cur) & 0xff] & (unsigned short)(_sch_isidnum
))
)
2933 ++cur;
2934 }
2935 }
2936 else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)((pfile)->opts.cpp_warn_cxx11_compat)
2937 && is_macro (pfile, cur)
2938 && !pfile->state.skipping)
2939 cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
2940 token->src_loc, 0, "C++11 requires a space "
2941 "between string literal and macro");
2942
2943 pfile->buffer->cur = cur;
2944 create_literal (pfile, token, base, cur - base, type);
2945}
2946
2947/* Return the comment table. The client may not make any assumption
2948 about the ordering of the table. */
2949cpp_comment_table *
2950cpp_get_comments (cpp_reader *pfile)
2951{
2952 return &pfile->comments;
2953}
2954
2955/* Append a comment to the end of the comment table. */
2956static void
2957store_comment (cpp_reader *pfile, cpp_token *token)
2958{
2959 int len;
2960
2961 if (pfile->comments.allocated == 0)
2962 {
2963 pfile->comments.allocated = 256;
2964 pfile->comments.entries = (cpp_comment *) xmalloc
2965 (pfile->comments.allocated * sizeof (cpp_comment));
2966 }
2967
2968 if (pfile->comments.count == pfile->comments.allocated)
2969 {
2970 pfile->comments.allocated *= 2;
2971 pfile->comments.entries = (cpp_comment *) xrealloc
2972 (pfile->comments.entries,
2973 pfile->comments.allocated * sizeof (cpp_comment));
2974 }
2975
2976 len = token->val.str.len;
2977
2978 /* Copy comment. Note, token may not be NULL terminated. */
2979 pfile->comments.entries[pfile->comments.count].comment =
2980 (char *) xmalloc (sizeof (char) * (len + 1));
2981 memcpy (pfile->comments.entries[pfile->comments.count].comment,
2982 token->val.str.text, len);
2983 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
2984
2985 /* Set source location. */
2986 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
2987
2988 /* Increment the count of entries in the comment table. */
2989 pfile->comments.count++;
2990}
2991
2992/* The stored comment includes the comment start and any terminator. */
2993static void
2994save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
2995 cppchar_t type)
2996{
2997 unsigned char *buffer;
2998 unsigned int len, clen, i;
2999
3000 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
3001
3002 /* C++ comments probably (not definitely) have moved past a new
3003 line, which we don't want to save in the comment. */
3004 if (is_vspace (pfile->buffer->cur[-1])(_sch_istable[(pfile->buffer->cur[-1]) & 0xff] &
(unsigned short)(_sch_isvsp))
)
3005 len--;
3006
3007 /* If we are currently in a directive or in argument parsing, then
3008 we need to store all C++ comments as C comments internally, and
3009 so we need to allocate a little extra space in that case.
3010
3011 Note that the only time we encounter a directive here is
3012 when we are saving comments in a "#define". */
3013 clen = ((pfile->state.in_directive || pfile->state.parsing_args)
3014 && type == '/') ? len + 2 : len;
3015
3016 buffer = _cpp_unaligned_alloc (pfile, clen);
3017
3018 token->type = CPP_COMMENT;
3019 token->val.str.len = clen;
3020 token->val.str.text = buffer;
3021
3022 buffer[0] = '/';
3023 memcpy (buffer + 1, from, len - 1);
3024
3025 /* Finish conversion to a C comment, if necessary. */
3026 if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
3027 {
3028 buffer[1] = '*';
3029 buffer[clen - 2] = '*';
3030 buffer[clen - 1] = '/';
3031 /* As there can be in a C++ comments illegal sequences for C comments
3032 we need to filter them out. */
3033 for (i = 2; i < (clen - 2); i++)
3034 if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
3035 buffer[i] = '|';
3036 }
3037
3038 /* Finally store this comment for use by clients of libcpp. */
3039 store_comment (pfile, token);
3040}
3041
3042/* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
3043 comment. */
3044
3045static bool
3046fallthrough_comment_p (cpp_reader *pfile, const unsigned char *comment_start)
3047{
3048 const unsigned char *from = comment_start + 1;
3049
3050 switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough)((pfile)->opts.cpp_warn_implicit_fallthrough))
3051 {
3052 /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
3053 don't recognize any comments. The latter only checks attributes,
3054 the former doesn't warn. */
3055 case 0:
3056 default:
3057 return false;
3058 /* -Wimplicit-fallthrough=1 considers any comment, no matter what
3059 content it has. */
3060 case 1:
3061 return true;
3062 case 2:
3063 /* -Wimplicit-fallthrough=2 looks for (case insensitive)
3064 .*falls?[ \t-]*thr(u|ough).* regex. */
3065 for (; (size_t) (pfile->buffer->cur - from) >= sizeof "fallthru" - 1;
3066 from++)
3067 {
3068 /* Is there anything like strpbrk with upper boundary, or
3069 memchr looking for 2 characters rather than just one? */
3070 if (from[0] != 'f' && from[0] != 'F')
3071 continue;
3072 if (from[1] != 'a' && from[1] != 'A')
3073 continue;
3074 if (from[2] != 'l' && from[2] != 'L')
3075 continue;
3076 if (from[3] != 'l' && from[3] != 'L')
3077 continue;
3078 from += sizeof "fall" - 1;
3079 if (from[0] == 's' || from[0] == 'S')
3080 from++;
3081 while (*from == ' ' || *from == '\t' || *from == '-')
3082 from++;
3083 if (from[0] != 't' && from[0] != 'T')
3084 continue;
3085 if (from[1] != 'h' && from[1] != 'H')
3086 continue;
3087 if (from[2] != 'r' && from[2] != 'R')
3088 continue;
3089 if (from[3] == 'u' || from[3] == 'U')
3090 return true;
3091 if (from[3] != 'o' && from[3] != 'O')
3092 continue;
3093 if (from[4] != 'u' && from[4] != 'U')
3094 continue;
3095 if (from[5] != 'g' && from[5] != 'G')
3096 continue;
3097 if (from[6] != 'h' && from[6] != 'H')
3098 continue;
3099 return true;
3100 }
3101 return false;
3102 case 3:
3103 case 4:
3104 break;
3105 }
3106
3107 /* Whole comment contents:
3108 -fallthrough
3109 @fallthrough@
3110 */
3111 if (*from == '-' || *from == '@')
3112 {
3113 size_t len = sizeof "fallthrough" - 1;
3114 if ((size_t) (pfile->buffer->cur - from - 1) < len)
3115 return false;
3116 if (memcmp (from + 1, "fallthrough", len))
3117 return false;
3118 if (*from == '@')
3119 {
3120 if (from[len + 1] != '@')
3121 return false;
3122 len++;
3123 }
3124 from += 1 + len;
3125 }
3126 /* Whole comment contents (regex):
3127 lint -fallthrough[ \t]*
3128 */
3129 else if (*from == 'l')
3130 {
3131 size_t len = sizeof "int -fallthrough" - 1;
3132 if ((size_t) (pfile->buffer->cur - from - 1) < len)
3133 return false;
3134 if (memcmp (from + 1, "int -fallthrough", len))
3135 return false;
3136 from += 1 + len;
3137 while (*from == ' ' || *from == '\t')
3138 from++;
3139 }
3140 /* Whole comment contents (regex):
3141 [ \t]*FALLTHR(U|OUGH)[ \t]*
3142 */
3143 else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough)((pfile)->opts.cpp_warn_implicit_fallthrough) == 4)
3144 {
3145 while (*from == ' ' || *from == '\t')
3146 from++;
3147 if ((size_t) (pfile->buffer->cur - from) < sizeof "FALLTHRU" - 1)
3148 return false;
3149 if (memcmp (from, "FALLTHR", sizeof "FALLTHR" - 1))
3150 return false;
3151 from += sizeof "FALLTHR" - 1;
3152 if (*from == 'U')
3153 from++;
3154 else if ((size_t) (pfile->buffer->cur - from) < sizeof "OUGH" - 1)
3155 return false;
3156 else if (memcmp (from, "OUGH", sizeof "OUGH" - 1))
3157 return false;
3158 else
3159 from += sizeof "OUGH" - 1;
3160 while (*from == ' ' || *from == '\t')
3161 from++;
3162 }
3163 /* Whole comment contents (regex):
3164 [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
3165 [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
3166 [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
3167 */
3168 else
3169 {
3170 while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
3171 from++;
3172 unsigned char f = *from;
3173 bool all_upper = false;
3174 if (f == 'E' || f == 'e')
3175 {
3176 if ((size_t) (pfile->buffer->cur - from)
3177 < sizeof "else fallthru" - 1)
3178 return false;
3179 if (f == 'E' && memcmp (from + 1, "LSE", sizeof "LSE" - 1) == 0)
3180 all_upper = true;
3181 else if (memcmp (from + 1, "lse", sizeof "lse" - 1))
3182 return false;
3183 from += sizeof "else" - 1;
3184 if (*from == ',')
3185 from++;
3186 if (*from != ' ')
3187 return false;
3188 from++;
3189 if (all_upper && *from == 'f')
3190 return false;
3191 if (f == 'e' && *from == 'F')
3192 return false;
3193 f = *from;
3194 }
3195 else if (f == 'I' || f == 'i')
3196 {
3197 if ((size_t) (pfile->buffer->cur - from)
3198 < sizeof "intentional fallthru" - 1)
3199 return false;
3200 if (f == 'I' && memcmp (from + 1, "NTENTIONAL",
3201 sizeof "NTENTIONAL" - 1) == 0)
3202 all_upper = true;
3203 else if (memcmp (from + 1, "ntentional",
3204 sizeof "ntentional" - 1))
3205 return false;
3206 from += sizeof "intentional" - 1;
3207 if (*from == ' ')
3208 {
3209 from++;
3210 if (all_upper && *from == 'f')
3211 return false;
3212 }
3213 else if (all_upper)
3214 {
3215 if (memcmp (from, "LY F", sizeof "LY F" - 1))
3216 return false;
3217 from += sizeof "LY " - 1;
3218 }
3219 else
3220 {
3221 if (memcmp (from, "ly ", sizeof "ly " - 1))
3222 return false;
3223 from += sizeof "ly " - 1;
3224 }
3225 if (f == 'i' && *from == 'F')
3226 return false;
3227 f = *from;
3228 }
3229 if (f != 'F' && f != 'f')
3230 return false;
3231 if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - 1)
3232 return false;
3233 if (f == 'F' && memcmp (from + 1, "ALL", sizeof "ALL" - 1) == 0)
3234 all_upper = true;
3235 else if (all_upper)
3236 return false;
3237 else if (memcmp (from + 1, "all", sizeof "all" - 1))
3238 return false;
3239 from += sizeof "fall" - 1;
3240 if (*from == (all_upper ? 'S' : 's') && from[1] == ' ')
3241 from += 2;
3242 else if (*from == ' ' || *from == '-')
3243 from++;
3244 else if (*from != (all_upper ? 'T' : 't'))
3245 return false;
3246 if ((f == 'f' || *from != 'T') && (all_upper || *from != 't'))
3247 return false;
3248 if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - 1)
3249 return false;
3250 if (memcmp (from + 1, all_upper ? "HRU" : "hru", sizeof "hru" - 1))
3251 {
3252 if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - 1)
3253 return false;
3254 if (memcmp (from + 1, all_upper ? "HROUGH" : "hrough",
3255 sizeof "hrough" - 1))
3256 return false;
3257 from += sizeof "through" - 1;
3258 }
3259 else
3260 from += sizeof "thru" - 1;
3261 while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
3262 from++;
3263 if (*from == '-')
3264 {
3265 from++;
3266 if (*comment_start == '*')
3267 {
3268 do
3269 {
3270 while (*from && *from != '*'
3271 && *from != '\n' && *from != '\r')
3272 from++;
3273 if (*from != '*' || from[1] == '/')
3274 break;
3275 from++;
3276 }
3277 while (1);
3278 }
3279 else
3280 while (*from && *from != '\n' && *from != '\r')
3281 from++;
3282 }
3283 }
3284 /* C block comment. */
3285 if (*comment_start == '*')
3286 {
3287 if (*from != '*' || from[1] != '/')
3288 return false;
3289 }
3290 /* C++ line comment. */
3291 else if (*from != '\n')
3292 return false;
3293
3294 return true;
3295}
3296
3297/* Allocate COUNT tokens for RUN. */
3298void
3299_cpp_init_tokenrun (tokenrun *run, unsigned int count)
3300{
3301 run->base = XNEWVEC (cpp_token, count)((cpp_token *) xmalloc (sizeof (cpp_token) * (count)));
3302 run->limit = run->base + count;
3303 run->next = NULL__null;
3304}
3305
3306/* Returns the next tokenrun, or creates one if there is none. */
3307static tokenrun *
3308next_tokenrun (tokenrun *run)
3309{
3310 if (run->next == NULL__null)
3311 {
3312 run->next = XNEW (tokenrun)((tokenrun *) xmalloc (sizeof (tokenrun)));
3313 run->next->prev = run;
3314 _cpp_init_tokenrun (run->next, 250);
3315 }
3316
3317 return run->next;
3318}
3319
3320/* Return the number of not yet processed token in a given
3321 context. */
3322int
3323_cpp_remaining_tokens_num_in_context (cpp_context *context)
3324{
3325 if (context->tokens_kind == TOKENS_KIND_DIRECT)
3326 return (LAST (context)((context)->u.iso.last).token - FIRST (context)((context)->u.iso.first).token);
3327 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
3328 || context->tokens_kind == TOKENS_KIND_EXTENDED)
3329 return (LAST (context)((context)->u.iso.last).ptoken - FIRST (context)((context)->u.iso.first).ptoken);
3330 else
3331 abort ()fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 3331, __FUNCTION__)
;
3332}
3333
3334/* Returns the token present at index INDEX in a given context. If
3335 INDEX is zero, the next token to be processed is returned. */
3336static const cpp_token*
3337_cpp_token_from_context_at (cpp_context *context, int index)
3338{
3339 if (context->tokens_kind == TOKENS_KIND_DIRECT)
3340 return &(FIRST (context)((context)->u.iso.first).token[index]);
3341 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
3342 || context->tokens_kind == TOKENS_KIND_EXTENDED)
3343 return FIRST (context)((context)->u.iso.first).ptoken[index];
3344 else
3345 abort ()fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 3345, __FUNCTION__)
;
3346}
3347
3348/* Look ahead in the input stream. */
3349const cpp_token *
3350cpp_peek_token (cpp_reader *pfile, int index)
3351{
3352 cpp_context *context = pfile->context;
3353 const cpp_token *peektok;
3354 int count;
3355
3356 /* First, scan through any pending cpp_context objects. */
3357 while (context->prev)
3358 {
3359 ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
3360
3361 if (index < (int) sz)
3362 return _cpp_token_from_context_at (context, index);
3363 index -= (int) sz;
3364 context = context->prev;
3365 }
3366
3367 /* We will have to read some new tokens after all (and do so
3368 without invalidating preceding tokens). */
3369 count = index;
3370 pfile->keep_tokens++;
3371
3372 /* For peeked tokens temporarily disable line_change reporting,
3373 until the tokens are parsed for real. */
3374 void (*line_change) (cpp_reader *, const cpp_token *, int)
3375 = pfile->cb.line_change;
3376 pfile->cb.line_change = NULL__null;
3377
3378 do
3379 {
3380 peektok = _cpp_lex_token (pfile);
3381 if (peektok->type == CPP_EOF)
3382 {
3383 index--;
3384 break;
3385 }
3386 else if (peektok->type == CPP_PRAGMA)
3387 {
3388 /* Don't peek past a pragma. */
3389 if (peektok == &pfile->directive_result)
3390 /* Save the pragma in the buffer. */
3391 *pfile->cur_token++ = *peektok;
3392 index--;
3393 break;
3394 }
3395 }
3396 while (index--);
3397
3398 _cpp_backup_tokens_direct (pfile, count - index);
3399 pfile->keep_tokens--;
3400 pfile->cb.line_change = line_change;
3401
3402 return peektok;
3403}
3404
3405/* Allocate a single token that is invalidated at the same time as the
3406 rest of the tokens on the line. Has its line and col set to the
3407 same as the last lexed token, so that diagnostics appear in the
3408 right place. */
3409cpp_token *
3410_cpp_temp_token (cpp_reader *pfile)
3411{
3412 cpp_token *old, *result;
3413 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
3414 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
3415
3416 old = pfile->cur_token - 1;
3417 /* Any pre-existing lookaheads must not be clobbered. */
3418 if (la)
3419 {
3420 if (sz <= la)
3421 {
3422 tokenrun *next = next_tokenrun (pfile->cur_run);
3423
3424 if (sz < la)
3425 memmove (next->base + 1, next->base,
3426 (la - sz) * sizeof (cpp_token));
3427
3428 next->base[0] = pfile->cur_run->limit[-1];
3429 }
3430
3431 if (sz > 1)
3432 memmove (pfile->cur_token + 1, pfile->cur_token,
3433 MIN (la, sz - 1)((la) < (sz - 1) ? (la) : (sz - 1)) * sizeof (cpp_token));
3434 }
3435
3436 if (!sz && pfile->cur_token == pfile->cur_run->limit)
3437 {
3438 pfile->cur_run = next_tokenrun (pfile->cur_run);
3439 pfile->cur_token = pfile->cur_run->base;
3440 }
3441
3442 result = pfile->cur_token++;
3443 result->src_loc = old->src_loc;
3444 return result;
3445}
3446
3447/* We're at the beginning of a logical line (so not in
3448 directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set. See
3449 if we should enter deferred_pragma mode to tokenize the rest of the
3450 line as a module control-line. */
3451
3452static void
3453cpp_maybe_module_directive (cpp_reader *pfile, cpp_token *result)
3454{
3455 unsigned backup = 0; /* Tokens we peeked. */
3456 cpp_hashnode *node = result->val.node.node;
3457 cpp_token *peek = result;
3458 cpp_token *keyword = peek;
3459 cpp_hashnode *(&n_modules)[spec_nodes::M_HWM][2] = pfile->spec_nodes.n_modules;
3460 int header_count = 0;
3461
3462 /* Make sure the incoming state is as we expect it. This way we
3463 can restore it using constants. */
3464 gcc_checking_assert (!pfile->state.in_deferred_pragma((void)(!(!pfile->state.in_deferred_pragma && !pfile
->state.skipping && !pfile->state.parsing_args &&
!pfile->state.angled_headers && (pfile->state.
save_comments == !((pfile)->opts.discard_comments))) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 3469, __FUNCTION__), 0 : 0))
3465 && !pfile->state.skipping((void)(!(!pfile->state.in_deferred_pragma && !pfile
->state.skipping && !pfile->state.parsing_args &&
!pfile->state.angled_headers && (pfile->state.
save_comments == !((pfile)->opts.discard_comments))) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 3469, __FUNCTION__), 0 : 0))
3466 && !pfile->state.parsing_args((void)(!(!pfile->state.in_deferred_pragma && !pfile
->state.skipping && !pfile->state.parsing_args &&
!pfile->state.angled_headers && (pfile->state.
save_comments == !((pfile)->opts.discard_comments))) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 3469, __FUNCTION__), 0 : 0))
3467 && !pfile->state.angled_headers((void)(!(!pfile->state.in_deferred_pragma && !pfile
->state.skipping && !pfile->state.parsing_args &&
!pfile->state.angled_headers && (pfile->state.
save_comments == !((pfile)->opts.discard_comments))) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 3469, __FUNCTION__), 0 : 0))
3468 && (pfile->state.save_comments((void)(!(!pfile->state.in_deferred_pragma && !pfile
->state.skipping && !pfile->state.parsing_args &&
!pfile->state.angled_headers && (pfile->state.
save_comments == !((pfile)->opts.discard_comments))) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 3469, __FUNCTION__), 0 : 0))
3469 == !CPP_OPTION (pfile, discard_comments)))((void)(!(!pfile->state.in_deferred_pragma && !pfile
->state.skipping && !pfile->state.parsing_args &&
!pfile->state.angled_headers && (pfile->state.
save_comments == !((pfile)->opts.discard_comments))) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 3469, __FUNCTION__), 0 : 0))
;
3470
3471 /* Enter directives mode sufficiently for peeking. We don't have
3472 to actually set in_directive. */
3473 pfile->state.in_deferred_pragma = true;
3474
3475 /* These two fields are needed to process tokenization in deferred
3476 pragma mode. They are not used outside deferred pragma mode or
3477 directives mode. */
3478 pfile->state.pragma_allow_expansion = true;
3479 pfile->directive_line = result->src_loc;
3480
3481 /* Saving comments is incompatible with directives mode. */
3482 pfile->state.save_comments = 0;
3483
3484 if (node == n_modules[spec_nodes::M_EXPORT][0])
3485 {
3486 peek = _cpp_lex_direct (pfile);
3487 keyword = peek;
3488 backup++;
3489 if (keyword->type != CPP_NAME)
3490 goto not_module;
3491 node = keyword->val.node.node;
3492 if (!(node->flags & NODE_MODULE(1 << 8)))
3493 goto not_module;
3494 }
3495
3496 if (node == n_modules[spec_nodes::M__IMPORT][0])
3497 /* __import */
3498 header_count = backup + 2 + 16;
3499 else if (node == n_modules[spec_nodes::M_IMPORT][0])
3500 /* import */
3501 header_count = backup + 2 + (CPP_OPTION (pfile, preprocessed)((pfile)->opts.preprocessed) ? 16 : 0);
3502 else if (node == n_modules[spec_nodes::M_MODULE][0])
3503 ; /* module */
3504 else
3505 goto not_module;
3506
3507 /* We've seen [export] {module|import|__import}. Check the next token. */
3508 if (header_count)
3509 /* After '{,__}import' a header name may appear. */
3510 pfile->state.angled_headers = true;
3511 peek = _cpp_lex_direct (pfile);
3512 backup++;
3513
3514 /* ... import followed by identifier, ':', '<' or
3515 header-name preprocessing tokens, or module
3516 followed by cpp-identifier, ':' or ';' preprocessing
3517 tokens. C++ keywords are not yet relevant. */
3518 if (peek->type == CPP_NAME
3519 || peek->type == CPP_COLON
3520 || (header_count
3521 ? (peek->type == CPP_LESS
3522 || (peek->type == CPP_STRING && peek->val.str.text[0] != 'R')
3523 || peek->type == CPP_HEADER_NAME)
3524 : peek->type == CPP_SEMICOLON))
3525 {
3526 pfile->state.pragma_allow_expansion = !CPP_OPTION (pfile, preprocessed)((pfile)->opts.preprocessed);
3527 if (!pfile->state.pragma_allow_expansion)
3528 pfile->state.prevent_expansion++;
3529
3530 if (!header_count && linemap_included_from
3531 (LINEMAPS_LAST_ORDINARY_MAP (pfile->line_table)))
3532 cpp_error_with_line (pfile, CPP_DL_ERROR, keyword->src_loc, 0,
3533 "module control-line cannot be in included file");
3534
3535 /* The first one or two tokens cannot be macro names. */
3536 for (int ix = backup; ix--;)
3537 {
3538 cpp_token *tok = ix ? keyword : result;
3539 cpp_hashnode *node = tok->val.node.node;
3540
3541 /* Don't attempt to expand the token. */
3542 tok->flags |= NO_EXPAND(1 << 10);
3543 if (_cpp_defined_macro_p (node)
3544 && _cpp_maybe_notify_macro_use (pfile, node, tok->src_loc)
3545 && !cpp_fun_like_macro_p (node))
3546 cpp_error_with_line (pfile, CPP_DL_ERROR, tok->src_loc, 0,
3547 "module control-line \"%s\" cannot be"
3548 " an object-like macro",
3549 NODE_NAME (node)(((&(node)->ident))->str));
3550 }
3551
3552 /* Map to underbar variants. */
3553 keyword->val.node.node = n_modules[header_count
3554 ? spec_nodes::M_IMPORT
3555 : spec_nodes::M_MODULE][1];
3556 if (backup != 1)
3557 result->val.node.node = n_modules[spec_nodes::M_EXPORT][1];
3558
3559 /* Maybe tell the tokenizer we expect a header-name down the
3560 road. */
3561 pfile->state.directive_file_token = header_count;
3562 }
3563 else
3564 {
3565 not_module:
3566 /* Drop out of directive mode. */
3567 /* We aaserted save_comments had this value upon entry. */
3568 pfile->state.save_comments
3569 = !CPP_OPTION (pfile, discard_comments)((pfile)->opts.discard_comments);
3570 pfile->state.in_deferred_pragma = false;
3571 /* Do not let this remain on. */
3572 pfile->state.angled_headers = false;
3573 }
3574
3575 /* In either case we want to backup the peeked tokens. */
3576 if (backup)
3577 {
3578 /* If we saw EOL, we should drop it, because this isn't a module
3579 control-line after all. */
3580 bool eol = peek->type == CPP_PRAGMA_EOL;
3581 if (!eol || backup > 1)
3582 {
3583 /* Put put the peeked tokens back */
3584 _cpp_backup_tokens_direct (pfile, backup);
3585 /* But if the last one was an EOL, forget it. */
3586 if (eol)
3587 pfile->lookaheads--;
3588 }
3589 }
3590}
3591
3592/* Lex a token into RESULT (external interface). Takes care of issues
3593 like directive handling, token lookahead, multiple include
3594 optimization and skipping. */
3595const cpp_token *
3596_cpp_lex_token (cpp_reader *pfile)
3597{
3598 cpp_token *result;
3599
3600 for (;;)
3601 {
3602 if (pfile->cur_token == pfile->cur_run->limit)
3603 {
3604 pfile->cur_run = next_tokenrun (pfile->cur_run);
3605 pfile->cur_token = pfile->cur_run->base;
3606 }
3607 /* We assume that the current token is somewhere in the current
3608 run. */
3609 if (pfile->cur_token < pfile->cur_run->base
3610 || pfile->cur_token >= pfile->cur_run->limit)
3611 abort ()fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 3611, __FUNCTION__)
;
3612
3613 if (pfile->lookaheads)
3614 {
3615 pfile->lookaheads--;
3616 result = pfile->cur_token++;
3617 }
3618 else
3619 result = _cpp_lex_direct (pfile);
3620
3621 if (result->flags & BOL(1 << 12))
3622 {
3623 /* Is this a directive. If _cpp_handle_directive returns
3624 false, it is an assembler #. */
3625 if (result->type == CPP_HASH
3626 /* 6.10.3 p 11: Directives in a list of macro arguments
3627 gives undefined behavior. This implementation
3628 handles the directive as normal. */
3629 && pfile->state.parsing_args != 1)
3630 {
3631 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE(1 << 0)))
3632 {
3633 if (pfile->directive_result.type == CPP_PADDING)
3634 continue;
3635 result = &pfile->directive_result;
3636 }
3637 }
3638 else if (pfile->state.in_deferred_pragma)
3639 result = &pfile->directive_result;
3640 else if (result->type == CPP_NAME
3641 && (result->val.node.node->flags & NODE_MODULE(1 << 8))
3642 && !pfile->state.skipping
3643 /* Unlike regular directives, we do not deal with
3644 tokenizing module directives as macro arguments.
3645 That's not permitted. */
3646 && !pfile->state.parsing_args)
3647 {
3648 /* P1857. Before macro expansion, At start of logical
3649 line ... */
3650 /* We don't have to consider lookaheads at this point. */
3651 gcc_checking_assert (!pfile->lookaheads)((void)(!(!pfile->lookaheads) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 3651, __FUNCTION__), 0 : 0))
;
3652
3653 cpp_maybe_module_directive (pfile, result);
3654 }
3655
3656 if (pfile->cb.line_change && !pfile->state.skipping)
3657 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
3658 }
3659
3660 /* We don't skip tokens in directives. */
3661 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
3662 break;
3663
3664 /* Outside a directive, invalidate controlling macros. At file
3665 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
3666 get here and MI optimization works. */
3667 pfile->mi_valid = false;
3668
3669 if (!pfile->state.skipping || result->type == CPP_EOF)
3670 break;
3671 }
3672
3673 return result;
3674}
3675
3676/* Returns true if a fresh line has been loaded: either the current
 buffer did not need a new line, or it still had data and
 _cpp_clean_line was run on it.  Returns false when no line can be
 supplied right now: inside a directive, while parsing macro
 arguments, or when the exhausted buffer has no predecessor (or has
 return_at_eof set).  When LEXING_RAW_STRING is true the in-directive
 check is deferred until the current buffer is exhausted, so a raw
 string may continue onto further lines of the same buffer.  */
3677template <bool lexing_raw_string>
3678static bool
3679get_fresh_line_impl (cpp_reader *pfile)
3680{
3681 /* We can't get a new line until we leave the current directive, unless we
3682 are lexing a raw string, in which case it will be OK as long as we don't
3683 pop the current buffer. */
3684 if (!lexing_raw_string && pfile->state.in_directive)
3685 return false;
3686
3687 for (;;)
3688 {
3689 cpp_buffer *buffer = pfile->buffer;
3690
3691 if (!buffer->need_line)
3692 return true;
3693
3694 if (buffer->next_line < buffer->rlimit)
3695 {
3696 _cpp_clean_line (pfile);
3697 return true;
3698 }
3699
3700 /* We can't change buffers until we leave the current directive. */
3701 if (lexing_raw_string && pfile->state.in_directive)
3702 return false;
3703
3704 /* First, get out of parsing arguments state. */
3705 if (pfile->state.parsing_args)
3706 return false;
3707
3708 /* End of buffer. Non-empty files should end in a newline. */
3709 if (buffer->buf != buffer->rlimit
3710 && buffer->next_line > buffer->rlimit
3711 && !buffer->from_stage3)
3712 {
3713 /* Clip to buffer size. */
3714 buffer->next_line = buffer->rlimit;
3715 }
3716
 /* Pop the exhausted buffer and go round the loop again, which
 re-reads pfile->buffer.  */
3717 if (buffer->prev && !buffer->return_at_eof)
3718 _cpp_pop_buffer (pfile);
3719 else
3720 {
3721 /* End of translation. Do not pop the buffer yet. Increment
3722 line number so that the EOF token is on a line of its own
3723 (_cpp_lex_direct doesn't increment in that case, because
3724 it's hard for it to distinguish this special case). */
3725 CPP_INCREMENT_LINE (pfile, 0)do { const class line_maps *line_table = pfile->line_table
; const struct line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP
(line_table); linenum_type line = SOURCE_LINE (map, line_table
->highest_line); linemap_line_start (pfile->line_table,
line + 1, 0); } while (0)
;
3726 return false;
3727 }
3728 }
3729}
3730
/* Non-template entry point for get_fresh_line_impl, instantiated with
 lexing_raw_string == false.  Returns true if a fresh line has been
 loaded.  */
3731bool
3732_cpp_get_fresh_line (cpp_reader *pfile)
3733{
3734 return get_fresh_line_impl<false> (pfile);
3735}
3736
3737
/* Set result->type to ELSE_TYPE; then, if the next character in the
 buffer is CHAR, consume it and set result->type to THEN_TYPE instead.
 Expects variables named `result' and `buffer' to be in scope where
 the macro is used.  */
3738#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)do { result->type = ELSE_TYPE; if (*buffer->cur == CHAR
) buffer->cur++, result->type = THEN_TYPE; } while (0)
\
3739 do \
3740 { \
3741 result->type = ELSE_TYPE; \
3742 if (*buffer->cur == CHAR) \
3743 buffer->cur++, result->type = THEN_TYPE; \
3744 } \
3745 while (0)
3746
3747/* Lex a token into pfile->cur_token, which is also incremented, to
3748 get diagnostics pointing to the correct location.
3749
3750 Does not handle issues such as token lookahead, multiple-include
3751 optimization, directives, skipping etc. This function is only
3752 suitable for use by _cpp_lex_token, and in special cases like
3753 lex_expansion_token which doesn't care for any of these issues.
3754
3755 When meeting a newline, returns CPP_EOF if parsing a directive,
3756 otherwise returns to the start of the token buffer if permissible.
3757 Returns a pointer to the lexed token. */
3758cpp_token *
3759_cpp_lex_direct (cpp_reader *pfile)
3760{
3761 cppchar_t c;
3762 cpp_buffer *buffer;
3763 const unsigned char *comment_start;
3764 bool fallthrough_comment = false;
3765 cpp_token *result = pfile->cur_token++;
3766
3767 fresh_line:
3768 result->flags = 0;
3769 buffer = pfile->buffer;
3770 if (buffer->need_line)
1
Assuming field 'need_line' is false
2
Taking false branch
3771 {
3772 if (pfile->state.in_deferred_pragma)
3773 {
3774 /* This can happen in cases like:
3775 #define loop(x) whatever
3776 #pragma omp loop
3777 where when trying to expand loop we need to peek
3778 next token after loop, but aren't still in_deferred_pragma
3779 mode but are in in_directive mode, so buffer->need_line
3780 is set, a CPP_EOF is peeked. */
3781 result->type = CPP_PRAGMA_EOL;
3782 pfile->state.in_deferred_pragma = false;
3783 if (!pfile->state.pragma_allow_expansion)
3784 pfile->state.prevent_expansion--;
3785 return result;
3786 }
3787 if (!_cpp_get_fresh_line (pfile))
3788 {
3789 result->type = CPP_EOF;
3790 /* Not a real EOF in a directive or arg parsing -- we refuse
3791 to advance to the next file now, and will once we're out
3792 of those modes. */
3793 if (!pfile->state.in_directive && !pfile->state.parsing_args)
3794 {
3795 /* Tell the compiler the line number of the EOF token. */
3796 result->src_loc = pfile->line_table->highest_line;
3797 result->flags = BOL(1 << 12);
3798 /* Now pop the buffer that _cpp_get_fresh_line did not. */
3799 _cpp_pop_buffer (pfile);
3800 }
3801 return result;
3802 }
3803 if (buffer != pfile->buffer)
3804 fallthrough_comment = false;
 /* When tokens need not be kept, rewind to the first slot of the
 base token run and lex into that slot instead.  */
3805 if (!pfile->keep_tokens)
3806 {
3807 pfile->cur_run = &pfile->base_run;
3808 result = pfile->base_run.base;
3809 pfile->cur_token = result + 1;
3810 }
3811 result->flags = BOL(1 << 12);
3812 if (pfile->state.parsing_args == 2)
3813 result->flags |= PREV_WHITE(1 << 0);
3814 }
3815 buffer = pfile->buffer;
3816 update_tokens_line:
3817 result->src_loc = pfile->line_table->highest_line;
3818
3819 skipped_white:
3820 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
3
Assuming field 'cur' is < field 'pos'
3821 && !pfile->overlaid_buffer)
3822 {
3823 _cpp_process_line_notes (pfile, false);
3824 result->src_loc = pfile->line_table->highest_line;
3825 }
3826 c = *buffer->cur++;
3827
3828 if (pfile->forced_token_location)
4
Assuming field 'forced_token_location' is 0
5
Taking false branch
3829 result->src_loc = pfile->forced_token_location;
3830 else
3831 result->src_loc = linemap_position_for_column (pfile->line_table,
3832 CPP_BUF_COLUMN (buffer, buffer->cur)((buffer->cur) - (buffer)->line_base));
3833
3834 switch (c)
6
Control jumps to the 'default' case at line 4213
3835 {
3836 case ' ': case '\t': case '\f': case '\v': case '\0':
3837 result->flags |= PREV_WHITE(1 << 0);
3838 skip_whitespace (pfile, c);
3839 goto skipped_white;
3840
3841 case '\n':
3842 /* Increment the line, unless this is the last line ... */
3843 if (buffer->cur < buffer->rlimit
3844 /* ... or this is a #include, (where _cpp_stack_file needs to
3845 unwind by one line) ... */
3846 || (pfile->state.in_directive > 1
3847 /* ... except traditional-cpp increments this elsewhere. */
3848 && !CPP_OPTION (pfile, traditional)((pfile)->opts.traditional)))
3849 CPP_INCREMENT_LINE (pfile, 0)do { const class line_maps *line_table = pfile->line_table
; const struct line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP
(line_table); linenum_type line = SOURCE_LINE (map, line_table
->highest_line); linemap_line_start (pfile->line_table,
line + 1, 0); } while (0)
;
3850 buffer->need_line = true;
3851 if (pfile->state.in_deferred_pragma)
3852 {
3853 /* Produce the PRAGMA_EOL on this line. File reading
3854 ensures there is always a \n at end of the buffer, thus
3855 in a deferred pragma we always see CPP_PRAGMA_EOL before
3856 any CPP_EOF. */
3857 result->type = CPP_PRAGMA_EOL;
3858 result->flags &= ~PREV_WHITE(1 << 0);
3859 pfile->state.in_deferred_pragma = false;
3860 if (!pfile->state.pragma_allow_expansion)
3861 pfile->state.prevent_expansion--;
3862 return result;
3863 }
 /* Otherwise restart at the top; need_line was set above, so the
 fresh_line code will try to fetch the next line.  */
3864 goto fresh_line;
3865
3866 case '0': case '1': case '2': case '3': case '4':
3867 case '5': case '6': case '7': case '8': case '9':
3868 {
3869 struct normalize_state nst = INITIAL_NORMALIZE_STATE{ 0, 0, normalized_KC };
3870 result->type = CPP_NUMBER;
3871 lex_number (pfile, &result->val.str, &nst);
3872 warn_about_normalization (pfile, result, &nst, false);
3873 break;
3874 }
3875
3876 case 'L':
3877 case 'u':
3878 case 'U':
3879 case 'R':
3880 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
3881 wide strings or raw strings. */
3882 if (c == 'L' || CPP_OPTION (pfile, rliterals)((pfile)->opts.rliterals)
3883 || (c != 'R' && CPP_OPTION (pfile, uliterals)((pfile)->opts.uliterals)))
3884 {
3885 if ((*buffer->cur == '\'' && c != 'R')
3886 || *buffer->cur == '"'
3887 || (*buffer->cur == 'R'
3888 && c != 'R'
3889 && buffer->cur[1] == '"'
3890 && CPP_OPTION (pfile, rliterals)((pfile)->opts.rliterals))
3891 || (*buffer->cur == '8'
3892 && c == 'u'
3893 && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
3894 && CPP_OPTION (pfile, utf8_char_literals)((pfile)->opts.utf8_char_literals)))
3895 || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
3896 && CPP_OPTION (pfile, rliterals)((pfile)->opts.rliterals)))))
3897 {
3898 lex_string (pfile, result, buffer->cur - 1);
3899 break;
3900 }
3901 }
3902 /* Fall through. */
3903
3904 case '_':
3905 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
3906 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
3907 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
3908 case 's': case 't': case 'v': case 'w': case 'x':
3909 case 'y': case 'z':
3910 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
3911 case 'G': case 'H': case 'I': case 'J': case 'K':
3912 case 'M': case 'N': case 'O': case 'P': case 'Q':
3913 case 'S': case 'T': case 'V': case 'W': case 'X':
3914 case 'Y': case 'Z':
3915 result->type = CPP_NAME;
3916 {
3917 struct normalize_state nst = INITIAL_NORMALIZE_STATE{ 0, 0, normalized_KC };
3918 result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
3919 &nst,
3920 &result->val.node.spelling);
3921 warn_about_normalization (pfile, result, &nst, true);
3922 }
3923
3924 /* Convert named operators to their proper types. */
3925 if (result->val.node.node->flags & NODE_OPERATOR(1 << 0))
3926 {
3927 result->flags |= NAMED_OP(1 << 4);
3928 result->type = (enum cpp_ttype) result->val.node.node->directive_index;
3929 }
3930
3931 /* Signal FALLTHROUGH comment followed by another token. */
3932 if (fallthrough_comment)
3933 result->flags |= PREV_FALLTHROUGH(1 << 5);
3934 break;
3935
3936 case '\'':
3937 case '"':
3938 lex_string (pfile, result, buffer->cur - 1);
3939 break;
3940
3941 case '/':
3942 /* A potential block or line comment. */
3943 comment_start = buffer->cur;
3944 c = *buffer->cur;
3945
3946 if (c == '*')
3947 {
3948 if (_cpp_skip_block_comment (pfile))
3949 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
3950 }
3951 else if (c == '/' && ! CPP_OPTION (pfile, traditional)((pfile)->opts.traditional))
3952 {
3953 /* Don't warn for system headers. */
3954 if (_cpp_in_system_header (pfile))
3955 ;
3956 /* Warn about comments if pedantically GNUC89, and not
3957 in system headers. */
3958 else if (CPP_OPTION (pfile, lang)((pfile)->opts.lang) == CLK_GNUC89
3959 && CPP_PEDANTIC (pfile)((pfile)->opts.cpp_pedantic)
3960 && ! buffer->warned_cplusplus_comments)
3961 {
3962 if (cpp_error (pfile, CPP_DL_PEDWARN,
3963 "C++ style comments are not allowed in ISO C90"))
3964 cpp_error (pfile, CPP_DL_NOTE,
3965 "(this will be reported only once per input file)");
3966 buffer->warned_cplusplus_comments = 1;
3967 }
3968 /* Or if specifically desired via -Wc90-c99-compat. */
3969 else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat)((pfile)->opts.cpp_warn_c90_c99_compat) > 0
3970 && ! CPP_OPTION (pfile, cplusplus)((pfile)->opts.cplusplus)
3971 && ! buffer->warned_cplusplus_comments)
3972 {
3973 if (cpp_error (pfile, CPP_DL_WARNING,
3974 "C++ style comments are incompatible with C90"))
3975 cpp_error (pfile, CPP_DL_NOTE,
3976 "(this will be reported only once per input file)");
3977 buffer->warned_cplusplus_comments = 1;
3978 }
3979 /* In C89/C94, C++ style comments are forbidden. */
3980 else if ((CPP_OPTION (pfile, lang)((pfile)->opts.lang) == CLK_STDC89
3981 || CPP_OPTION (pfile, lang)((pfile)->opts.lang) == CLK_STDC94))
3982 {
3983 /* But don't be confused about valid code such as
3984 - // immediately followed by *,
3985 - // in a preprocessing directive,
3986 - // in an #if 0 block. */
3987 if (buffer->cur[1] == '*'
3988 || pfile->state.in_directive
3989 || pfile->state.skipping)
3990 {
3991 result->type = CPP_DIV;
3992 break;
3993 }
3994 else if (! buffer->warned_cplusplus_comments)
3995 {
3996 if (cpp_error (pfile, CPP_DL_ERROR,
3997 "C++ style comments are not allowed in "
3998 "ISO C90"))
3999 cpp_error (pfile, CPP_DL_NOTE,
4000 "(this will be reported only once per input "
4001 "file)");
4002 buffer->warned_cplusplus_comments = 1;
4003 }
4004 }
4005 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)((pfile)->opts.warn_comments))
4006 cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
4007 }
4008 else if (c == '=')
4009 {
4010 buffer->cur++;
4011 result->type = CPP_DIV_EQ;
4012 break;
4013 }
4014 else
4015 {
4016 result->type = CPP_DIV;
4017 break;
4018 }
4019
4020 if (fallthrough_comment_p (pfile, comment_start))
4021 fallthrough_comment = true;
4022
4023 if (pfile->cb.comment)
4024 {
4025 size_t len = pfile->buffer->cur - comment_start;
4026 pfile->cb.comment (pfile, result->src_loc, comment_start - 1,
4027 len + 1);
4028 }
4029
 /* Comments are not being saved: record that whitespace preceded
 the next token and keep lexing into the same RESULT.  */
4030 if (!pfile->state.save_comments)
4031 {
4032 result->flags |= PREV_WHITE(1 << 0);
4033 goto update_tokens_line;
4034 }
4035
4036 if (fallthrough_comment)
4037 result->flags |= PREV_FALLTHROUGH(1 << 5);
4038
4039 /* Save the comment as a token in its own right. */
4040 save_comment (pfile, result, comment_start, c);
4041 break;
4042
4043 case '<':
4044 if (pfile->state.angled_headers)
4045 {
4046 lex_string (pfile, result, buffer->cur - 1);
4047 if (result->type != CPP_LESS)
4048 break;
4049 }
4050
4051 result->type = CPP_LESS;
4052 if (*buffer->cur == '=')
4053 {
4054 buffer->cur++, result->type = CPP_LESS_EQ;
4055 if (*buffer->cur == '>'
4056 && CPP_OPTION (pfile, cplusplus)((pfile)->opts.cplusplus)
4057 && CPP_OPTION (pfile, lang)((pfile)->opts.lang) >= CLK_GNUCXX20)
4058 buffer->cur++, result->type = CPP_SPACESHIP;
4059 }
4060 else if (*buffer->cur == '<')
4061 {
4062 buffer->cur++;
4063 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT)do { result->type = CPP_LSHIFT; if (*buffer->cur == '='
) buffer->cur++, result->type = CPP_LSHIFT_EQ; } while (
0)
;
4064 }
4065 else if (CPP_OPTION (pfile, digraphs)((pfile)->opts.digraphs))
4066 {
4067 if (*buffer->cur == ':')
4068 {
4069 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
4070 three characters are <:: and the subsequent character
4071 is neither : nor >, the < is treated as a preprocessor
4072 token by itself". */
4073 if (CPP_OPTION (pfile, cplusplus)((pfile)->opts.cplusplus)
4074 && CPP_OPTION (pfile, lang)((pfile)->opts.lang) != CLK_CXX98
4075 && CPP_OPTION (pfile, lang)((pfile)->opts.lang) != CLK_GNUCXX
4076 && buffer->cur[1] == ':'
4077 && buffer->cur[2] != ':' && buffer->cur[2] != '>')
4078 break;
4079
4080 buffer->cur++;
4081 result->flags |= DIGRAPH(1 << 1);
4082 result->type = CPP_OPEN_SQUARE;
4083 }
4084 else if (*buffer->cur == '%')
4085 {
4086 buffer->cur++;
4087 result->flags |= DIGRAPH(1 << 1);
4088 result->type = CPP_OPEN_BRACE;
4089 }
4090 }
4091 break;
4092
4093 case '>':
4094 result->type = CPP_GREATER;
4095 if (*buffer->cur == '=')
4096 buffer->cur++, result->type = CPP_GREATER_EQ;
4097 else if (*buffer->cur == '>')
4098 {
4099 buffer->cur++;
4100 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT)do { result->type = CPP_RSHIFT; if (*buffer->cur == '='
) buffer->cur++, result->type = CPP_RSHIFT_EQ; } while (
0)
;
4101 }
4102 break;
4103
4104 case '%':
4105 result->type = CPP_MOD;
4106 if (*buffer->cur == '=')
4107 buffer->cur++, result->type = CPP_MOD_EQ;
4108 else if (CPP_OPTION (pfile, digraphs)((pfile)->opts.digraphs))
4109 {
4110 if (*buffer->cur == ':')
4111 {
4112 buffer->cur++;
4113 result->flags |= DIGRAPH(1 << 1);
4114 result->type = CPP_HASH;
4115 if (*buffer->cur == '%' && buffer->cur[1] == ':')
4116 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
4117 }
4118 else if (*buffer->cur == '>')
4119 {
4120 buffer->cur++;
4121 result->flags |= DIGRAPH(1 << 1);
4122 result->type = CPP_CLOSE_BRACE;
4123 }
4124 }
4125 break;
4126
4127 case '.':
4128 result->type = CPP_DOT;
4129 if (ISDIGIT (*buffer->cur)(_sch_istable[(*buffer->cur) & 0xff] & (unsigned short
)(_sch_isdigit))
)
4130 {
4131 struct normalize_state nst = INITIAL_NORMALIZE_STATE{ 0, 0, normalized_KC };
4132 result->type = CPP_NUMBER;
4133 lex_number (pfile, &result->val.str, &nst);
4134 warn_about_normalization (pfile, result, &nst, false);
4135 }
4136 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
4137 buffer->cur += 2, result->type = CPP_ELLIPSIS;
4138 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)((pfile)->opts.cplusplus))
4139 buffer->cur++, result->type = CPP_DOT_STAR;
4140 break;
4141
4142 case '+':
4143 result->type = CPP_PLUS;
4144 if (*buffer->cur == '+')
4145 buffer->cur++, result->type = CPP_PLUS_PLUS;
4146 else if (*buffer->cur == '=')
4147 buffer->cur++, result->type = CPP_PLUS_EQ;
4148 break;
4149
4150 case '-':
4151 result->type = CPP_MINUS;
4152 if (*buffer->cur == '>')
4153 {
4154 buffer->cur++;
4155 result->type = CPP_DEREF;
4156 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)((pfile)->opts.cplusplus))
4157 buffer->cur++, result->type = CPP_DEREF_STAR;
4158 }
4159 else if (*buffer->cur == '-')
4160 buffer->cur++, result->type = CPP_MINUS_MINUS;
4161 else if (*buffer->cur == '=')
4162 buffer->cur++, result->type = CPP_MINUS_EQ;
4163 break;
4164
4165 case '&':
4166 result->type = CPP_AND;
4167 if (*buffer->cur == '&')
4168 buffer->cur++, result->type = CPP_AND_AND;
4169 else if (*buffer->cur == '=')
4170 buffer->cur++, result->type = CPP_AND_EQ;
4171 break;
4172
4173 case '|':
4174 result->type = CPP_OR;
4175 if (*buffer->cur == '|')
4176 buffer->cur++, result->type = CPP_OR_OR;
4177 else if (*buffer->cur == '=')
4178 buffer->cur++, result->type = CPP_OR_EQ;
4179 break;
4180
4181 case ':':
4182 result->type = CPP_COLON;
4183 if (*buffer->cur == ':' && CPP_OPTION (pfile, scope)((pfile)->opts.scope))
4184 buffer->cur++, result->type = CPP_SCOPE;
4185 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs)((pfile)->opts.digraphs))
4186 {
4187 buffer->cur++;
4188 result->flags |= DIGRAPH(1 << 1);
4189 result->type = CPP_CLOSE_SQUARE;
4190 }
4191 break;
4192
4193 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT)do { result->type = CPP_MULT; if (*buffer->cur == '=') buffer
->cur++, result->type = CPP_MULT_EQ; } while (0)
; break;
4194 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ)do { result->type = CPP_EQ; if (*buffer->cur == '=') buffer
->cur++, result->type = CPP_EQ_EQ; } while (0)
; break;
4195 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT)do { result->type = CPP_NOT; if (*buffer->cur == '=') buffer
->cur++, result->type = CPP_NOT_EQ; } while (0)
; break;
4196 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR)do { result->type = CPP_XOR; if (*buffer->cur == '=') buffer
->cur++, result->type = CPP_XOR_EQ; } while (0)
; break;
4197 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH)do { result->type = CPP_HASH; if (*buffer->cur == '#') buffer
->cur++, result->type = CPP_PASTE; } while (0)
; result->val.token_no = 0; break;
4198
4199 case '?': result->type = CPP_QUERY; break;
4200 case '~': result->type = CPP_COMPL; break;
4201 case ',': result->type = CPP_COMMA; break;
4202 case '(': result->type = CPP_OPEN_PAREN; break;
4203 case ')': result->type = CPP_CLOSE_PAREN; break;
4204 case '[': result->type = CPP_OPEN_SQUARE; break;
4205 case ']': result->type = CPP_CLOSE_SQUARE; break;
4206 case '{': result->type = CPP_OPEN_BRACE; break;
4207 case '}': result->type = CPP_CLOSE_BRACE; break;
4208 case ';': result->type = CPP_SEMICOLON; break;
4209
4210 /* @ is a punctuator in Objective-C. */
4211 case '@': result->type = CPP_ATSIGN; break;
4212
4213 default:
4214 {
 /* Back up so that BASE addresses the character just read into C.  */
4215 const uchar *base = --buffer->cur;
 /* Static, so it persists across calls.  It is set below to
 END - buffer->cur after an invalid-UTF-8 warning and counted
 down on later continuation bytes, which then do not warn again.  */
4216 static int no_warn_cnt;
4217
4218 /* Check for an extended identifier ($ or UCN or UTF-8). */
4219 struct normalize_state nst = INITIAL_NORMALIZE_STATE{ 0, 0, normalized_KC };
4220 if (forms_identifier_p (pfile, true, &nst))
7
Calling 'forms_identifier_p'
4221 {
4222 result->type = CPP_NAME;
4223 result->val.node.node = lex_identifier (pfile, base, true, &nst,
4224 &result->val.node.spelling);
4225 warn_about_normalization (pfile, result, &nst, true);
4226 break;
4227 }
4228
4229 /* Otherwise this will form a CPP_OTHER token. Parse valid UTF-8 as a
4230 single token. */
4231 buffer->cur++;
4232 if (c >= utf8_signifier)
4233 {
4234 const uchar *pstr = base;
4235 cppchar_t s;
4236 if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL__null, &s))
4237 {
4238 if (s > UCS_LIMIT0x10FFFF && CPP_OPTION (pfile, cpp_warn_invalid_utf8)((pfile)->opts.cpp_warn_invalid_utf8))
4239 {
4240 buffer->cur = base;
4241 _cpp_warn_invalid_utf8 (pfile);
4242 }
4243 buffer->cur = pstr;
4244 }
4245 else if (CPP_OPTION (pfile, cpp_warn_invalid_utf8)((pfile)->opts.cpp_warn_invalid_utf8))
4246 {
4247 buffer->cur = base;
4248 const uchar *end = _cpp_warn_invalid_utf8 (pfile);
4249 buffer->cur = base + 1;
4250 no_warn_cnt = end - buffer->cur;
4251 }
4252 }
4253 else if (c >= utf8_continuation
4254 && CPP_OPTION (pfile, cpp_warn_invalid_utf8)((pfile)->opts.cpp_warn_invalid_utf8))
4255 {
4256 if (no_warn_cnt)
4257 --no_warn_cnt;
4258 else
4259 {
4260 buffer->cur = base;
4261 _cpp_warn_invalid_utf8 (pfile);
4262 buffer->cur = base + 1;
4263 }
4264 }
4265 create_literal (pfile, result, base, buffer->cur - base, CPP_OTHER);
4266 break;
4267 }
4268
4269 }
4270
4271 /* Potentially convert the location of the token to a range. */
4272 if (result->src_loc >= RESERVED_LOCATION_COUNT
4273 && result->type != CPP_EOF)
4274 {
4275 /* Ensure that any line notes are processed, so that we have the
4276 correct physical line/column for the end-point of the token even
4277 when a logical line is split via one or more backslashes. */
4278 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
4279 && !pfile->overlaid_buffer)
4280 _cpp_process_line_notes (pfile, false);
4281
4282 source_range tok_range;
4283 tok_range.m_start = result->src_loc;
4284 tok_range.m_finish
4285 = linemap_position_for_column (pfile->line_table,
4286 CPP_BUF_COLUMN (buffer, buffer->cur)((buffer->cur) - (buffer)->line_base));
4287
4288 result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
4289 result->src_loc,
4290 tok_range, NULL__null, 0);
4291 }
4292
4293 return result;
4294}
4295
4296/* An upper bound on the number of bytes needed to spell TOKEN.
4297 Does not include preceding whitespace.  The bound is the stored
 string length for SPELL_LITERAL tokens, ten times the node length
 for SPELL_IDENT tokens, and 6 for every other spelling category.  */
4298unsigned int
4299cpp_token_len (const cpp_token *token)
4300{
4301 unsigned int len;
4302
4303 switch (TOKEN_SPELL (token)(token_spellings[(token)->type].category))
4304 {
4305 default: len = 6; break;
4306 case SPELL_LITERAL: len = token->val.str.len; break;
 /* Presumably the factor of 10 allows for every byte being written
 as a 10-byte \U escape -- NOTE(review): confirm.  */
4307 case SPELL_IDENT: len = NODE_LEN (token->val.node.node)(((&(token->val.node.node)->ident))->len) * 10; break;
4308 }
4309
4310 return len;
4311}
4312
4313/* Parse one UTF-8 sequence starting at NAME and place a \U escape
4314 (backslash, 'U', eight lowercase hex digits) in BUFFER.
 Return the number of bytes read out of NAME. (There are always
4315 10 bytes written to BUFFER.)  Aborts if a trailing byte of the
 sequence is not of the form 10xxxxxx.  */
4316
4317static size_t
4318utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
4319{
4320 int j;
4321 int ucn_len = 0;
4322 int ucn_len_c;
4323 unsigned t;
4324 unsigned long utf32;
4325
4326 /* Compute the length of the UTF-8 sequence. */
 /* The length is the count of leading 1 bits in the first byte.  */
4327 for (t = *name; t & 0x80; t <<= 1)
4328 ucn_len++;
4329
 /* Keep only the payload bits of the first byte, then append six
 bits from each following byte.  */
4330 utf32 = *name & (0x7F >> ucn_len);
4331 for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
4332 {
4333 utf32 = (utf32 << 6) | (*++name & 0x3F);
4334
4335 /* Ill-formed UTF-8. */
4336 if ((*name & ~0x3F) != 0x80)
4337 abort ()fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 4337, __FUNCTION__)
;
4338 }
4339
4340 *buffer++ = '\\';
4341 *buffer++ = 'U';
 /* Emit eight hex digits, most significant nibble first.  */
4342 for (j = 7; j >= 0; j--)
4343 *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
4344 return ucn_len;
4345}
4346
4347/* Given a token TYPE corresponding to a digraph, return a pointer to
4348 the spelling of the digraph.  The spelling is looked up in
 digraph_spellings at index TYPE - CPP_FIRST_DIGRAPH; no range check
 is performed here.  */
4349static const unsigned char *
4350cpp_digraph2name (enum cpp_ttype type)
4351{
4352 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
4353}
4354
4355/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
4356 The buffer must already contain enough space to hold the
4357 token's spelling. Returns a pointer to the character after the
4358 last character written. */
4359unsigned char *
4360_cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
4361{
4362 size_t i;
4363 const unsigned char *name = NODE_NAME (ident)(((&(ident)->ident))->str);
4364
4365 for (i = 0; i < NODE_LEN (ident)(((&(ident)->ident))->len); i++)
 /* A byte with bits set outside the low seven starts a UTF-8
 sequence, which is converted to a 10-byte \U escape.  Skip the
 bytes consumed, less one for the loop's own increment.  */
4366 if (name[i] & ~0x7F)
4367 {
4368 i += utf8_to_ucn (buffer, name + i) - 1;
4369 buffer += 10;
4370 }
4371 else
4372 *buffer++ = name[i];
4373
4374 return buffer;
4375}
4376
4377/* Write the spelling of a token TOKEN to BUFFER. The buffer must
4378 already contain enough space to hold the token's spelling.
4379 Returns a pointer to the character after the last character written.
4380 FORSTRING is true if this is to be the spelling after translation
4381 phase 1 (with the original spelling of extended identifiers), false
4382 if extended identifiers should always be written using UCNs (there is
4383 no option for always writing them in the internal UTF-8 form).
 No terminating NUL is written.
4384 FIXME: Would be nice if we didn't need the PFILE argument. */
4385unsigned char *
4386cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
4387 unsigned char *buffer, bool forstring)
4388{
4389 switch (TOKEN_SPELL (token)(token_spellings[(token)->type].category))
4390 {
4391 case SPELL_OPERATOR:
4392 {
4393 const unsigned char *spelling;
4394 unsigned char c;
4395
 /* Digraphs use their own spelling table; named operators are
 spelled from their identifier node, like SPELL_IDENT tokens.  */
4396 if (token->flags & DIGRAPH(1 << 1))
4397 spelling = cpp_digraph2name (token->type);
4398 else if (token->flags & NAMED_OP(1 << 4))
4399 goto spell_ident;
4400 else
4401 spelling = TOKEN_NAME (token)(token_spellings[(token)->type].name);
4402
4403 while ((c = *spelling++) != '\0')
4404 *buffer++ = c;
4405 }
4406 break;
4407
4408 spell_ident:
4409 case SPELL_IDENT:
4410 if (forstring)
4411 {
4412 memcpy (buffer, NODE_NAME (token->val.node.spelling)(((&(token->val.node.spelling)->ident))->str),
4413 NODE_LEN (token->val.node.spelling)(((&(token->val.node.spelling)->ident))->len));
4414 buffer += NODE_LEN (token->val.node.spelling)(((&(token->val.node.spelling)->ident))->len);
4415 }
4416 else
4417 buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
4418 break;
4419
4420 case SPELL_LITERAL:
4421 memcpy (buffer, token->val.str.text, token->val.str.len);
4422 buffer += token->val.str.len;
4423 break;
4424
4425 case SPELL_NONE:
 /* Nothing is written to BUFFER for an unspellable token.  */
4426 cpp_error (pfile, CPP_DL_ICE,
4427 "unspellable token %s", TOKEN_NAME (token)(token_spellings[(token)->type].name));
4428 break;
4429 }
4430
4431 return buffer;
4432}
4433
4434/* Returns TOKEN spelt as a null-terminated string. The string is
4435 freed when the reader is destroyed. Useful for diagnostics.
 Extended identifiers are written using UCNs (FORSTRING is false).  */
4436unsigned char *
4437cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
4438{
 /* One extra byte for the terminating NUL stored below.  */
4439 unsigned int len = cpp_token_len (token) + 1;
4440 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
4441
4442 end = cpp_spell_token (pfile, token, start, false);
4443 end[0] = '\0';
4444
4445 return start;
4446}
4447
4448/* Returns a pointer to a string which spells the token defined by
4449 TYPE and FLAGS. Used by C front ends, which really should move to
4450 using cpp_token_as_text.  The DIGRAPH flag is checked first, then
 NAMED_OP; otherwise the name comes from token_spellings.  */
4451const char *
4452cpp_type2name (enum cpp_ttype type, unsigned char flags)
4453{
4454 if (flags & DIGRAPH(1 << 1))
4455 return (const char *) cpp_digraph2name (type);
4456 else if (flags & NAMED_OP(1 << 4))
4457 return cpp_named_operator2name (type);
4458
4459 return (const char *) token_spellings[type].name;
4460}
4461
4462/* Writes the spelling of token to FP, without any preceding space.
4463 Separated from cpp_spell_token for efficiency - to avoid stdio
4464 double-buffering.  Non-ASCII identifier bytes are written as \U
 escapes, and CPP_HEADER_NAME literals are wrapped in double quotes.  */
4465void
4466cpp_output_token (const cpp_token *token, FILE *fp)
4467{
4468 switch (TOKEN_SPELL (token)(token_spellings[(token)->type].category))
4469 {
4470 case SPELL_OPERATOR:
4471 {
4472 const unsigned char *spelling;
4473 int c;
4474
4475 if (token->flags & DIGRAPH(1 << 1))
4476 spelling = cpp_digraph2name (token->type);
4477 else if (token->flags & NAMED_OP(1 << 4))
4478 goto spell_ident;
4479 else
4480 spelling = TOKEN_NAME (token)(token_spellings[(token)->type].name);
4481
 /* The first character is written unconditionally; the loop stops
 at the terminating NUL.  */
4482 c = *spelling;
4483 do
4484 putc (c, fp)putc_unlocked (c, fp);
4485 while ((c = *++spelling) != '\0');
4486 }
4487 break;
4488
4489 spell_ident:
4490 case SPELL_IDENT:
4491 {
4492 size_t i;
4493 const unsigned char * name = NODE_NAME (token->val.node.node)(((&(token->val.node.node)->ident))->str);
4494
4495 for (i = 0; i < NODE_LEN (token->val.node.node)(((&(token->val.node.node)->ident))->len); i++)
4496 if (name[i] & ~0x7F)
4497 {
 /* utf8_to_ucn always fills exactly 10 bytes.  */
4498 unsigned char buffer[10];
4499 i += utf8_to_ucn (buffer, name + i) - 1;
4500 fwrite (buffer, 1, 10, fp)fwrite_unlocked (buffer, 1, 10, fp);
4501 }
4502 else
4503 fputc (NODE_NAME (token->val.node.node)[i], fp)fputc_unlocked ((((&(token->val.node.node)->ident))
->str)[i], fp)
;
4504 }
4505 break;
4506
4507 case SPELL_LITERAL:
4508 if (token->type == CPP_HEADER_NAME)
4509 fputc ('"', fp)fputc_unlocked ('"', fp);
4510 fwrite (token->val.str.text, 1, token->val.str.len, fp)fwrite_unlocked (token->val.str.text, 1, token->val.str
.len, fp)
;
4511 if (token->type == CPP_HEADER_NAME)
4512 fputc ('"', fp)fputc_unlocked ('"', fp);
4513 break;
4514
4515 case SPELL_NONE:
4516 /* An error, most probably. */
4517 break;
4518 }
4519}
4520
4521/* Compare two tokens.  Returns nonzero if A and B have the same type
 and flags and their values match according to A's spelling category;
 returns 0 otherwise.  */
4522int
4523_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
4524{
4525 if (a->type == b->type && a->flags == b->flags)
4526 switch (TOKEN_SPELL (a)(token_spellings[(a)->type].category))
4527 {
4528 default: /* Keep compiler happy. */
4529 case SPELL_OPERATOR:
4530 /* token_no is used to track where multiple consecutive ##
4531 tokens were originally located. */
4532 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
4533 case SPELL_NONE:
 /* Only CPP_MACRO_ARG needs a value comparison here.  */
4534 return (a->type != CPP_MACRO_ARG
4535 || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
4536 && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
4537 case SPELL_IDENT:
4538 return (a->val.node.node == b->val.node.node
4539 && a->val.node.spelling == b->val.node.spelling);
4540 case SPELL_LITERAL:
4541 return (a->val.str.len == b->val.str.len
4542 && !memcmp (a->val.str.text, b->val.str.text,
4543 a->val.str.len));
4544 }
4545
4546 return 0;
4547}
4548
4549/* Returns nonzero if a space should be inserted to avoid an
4550 accidental token paste for output. For simplicity, it is
4551 conservative, and occasionally advises a space where one is not
4552 needed, e.g. "." and ".2".  TOKEN1 is the token already output and
 TOKEN2 the one about to follow it.  */
4553int
4554cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
4555 const cpp_token *token2)
4556{
4557 enum cpp_ttype a = token1->type, b = token2->type;
4558 cppchar_t c;
4559
 /* Named operators are treated as names for pasting purposes.  */
4560 if (token1->flags & NAMED_OP(1 << 4))
4561 a = CPP_NAME;
4562 if (token2->flags & NAMED_OP(1 << 4))
4563 b = CPP_NAME;
4564
 /* C is the first character of TOKEN2's spelling when it is a digraph
 or an operator; otherwise it stays EOF.  */
4565 c = EOF(-1);
4566 if (token2->flags & DIGRAPH(1 << 1))
4567 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
4568 else if (token_spellings[b].category == SPELL_OPERATOR)
4569 c = token_spellings[b].name[0];
4570
4571 /* Quickly get everything that can paste with an '='. */
4572 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
4573 return 1;
4574
4575 switch (a)
4576 {
4577 case CPP_GREATER: return c == '>';
4578 case CPP_LESS: return c == '<' || c == '%' || c == ':';
4579 case CPP_PLUS: return c == '+';
4580 case CPP_MINUS: return c == '-' || c == '>';
4581 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
4582 case CPP_MOD: return c == ':' || c == '>';
4583 case CPP_AND: return c == '&';
4584 case CPP_OR: return c == '|';
4585 case CPP_COLON: return c == ':' || c == '>';
4586 case CPP_DEREF: return c == '*';
4587 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
4588 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
4589 case CPP_PRAGMA:
4590 case CPP_NAME: return ((b == CPP_NUMBER
4591 && name_p (pfile, &token2->val.str))
4592 || b == CPP_NAME
4593 || b == CPP_CHAR || b == CPP_STRING); /* L */
4594 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
4595 || b == CPP_CHAR
4596 || c == '.' || c == '+' || c == '-');
4597 /* UCNs */
4598 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
4599 && b == CPP_NAME)
4600 || (CPP_OPTION (pfile, objc)((pfile)->opts.objc)
4601 && token1->val.str.text[0] == '@'
4602 && (b == CPP_NAME || b == CPP_STRING)));
4603 case CPP_LESS_EQ: return c == '>';
4604 case CPP_STRING:
4605 case CPP_WSTRING:
4606 case CPP_UTF8STRING:
4607 case CPP_STRING16:
4608 case CPP_STRING32: return (CPP_OPTION (pfile, user_literals)((pfile)->opts.user_literals)
4609 && (b == CPP_NAME
4610 || (TOKEN_SPELL (token2)(token_spellings[(token2)->type].category) == SPELL_LITERAL
4611 && ISIDST (token2->val.str.text[0])(_sch_istable[(token2->val.str.text[0]) & 0xff] & (
unsigned short)(_sch_isidst))
)));
4612
4613 default: break;
4614 }
4615
4616 return 0;
4617}
4618
4619/* Output all the remaining tokens on the current line, and a newline
4620 character, to FP. Leading whitespace is removed. If there are
4621 macros, special token padding is not performed. */
4622void
4623cpp_output_line (cpp_reader *pfile, FILE *fp)
4624{
4625 const cpp_token *token;
4626
4627 token = cpp_get_token (pfile);
4628 while (token->type != CPP_EOF)
4629 {
4630 cpp_output_token (token, fp);
4631 token = cpp_get_token (pfile);
4632 if (token->flags & PREV_WHITE(1 << 0))
4633 putc (' ', fp)putc_unlocked (' ', fp);
4634 }
4635
4636 putc ('\n', fp)putc_unlocked ('\n', fp);
4637}
4638
4639/* Return a string representation of all the remaining tokens on the
4640 current line. The result is allocated using xmalloc and must be
4641 freed by the caller. */
4642unsigned char *
4643cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
4644{
4645 const cpp_token *token;
4646 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
4647 unsigned int alloced = 120 + out;
4648 unsigned char *result = (unsigned char *) xmalloc (alloced);
4649
4650 /* If DIR_NAME is empty, there are no initial contents. */
4651 if (dir_name)
4652 {
4653 sprintf ((char *) result, "#%s ", dir_name);
4654 out += 2;
4655 }
4656
4657 token = cpp_get_token (pfile);
4658 while (token->type != CPP_EOF)
4659 {
4660 unsigned char *last;
4661 /* Include room for a possible space and the terminating nul. */
4662 unsigned int len = cpp_token_len (token) + 2;
4663
4664 if (out + len > alloced)
4665 {
4666 alloced *= 2;
4667 if (out + len > alloced)
4668 alloced = out + len;
4669 result = (unsigned char *) xrealloc (result, alloced);
4670 }
4671
4672 last = cpp_spell_token (pfile, token, &result[out], 0);
4673 out = last - result;
4674
4675 token = cpp_get_token (pfile);
4676 if (token->flags & PREV_WHITE(1 << 0))
4677 result[out++] = ' ';
4678 }
4679
4680 result[out] = '\0';
4681 return result;
4682}
4683
4684/* Memory buffers. Changing these three constants can have a dramatic
4685 effect on performance. The values here are reasonable defaults,
4686 but might be tuned. If you adjust them, be sure to test across a
4687 range of uses of cpplib, including heavy nested function-like macro
4688 expansion. Also check the change in peak memory usage (NJAMD is a
4689 good tool for this).

 MIN_BUFF_SIZE is the smallest data area new_buff will allocate.
 BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
 _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
 EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
 extending BUFF: MIN_EXTRA plus twice the room still left in BUFF. */
4690#define MIN_BUFF_SIZE8000 8000
4691#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE)(8000 + (MIN_SIZE) * 3 / 2) (MIN_BUFF_SIZE8000 + (MIN_SIZE) * 3 / 2)
4692#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA)(MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2) \
4693 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
4694
/* Compile-time sanity check: a zero-byte request must still be able to
   accept a minimum-sized buffer. */
4695#if MIN_BUFF_SIZE8000 > BUFF_SIZE_UPPER_BOUND (0)(8000 + (0) * 3 / 2)
4696 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE8000!
4697#endif
4698
4699/* Create a new allocation buffer. Place the control block at the end
4700 of the buffer, so that buffer overflows will cause immediate chaos. */
4701static _cpp_buff *
4702new_buff (size_t len)
4703{
4704 _cpp_buff *result;
4705 unsigned char *base;
4706
4707 if (len < MIN_BUFF_SIZE8000)
4708 len = MIN_BUFF_SIZE8000;
4709 len = CPP_ALIGN (len)(((len) + ((__builtin_offsetof(struct dummy, u)) - 1)) & ~
((__builtin_offsetof(struct dummy, u)) - 1))
;
4710
4711#ifdef ENABLE_VALGRIND_ANNOTATIONS
4712 /* Valgrind warns about uses of interior pointers, so put _cpp_buff
4713 struct first. */
4714 size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT)(((sizeof (_cpp_buff)) + ((2 * __builtin_offsetof(struct dummy
, u)) - 1)) & ~((2 * __builtin_offsetof(struct dummy, u))
- 1))
;
4715 base = XNEWVEC (unsigned char, len + slen)((unsigned char *) xmalloc (sizeof (unsigned char) * (len + slen
)))
;
4716 result = (_cpp_buff *) base;
4717 base += slen;
4718#else
4719 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff))((unsigned char *) xmalloc (sizeof (unsigned char) * (len + sizeof
(_cpp_buff))))
;
4720 result = (_cpp_buff *) (base + len);
4721#endif
4722 result->base = base;
4723 result->cur = base;
4724 result->limit = base + len;
4725 result->next = NULL__null;
4726 return result;
4727}
4728
4729/* Place a chain of unwanted allocation buffers on the free list. */
4730void
4731_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
4732{
4733 _cpp_buff *end = buff;
4734
4735 while (end->next)
4736 end = end->next;
4737 end->next = pfile->free_buffs;
4738 pfile->free_buffs = buff;
4739}
4740
4741/* Return a free buffer of size at least MIN_SIZE. */
4742_cpp_buff *
4743_cpp_get_buff (cpp_reader *pfile, size_t min_size)
4744{
4745 _cpp_buff *result, **p;
4746
4747 for (p = &pfile->free_buffs;; p = &(*p)->next)
4748 {
4749 size_t size;
4750
4751 if (*p == NULL__null)
4752 return new_buff (min_size);
4753 result = *p;
4754 size = result->limit - result->base;
4755 /* Return a buffer that's big enough, but don't waste one that's
4756 way too big. */
4757 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size)(8000 + (min_size) * 3 / 2))
4758 break;
4759 }
4760
4761 *p = result->next;
4762 result->next = NULL__null;
4763 result->cur = result->base;
4764 return result;
4765}
4766
4767/* Creates a new buffer with enough space to hold the uncommitted
4768 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
4769 the excess bytes to the new buffer. Chains the new buffer after
4770 BUFF, and returns the new buffer. */
4771_cpp_buff *
4772_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
4773{
4774 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra)(min_extra + ((buff)->limit - (buff)->cur) * 2);
4775 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
4776
4777 buff->next = new_buff;
4778 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff)(size_t) ((buff)->limit - (buff)->cur));
4779 return new_buff;
4780}
4781
4782/* Creates a new buffer with enough space to hold the uncommitted
4783 remaining bytes of the buffer pointed to by BUFF, and at least
4784 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
4785 Chains the new buffer before the buffer pointed to by BUFF, and
4786 updates the pointer to point to the new buffer. */
4787void
4788_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
4789{
4790 _cpp_buff *new_buff, *old_buff = *pbuff;
4791 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra)(min_extra + ((old_buff)->limit - (old_buff)->cur) * 2);
4792
4793 new_buff = _cpp_get_buff (pfile, size);
4794 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff)(size_t) ((old_buff)->limit - (old_buff)->cur));
4795 new_buff->next = old_buff;
4796 *pbuff = new_buff;
4797}
4798
4799/* Free a chain of buffers starting at BUFF. */
4800void
4801_cpp_free_buff (_cpp_buff *buff)
4802{
4803 _cpp_buff *next;
4804
4805 for (; buff; buff = next)
4806 {
4807 next = buff->next;
4808#ifdef ENABLE_VALGRIND_ANNOTATIONS
4809 free (buff);
4810#else
4811 free (buff->base);
4812#endif
4813 }
4814}
4815
4816/* Allocate permanent, unaligned storage of length LEN. */
4817unsigned char *
4818_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
4819{
4820 _cpp_buff *buff = pfile->u_buff;
4821 unsigned char *result = buff->cur;
4822
4823 if (len > (size_t) (buff->limit - result))
4824 {
4825 buff = _cpp_get_buff (pfile, len);
4826 buff->next = pfile->u_buff;
4827 pfile->u_buff = buff;
4828 result = buff->cur;
4829 }
4830
4831 buff->cur = result + len;
4832 return result;
4833}
4834
4835/* Allocate permanent, unaligned storage of length LEN from a_buff.
4836 That buffer is used for growing allocations when saving macro
4837 replacement lists in a #define, and when parsing an answer to an
4838 assertion in #assert, #unassert or #if (and therefore possibly
4839 whilst expanding macros). It therefore must not be used by any
4840 code that they might call: specifically the lexer and the guts of
4841 the macro expander.
4842
4843 All existing other uses clearly fit this restriction: storing
4844 registered pragmas during initialization. */
4845unsigned char *
4846_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
4847{
4848 _cpp_buff *buff = pfile->a_buff;
4849 unsigned char *result = buff->cur;
4850
4851 if (len > (size_t) (buff->limit - result))
4852 {
4853 buff = _cpp_get_buff (pfile, len);
4854 buff->next = pfile->a_buff;
4855 pfile->a_buff = buff;
4856 result = buff->cur;
4857 }
4858
4859 buff->cur = result + len;
4860 return result;
4861}
4862
4863/* Commit or allocate storage from a buffer. */
4864
4865void *
4866_cpp_commit_buff (cpp_reader *pfile, size_t size)
4867{
4868 void *ptr = BUFF_FRONT (pfile->a_buff)((pfile->a_buff)->cur);
4869
4870 if (pfile->hash_table->alloc_subobject)
4871 {
4872 void *copy = pfile->hash_table->alloc_subobject (size);
4873 memcpy (copy, ptr, size);
4874 ptr = copy;
4875 }
4876 else
4877 BUFF_FRONT (pfile->a_buff)((pfile->a_buff)->cur) += size;
4878
4879 return ptr;
4880}
4881
4882/* Say which field of TOK is in use. */
4883
4884enum cpp_token_fld_kind
4885cpp_token_val_index (const cpp_token *tok)
4886{
4887 switch (TOKEN_SPELL (tok)(token_spellings[(tok)->type].category))
4888 {
4889 case SPELL_IDENT:
4890 return CPP_TOKEN_FLD_NODE;
4891 case SPELL_LITERAL:
4892 return CPP_TOKEN_FLD_STR;
4893 case SPELL_OPERATOR:
4894 /* Operands which were originally spelled as ident keep around
4895 the node for the exact spelling. */
4896 if (tok->flags & NAMED_OP(1 << 4))
4897 return CPP_TOKEN_FLD_NODE;
4898 else if (tok->type == CPP_PASTE)
4899 return CPP_TOKEN_FLD_TOKEN_NO;
4900 else
4901 return CPP_TOKEN_FLD_NONE;
4902 case SPELL_NONE:
4903 if (tok->type == CPP_MACRO_ARG)
4904 return CPP_TOKEN_FLD_ARG_NO;
4905 else if (tok->type == CPP_PADDING)
4906 return CPP_TOKEN_FLD_SOURCE;
4907 else if (tok->type == CPP_PRAGMA)
4908 return CPP_TOKEN_FLD_PRAGMA;
4909 /* fall through */
4910 default:
4911 return CPP_TOKEN_FLD_NONE;
4912 }
4913}
4914
4915/* All tokens lexed in R after calling this function will be forced to
4916 have their location_t to be P, until
4917 cpp_stop_forcing_token_locations is called for R. */
4918
4919void
4920cpp_force_token_locations (cpp_reader *r, location_t loc)
4921{
4922 r->forced_token_location = loc;
4923}
4924
4925/* Go back to assigning locations naturally for lexed tokens. */
4926
4927void
4928cpp_stop_forcing_token_locations (cpp_reader *r)
4929{
4930 r->forced_token_location = 0;
4931}
4932
/* PEEK points at a backslash.  If that backslash escapes an end of
   line ("\n", "\r" or "\r\n"), look past the escaped line ending,
   repeating for as long as the character found there is another
   backslash.  Never advance to or beyond LIMIT: when the position
   after a line ending is not below LIMIT, the position of the last
   backslash examined is returned.  A backslash that does not escape a
   line ending is returned as is.  */

static const unsigned char *
do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
{
  for (;;)
    {
      size_t eol_len;

      if (__builtin_expect (peek[1] == '\n', true))
        eol_len = 1;
      else if (__builtin_expect (peek[1] == '\r', false))
        /* A lone CR, or a CR LF pair.  */
        eol_len = peek[2] == '\n' ? 2 : 1;
      else
        return peek;

      const unsigned char *after = peek + 1 + eol_len;
      if (!__builtin_expect (after < limit, true))
        return peek;

      peek = after;

      /* The user might be perverse and splice several lines in a
         row; go round again only for another backslash.  */
      if (*peek != '\\')
        return peek;
    }
}
4962
4963static const unsigned char *
4964do_peek_next (const unsigned char *peek, const unsigned char *limit)
4965{
4966 if (__builtin_expect (*peek == '\\', false))
4967 peek = do_peek_backslash (peek, limit);
4968 return peek;
4969}
4970
/* Step back one character from PEEK, never going below BOUND, and
   skip backwards over an escaped end of line.

   Returns NULL if PEEK is already at BOUND.  Otherwise the previous
   character is examined.  If it is not a line terminator, a pointer
   to it is returned.  If it is '\n' or '\r' (a "\r\n" pair counts as
   one terminator) and the terminator is immediately preceded by a
   backslash, the scan restarts from that backslash, so the result is
   the character before the line splice (or NULL if the backslash sits
   at BOUND).  An unescaped terminator, or one that touches BOUND, is
   returned as is.

   The carriage-return test must compare against '\r'; comparing
   against the letter 'r' would both miss backslash-CR splices and
   treat an ordinary 'r' that follows a backslash as a line ending.  */
static const unsigned char *
do_peek_prev (const unsigned char *peek, const unsigned char *bound)
{
  if (peek == bound)
    return NULL;

  unsigned char c = *--peek;
  if (__builtin_expect (c == '\n', false)
      || __builtin_expect (c == '\r', false))
    {
      /* Nothing precedes the terminator within the bound.  */
      if (peek == bound)
        return peek;

      /* Offset of the character preceding the whole line ending.  */
      int ix = -1;
      if (c == '\n' && peek[ix] == '\r')
        {
          if (peek + ix == bound)
            return peek;
          ix--;
        }

      /* Escaped line ending: continue from the backslash.  */
      if (peek[ix] == '\\')
        return do_peek_prev (peek + ix, bound);

      return peek;
    }

  return peek;
}
4999
5000/* If PEEK[-1] is identifier MATCH, scan past it and trailing white
5001 space. Otherwise return NULL. */
5002
5003static const unsigned char *
5004do_peek_ident (const char *match, const unsigned char *peek,
5005 const unsigned char *limit)
5006{
5007 for (; *++match; peek++)
5008 if (*peek != *match)
5009 {
5010 peek = do_peek_next (peek, limit);
5011 if (*peek != *match)
5012 return NULL__null;
5013 }
5014
5015 /* Must now not be looking at an identifier char. */
5016 peek = do_peek_next (peek, limit);
5017 if (ISIDNUM (*peek)(_sch_istable[(*peek) & 0xff] & (unsigned short)(_sch_isidnum
))
)
5018 return NULL__null;
5019
5020 /* Skip control-line whitespace. */
5021 ws:
5022 while (*peek == ' ' || *peek == '\t')
5023 peek++;
5024 if (__builtin_expect (*peek == '\\', false))
5025 {
5026 peek = do_peek_backslash (peek, limit);
5027 if (*peek != '\\')
5028 goto ws;
5029 }
5030
5031 return peek;
5032}
5033
5034/* Are we looking at a module control line starting as PEEK - 1? */
5035
5036static bool
5037do_peek_module (cpp_reader *pfile, unsigned char c,
5038 const unsigned char *peek, const unsigned char *limit)
5039{
5040 bool import = false;
5041
5042 if (__builtin_expect (c == 'e', false))
5043 {
5044 if (!((peek[0] == 'x' || peek[0] == '\\')
5045 && (peek = do_peek_ident ("export", peek, limit))))
5046 return false;
5047
5048 /* export, peek for import or module. No need to peek __import
5049 here. */
5050 if (peek[0] == 'i')
5051 {
5052 if (!((peek[1] == 'm' || peek[1] == '\\')
5053 && (peek = do_peek_ident ("import", peek + 1, limit))))
5054 return false;
5055 import = true;
5056 }
5057 else if (peek[0] == 'm')
5058 {
5059 if (!((peek[1] == 'o' || peek[1] == '\\')
5060 && (peek = do_peek_ident ("module", peek + 1, limit))))
5061 return false;
5062 }
5063 else
5064 return false;
5065 }
5066 else if (__builtin_expect (c == 'i', false))
5067 {
5068 if (!((peek[0] == 'm' || peek[0] == '\\')
5069 && (peek = do_peek_ident ("import", peek, limit))))
5070 return false;
5071 import = true;
5072 }
5073 else if (__builtin_expect (c == '_', false))
5074 {
5075 /* Needed for translated includes. */
5076 if (!((peek[0] == '_' || peek[0] == '\\')
5077 && (peek = do_peek_ident ("__import", peek, limit))))
5078 return false;
5079 import = true;
5080 }
5081 else if (__builtin_expect (c == 'm', false))
5082 {
5083 if (!((peek[0] == 'o' || peek[0] == '\\')
5084 && (peek = do_peek_ident ("module", peek, limit))))
5085 return false;
5086 }
5087 else
5088 return false;
5089
5090 /* Peek the next character to see if it's good enough. We'll be at
5091 the first non-whitespace char, including skipping an escaped
5092 newline. */
5093 /* ... import followed by identifier, ':', '<' or header-name
5094 preprocessing tokens, or module followed by identifier, ':' or
5095 ';' preprocessing tokens. */
5096 unsigned char p = *peek++;
5097
5098 /* A character literal is ... single quotes, ... optionally preceded
5099 by u8, u, U, or L */
5100 /* A string-literal is a ... double quotes, optionally prefixed by
5101 R, u8, u8R, u, uR, U, UR, L, or LR */
5102 if (p == 'u')
5103 {
5104 peek = do_peek_next (peek, limit);
5105 if (*peek == '8')
5106 {
5107 peek++;
5108 goto peek_u8;
5109 }
5110 goto peek_u;
5111 }
5112 else if (p == 'U' || p == 'L')
5113 {
5114 peek_u8:
5115 peek = do_peek_next (peek, limit);
5116 peek_u:
5117 if (*peek == '\"' || *peek == '\'')
5118 return false;
5119
5120 if (*peek == 'R')
5121 goto peek_R;
5122 /* Identifier. Ok. */
5123 }
5124 else if (p == 'R')
5125 {
5126 peek_R:
5127 if (CPP_OPTION (pfile, rliterals)((pfile)->opts.rliterals))
5128 {
5129 peek = do_peek_next (peek, limit);
5130 if (*peek == '\"')
5131 return false;
5132 }
5133 /* Identifier. Ok. */
5134 }
5135 else if ('Z' - 'A' == 25
5136 ? ((p >= 'A' && p <= 'Z') || (p >= 'a' && p <= 'z') || p == '_')
5137 : ISIDST (p)(_sch_istable[(p) & 0xff] & (unsigned short)(_sch_isidst
))
)
5138 {
5139 /* Identifier. Ok. */
5140 }
5141 else if (p == '<')
5142 {
5143 /* Maybe angle header, ok for import. Reject
5144 '<=', '<<' digraph:'<:'. */
5145 if (!import)
5146 return false;
5147 peek = do_peek_next (peek, limit);
5148 if (*peek == '=' || *peek == '<'
5149 || (*peek == ':' && CPP_OPTION (pfile, digraphs)((pfile)->opts.digraphs)))
5150 return false;
5151 }
5152 else if (p == ';')
5153 {
5154 /* SEMICOLON, ok for module. */
5155 if (import)
5156 return false;
5157 }
5158 else if (p == '"')
5159 {
5160 /* STRING, ok for import. */
5161 if (!import)
5162 return false;
5163 }
5164 else if (p == ':')
5165 {
5166 /* Maybe COLON, ok. Reject '::', digraph:':>'. */
5167 peek = do_peek_next (peek, limit);
5168 if (*peek == ':' || (*peek == '>' && CPP_OPTION (pfile, digraphs)((pfile)->opts.digraphs)))
5169 return false;
5170 }
5171 else
5172 /* FIXME: Detect a unicode character, excluding those not
5173 permitted as the initial character. [lex.name]/1. I presume
5174 we need to check the \[uU] spellings, and directly using
5175 Unicode in say UTF8 form? Or perhaps we do the phase-1
5176 conversion of UTF8 to universal-character-names? */
5177 return false;
5178
5179 return true;
5180}
5181
5182/* Directives-only scanning. Somewhat more relaxed than correct
5183 parsing -- some ill-formed programs will not be rejected.

 Scans the raw characters of PFILE's current buffer, and keeps going
 until PFILE has no buffer left. Everything is reported through CB,
 which always receives PFILE, a task code and DATA first:
 - CPP_DO_print: a line count, a start pointer and a length, for a
 run of text that needs no further processing;
 - CPP_DO_location: the line table's highest_line, after a directive
 or module control line has been handled;
 - CPP_DO_token: a token and its spelling location, for each token
 of a module control line.
 Comments, string/character literals (raw strings included) and
 pp-numbers are recognized only well enough not to mistake their
 contents for the start of a directive. */
5184
5185void
5186cpp_directive_only_process (cpp_reader *pfile,
5187 void *data,
5188 void (*cb) (cpp_reader *, CPP_DO_task, void *, ...))
5189{
5190 bool module_p = CPP_OPTION (pfile, module_directives)((pfile)->opts.module_directives);
5191
5192 do
5193 {
5194 restart:
5195 /* Buffer initialization, but no line cleaning. */
5196 cpp_buffer *buffer = pfile->buffer;
5197 buffer->cur_note = buffer->notes_used = 0;
5198 buffer->cur = buffer->line_base = buffer->next_line;
5199 buffer->need_line = false;
5200 /* Files always end in a newline or carriage return. We rely on this for
5201 character peeking safety. */
5202 gcc_assert (buffer->rlimit[0] == '\n' || buffer->rlimit[0] == '\r')((void)(!(buffer->rlimit[0] == '\n' || buffer->rlimit[0
] == '\r') ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 5202, __FUNCTION__), 0 : 0))
;
5203
/* BASE is the start of the text not yet handed to CB; LINE_COUNT
   counts the line ends seen since BASE; LINE_START is the start of
   the current physical line. */
5204 const unsigned char *base = buffer->cur;
5205 unsigned line_count = 0;
5206 const unsigned char *line_start = base;
5207
/* BOL is true while only whitespace has been seen on the current
   line; RAW is set when the backward scan below finds an 'R' prefix
   in front of a '"'. */
5208 bool bol = true;
5209 bool raw = false;
5210
/* LWM is the low-water mark passed to do_peek_prev as its bound; it
   is moved forward after each comment and literal scanned below. */
5211 const unsigned char *lwm = base;
5212 for (const unsigned char *pos = base, *limit = buffer->rlimit;
5213 pos < limit;)
5214 {
5215 unsigned char c = *pos++;
5216 /* This matches the switch in _cpp_lex_direct. */
5217 switch (c)
5218 {
5219 case ' ': case '\t': case '\f': case '\v':
5220 /* Whitespace, do nothing. */
5221 break;
5222
5223 case '\r': /* MAC line ending, or Windows \r\n */
5224 if (*pos == '\n')
5225 pos++;
5226 /* FALLTHROUGH */
5227
5228 case '\n':
5229 bol = true;
5230
5231 next_line:
5232 CPP_INCREMENT_LINE (pfile, 0)do { const class line_maps *line_table = pfile->line_table
; const struct line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP
(line_table); linenum_type line = SOURCE_LINE (map, line_table
->highest_line); linemap_line_start (pfile->line_table,
line + 1, 0); } while (0)
;
5233 line_count++;
5234 line_start = pos;
5235 break;
5236
5237 case '\\':
5238 /* <backslash><newline> is removed, and doesn't undo any
5239 preceding escape or whatnot. Note that the jump to next_line
 skips the assignment to BOL, so an escaped newline leaves
 BOL unchanged. */
5240 if (*pos == '\n')
5241 {
5242 pos++;
5243 goto next_line;
5244 }
5245 else if (*pos == '\r')
5246 {
5247 if (pos[1] == '\n')
5248 pos++;
5249 pos++;
5250 goto next_line;
5251 }
5252 goto dflt;
5253
5254 case '#':
5255 if (bol)
5256 {
5257 /* Line directive. Unless skipping, first hand the text
 accumulated before the '#' to CB. */
5258 if (pos - 1 > base && !pfile->state.skipping)
5259 cb (pfile, CPP_DO_print, data,
5260 line_count, base, pos - 1 - base);
5261
5262 /* Prep things for directive handling. */
5263 buffer->next_line = pos;
5264 buffer->need_line = true;
5265 bool ok = _cpp_get_fresh_line (pfile);
5266 gcc_checking_assert (ok)((void)(!(ok) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 5266, __FUNCTION__), 0 : 0))
;
5267
5268 /* Ensure proper column numbering for generated
5269 error messages. */
5270 buffer->line_base -= pos - line_start;
5271
/* The second argument is true when something (whitespace)
   preceded the '#' on its line. */
5272 _cpp_handle_directive (pfile, line_start + 1 != pos);
5273
5274 /* Sanitize the line settings. Duplicate #include's can
5275 mess things up. */
5276 // FIXME: Necessary?
5277 pfile->line_table->highest_location
5278 = pfile->line_table->highest_line;
5279
5280 if (!pfile->state.skipping
5281 && pfile->buffer->next_line < pfile->buffer->rlimit)
5282 cb (pfile, CPP_DO_location, data,
5283 pfile->line_table->highest_line);
5284
/* Start over from pfile->buffer, which the directive may have
   changed. */
5285 goto restart;
5286 }
5287 goto dflt;
5288
5289 case '/':
5290 {
5291 const unsigned char *peek = do_peek_next (pos, limit);
5292 if (!(*peek == '/' || *peek == '*'))
5293 goto dflt;
5294
5295 /* Line or block comment. STAR records that the previous
 character was the '*' that may close a block comment; ESC
 records that the previous character was a backslash. */
5296 bool is_block = *peek == '*';
5297 bool star = false;
5298 bool esc = false;
5299 location_t sloc
5300 = linemap_position_for_column (pfile->line_table,
5301 pos - line_start);
5302
5303 while (pos < limit)
5304 {
5305 char c = *pos++;
5306 switch (c)
5307 {
5308 case '\\':
5309 esc = true;
5310 break;
5311
5312 case '\r':
5313 if (*pos == '\n')
5314 pos++;
5315 /* FALLTHROUGH */
5316
5317 case '\n':
5318 {
5319 CPP_INCREMENT_LINE (pfile, 0)do { const class line_maps *line_table = pfile->line_table
; const struct line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP
(line_table); linenum_type line = SOURCE_LINE (map, line_table
->highest_line); linemap_line_start (pfile->line_table,
line + 1, 0); } while (0)
;
5320 line_count++;
5321 line_start = pos;
/* An unescaped line end terminates a line comment. */
5322 if (!esc && !is_block)
5323 {
5324 bol = true;
5325 goto done_comment;
5326 }
5327 }
5328 if (!esc)
5329 star = false;
5330 esc = false;
5331 break;
5332
5333 case '*':
/* Ignore the '*' that opens the comment itself. */
5334 if (pos > peek)
5335 star = is_block;
5336 esc = false;
5337 break;
5338
5339 case '/':
5340 if (star)
5341 goto done_comment;
5342 /* FALLTHROUGH */
5343
5344 default:
5345 star = false;
5346 esc = false;
5347 break;
5348 }
5349 }
5350 if (pos < limit || is_block)
5351 cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
5352 "unterminated comment");
5353 done_comment:
5354 lwm = pos;
5355 break;
5356 }
5357
5358 case '\'':
5359 if (!CPP_OPTION (pfile, digit_separators)((pfile)->opts.digit_separators))
5360 goto delimited_string;
5361
5362 /* Possibly a number punctuator. */
5363 if (!ISIDNUM (*do_peek_next (pos, limit))(_sch_istable[(*do_peek_next (pos, limit)) & 0xff] & (
unsigned short)(_sch_isidnum))
)
5364 goto delimited_string;
5365
5366 goto quote_peek;
5367
5368 case '\"':
5369 if (!CPP_OPTION (pfile, rliterals)((pfile)->opts.rliterals))
5370 goto delimited_string;
5371
5372 quote_peek:
5373 {
5374 /* For ' see if it's a number punctuator
5375 \.?<digit>(<digit>|<identifier-nondigit>
5376 |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */
5377 /* For " see if it's a raw string
5378 {U,L,u,u8}R. This includes CPP_NUMBER detection,
5379 because that could be 0e+R. */
/* The scan below walks backwards from the quote with
   do_peek_prev, stopping at LWM. */
5380 const unsigned char *peek = pos - 1;
5381 bool quote_first = c == '"';
5382 bool quote_eight = false;
5383 bool maybe_number_start = false;
5384 bool want_number = false;
5385
5386 while ((peek = do_peek_prev (peek, lwm)))
5387 {
5388 unsigned char p = *peek;
5389 if (quote_first)
5390 {
5391 if (!raw)
5392 {
5393 if (p != 'R')
5394 break;
5395 raw = true;
5396 continue;
5397 }
5398
5399 quote_first = false;
5400 if (p == 'L' || p == 'U' || p == 'u')
5401 ;
5402 else if (p == '8')
5403 quote_eight = true;
5404 else
5405 goto second_raw;
5406 }
5407 else if (quote_eight)
5408 {
5409 if (p != 'u')
5410 {
5411 raw = false;
5412 break;
5413 }
5414 quote_eight = false;
5415 }
5416 else if (c == '"')
5417 {
5418 second_raw:;
5419 if (!want_number && ISIDNUM (p)(_sch_istable[(p) & 0xff] & (unsigned short)(_sch_isidnum
))
)
5420 {
5421 raw = false;
5422 break;
5423 }
5424 }
5425
5426 if (ISDIGIT (p)(_sch_istable[(p) & 0xff] & (unsigned short)(_sch_isdigit
))
)
5427 maybe_number_start = true;
5428 else if (p == '.')
5429 want_number = true;
5430 else if (ISIDNUM (p)(_sch_istable[(p) & 0xff] & (unsigned short)(_sch_isidnum
))
)
5431 maybe_number_start = false;
5432 else if (p == '+' || p == '-')
5433 {
5434 if (const unsigned char *peek_prev
5435 = do_peek_prev (peek, lwm))
5436 {
5437 p = *peek_prev;
5438 if (p == 'e' || p == 'E'
5439 || p == 'p' || p == 'P')
5440 {
5441 want_number = true;
5442 maybe_number_start = false;
5443 }
5444 else
5445 break;
5446 }
5447 else
5448 break;
5449 }
5450 else if (p == '\'' || p == '\"')
5451 {
5452 /* If this is lwm, this must be the end of a
5453 previous string. So this is a trailing
5454 literal type, (a) if those are allowed,
5455 and (b) maybe_start is false. Otherwise
5456 this must be a CPP_NUMBER because we've
5457 met another ', and we'd have checked that
5458 in its own right. */
5459 if (peek == lwm && CPP_OPTION (pfile, uliterals)((pfile)->opts.uliterals))
5460 {
5461 if (!maybe_number_start && !want_number)
5462 /* Must be a literal type. */
5463 raw = false;
5464 }
5465 else if (p == '\''
5466 && CPP_OPTION (pfile, digit_separators)((pfile)->opts.digit_separators))
5467 maybe_number_start = true;
5468 break;
5469 }
5470 else if (c == '\'')
5471 break;
5472 else if (!quote_first && !quote_eight)
5473 break;
5474 }
5475
5476 if (maybe_number_start)
5477 {
5478 if (c == '\'')
5479 /* A CPP NUMBER. */
5480 goto dflt;
5481 raw = false;
5482 }
5483
5484 goto delimited_string;
5485 }
5486
5487 delimited_string:
5488 {
5489 /* (Possibly raw) string or char literal. END is the closing
 quote character to look for. */
5490 unsigned char end = c;
5491 int delim_len = -1;
5492 const unsigned char *delim = NULL__null;
5493 location_t sloc = linemap_position_for_column (pfile->line_table,
5494 pos - line_start);
5495 int esc = 0;
5496
5497 if (raw)
5498 {
5499 /* There can be no line breaks in the delimiter. On a bad
 delimiter the literal is rescanned from DELIM as an
 ordinary (non-raw) string. */
5500 delim = pos;
5501 for (delim_len = 0; (c = *pos++) != '('; delim_len++)
5502 {
5503 if (delim_len == 16)
5504 {
5505 cpp_error_with_line (pfile, CPP_DL_ERROR,
5506 sloc, 0,
5507 "raw string delimiter"
5508 " longer than %d"
5509 " characters",
5510 delim_len);
5511 raw = false;
5512 pos = delim;
5513 break;
5514 }
5515 if (strchr (") \\\t\v\f\n", c))
5516 {
5517 cpp_error_with_line (pfile, CPP_DL_ERROR,
5518 sloc, 0,
5519 "invalid character '%c'"
5520 " in raw string"
5521 " delimiter", c);
5522 raw = false;
5523 pos = delim;
5524 break;
5525 }
5526 if (pos >= limit)
5527 goto bad_string;
5528 }
5529 }
5530
5531 while (pos < limit)
5532 {
5533 char c = *pos++;
5534 switch (c)
5535 {
5536 case '\\':
5537 if (!raw)
5538 esc++;
5539 break;
5540
5541 case '\r':
5542 if (*pos == '\n')
5543 pos++;
5544 /* FALLTHROUGH */
5545
5546 case '\n':
5547 {
5548 CPP_INCREMENT_LINE (pfile, 0)do { const class line_maps *line_table = pfile->line_table
; const struct line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP
(line_table); linenum_type line = SOURCE_LINE (map, line_table
->highest_line); linemap_line_start (pfile->line_table,
line + 1, 0); } while (0)
;
5549 line_count++;
5550 line_start = pos;
5551 }
5552 if (esc)
5553 esc--;
5554 break;
5555
5556 case ')':
/* A raw string ends at ')' followed by the delimiter and the
   closing quote. */
5557 if (raw
5558 && pos + delim_len + 1 < limit
5559 && pos[delim_len] == end
5560 && !memcmp (delim, pos, delim_len))
5561 {
5562 pos += delim_len + 1;
5563 raw = false;
5564 goto done_string;
5565 }
5566 break;
5567
5568 default:
/* An ordinary literal ends at END unless an odd number of
   backslashes precedes it. */
5569 if (!raw && !(esc & 1) && c == end)
5570 goto done_string;
5571 esc = 0;
5572 break;
5573 }
5574 }
5575 bad_string:
5576 cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
5577 "unterminated literal");
5578
5579 done_string:
5580 raw = false;
5581 lwm = pos - 1;
5582 }
5583 goto dflt;
5584
5585 case '_':
5586 case 'e':
5587 case 'i':
5588 case 'm':
5589 if (bol && module_p && !pfile->state.skipping
5590 && do_peek_module (pfile, c, pos, limit))
5591 {
5592 /* We've seen the start of a module control line.
5593 Start up the tokenizer. */
5594 pos--; /* Backup over the first character. */
5595
5596 /* Backup over whitespace to start of line. */
5597 while (pos > line_start
5598 && (pos[-1] == ' ' || pos[-1] == '\t'))
5599 pos--;
5600
5601 if (pos > base)
5602 cb (pfile, CPP_DO_print, data, line_count, base, pos - base);
5603
5604 /* Prep things for directive handling. */
5605 buffer->next_line = pos;
5606 buffer->need_line = true;
5607
5608 /* Now get tokens until the PRAGMA_EOL. */
5609 do
5610 {
5611 location_t spelling;
5612 const cpp_token *tok
5613 = cpp_get_token_with_location (pfile, &spelling);
5614
5615 gcc_assert (pfile->state.in_deferred_pragma((void)(!(pfile->state.in_deferred_pragma || tok->type ==
CPP_PRAGMA_EOL) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 5616, __FUNCTION__), 0 : 0))
5616 || tok->type == CPP_PRAGMA_EOL)((void)(!(pfile->state.in_deferred_pragma || tok->type ==
CPP_PRAGMA_EOL) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/libcpp/lex.cc"
, 5616, __FUNCTION__), 0 : 0))
;
5617 cb (pfile, CPP_DO_token, data, tok, spelling);
5618 }
5619 while (pfile->state.in_deferred_pragma);
5620
5621 if (pfile->buffer->next_line < pfile->buffer->rlimit)
5622 cb (pfile, CPP_DO_location, data,
5623 pfile->line_table->highest_line);
5624
5625 pfile->mi_valid = false;
5626 goto restart;
5627 }
5628 goto dflt;
5629
5630 default:
5631 dflt:
/* Any other character: the line no longer starts a directive. */
5632 bol = false;
5633 pfile->mi_valid = false;
5634 break;
5635 }
5636 }
5637
/* End of buffer: hand the remaining text to CB unless skipping. */
5638 if (buffer->rlimit > base && !pfile->state.skipping)
5639 {
5640 const unsigned char *limit = buffer->rlimit;
5641 /* If the file was not newline terminated, add rlimit, which is
5642 guaranteed to point to a newline, to the end of our range. */
5643 if (limit[-1] != '\n')
5644 {
5645 limit++;
5646 CPP_INCREMENT_LINE (pfile, 0)do { const class line_maps *line_table = pfile->line_table
; const struct line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP
(line_table); linenum_type line = SOURCE_LINE (map, line_table
->highest_line); linemap_line_start (pfile->line_table,
line + 1, 0); } while (0)
;
5647 line_count++;
5648 }
5649 cb (pfile, CPP_DO_print, data, line_count, base, limit - base);
5650 }
5651
5652 _cpp_pop_buffer (pfile);
5653 }
5654 while (pfile->buffer);
5655}