File: | build/gcc/tracer.cc |
Warning: | line 392, column 25 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* The tracer pass for the GNU compiler. | |||
2 | Contributed by Jan Hubicka, SuSE Labs. | |||
3 | Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC. | |||
4 | Copyright (C) 2001-2023 Free Software Foundation, Inc. | |||
5 | ||||
6 | This file is part of GCC. | |||
7 | ||||
8 | GCC is free software; you can redistribute it and/or modify it | |||
9 | under the terms of the GNU General Public License as published by | |||
10 | the Free Software Foundation; either version 3, or (at your option) | |||
11 | any later version. | |||
12 | ||||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT | |||
14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |||
15 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |||
16 | License for more details. | |||
17 | ||||
18 | You should have received a copy of the GNU General Public License | |||
19 | along with GCC; see the file COPYING3. If not see | |||
20 | <http://www.gnu.org/licenses/>. */ | |||
21 | ||||
22 | /* This pass performs the tail duplication needed for superblock formation. | |||
23 | For more information see: | |||
24 | ||||
25 | Design and Analysis of Profile-Based Optimization in Compaq's | |||
26 | Compilation Tools for Alpha; Journal of Instruction-Level | |||
27 | Parallelism 3 (2000) 1-25 | |||
28 | ||||
29 | Unlike Compaq's implementation we don't do the loop peeling as most | |||
30 | probably a better job can be done by a special pass and we don't | |||
31 | need to worry too much about the code size implications as the tail | |||
32 | duplicates are crossjumped again if optimizations are not | |||
33 | performed. */ | |||
34 | ||||
35 | ||||
36 | #include "config.h" | |||
37 | #include "system.h" | |||
38 | #include "coretypes.h" | |||
39 | #include "backend.h" | |||
40 | #include "rtl.h" | |||
41 | #include "tree.h" | |||
42 | #include "gimple.h" | |||
43 | #include "cfghooks.h" | |||
44 | #include "tree-pass.h" | |||
45 | #include "profile.h" | |||
46 | #include "cfganal.h" | |||
47 | #include "gimple-iterator.h" | |||
48 | #include "tree-cfg.h" | |||
49 | #include "tree-ssa.h" | |||
50 | #include "tree-inline.h" | |||
51 | #include "cfgloop.h" | |||
52 | #include "alloc-pool.h" | |||
53 | #include "fibonacci_heap.h" | |||
54 | #include "tracer.h" | |||
55 | ||||
56 | static void analyze_bb (basic_block, int *); | |||
57 | static bool better_p (const_edge, const_edge); | |||
58 | static edge find_best_successor (basic_block); | |||
59 | static edge find_best_predecessor (basic_block); | |||
60 | static int find_trace (basic_block, basic_block *); | |||
61 | ||||
62 | /* Minimal outgoing edge probability considered for superblock formation. */ | |||
63 | static int probability_cutoff; | |||
64 | static int branch_ratio_cutoff; | |||
65 | ||||
66 | /* A bit BB->index is set if BB has already been seen, i.e. it is | |||
67 | connected to some trace already. */ | |||
68 | static sbitmap bb_seen; | |||
69 | ||||
70 | static inline void | |||
71 | mark_bb_seen (basic_block bb) | |||
72 | { | |||
73 | unsigned int size = SBITMAP_SIZE (bb_seen)((bb_seen)->n_bits); | |||
74 | ||||
75 | if ((unsigned int)bb->index >= size) | |||
76 | bb_seen = sbitmap_resize (bb_seen, size * 2, 0); | |||
77 | ||||
78 | bitmap_set_bit (bb_seen, bb->index); | |||
79 | } | |||
80 | ||||
81 | static inline bool | |||
82 | bb_seen_p (basic_block bb) | |||
83 | { | |||
84 | return bitmap_bit_p (bb_seen, bb->index); | |||
85 | } | |||
86 | ||||
87 | static sbitmap can_duplicate_bb; | |||
88 | ||||
89 | /* Cache VAL as value of can_duplicate_bb_p for BB. */ | |||
90 | static inline void | |||
91 | cache_can_duplicate_bb_p (const_basic_block bb, bool val) | |||
92 | { | |||
93 | if (val) | |||
94 | bitmap_set_bit (can_duplicate_bb, bb->index); | |||
95 | } | |||
96 | ||||
97 | /* Return cached value of can_duplicate_bb_p for BB. */ | |||
98 | static bool | |||
99 | cached_can_duplicate_bb_p (const_basic_block bb) | |||
100 | { | |||
101 | if (can_duplicate_bb) | |||
102 | { | |||
103 | unsigned int size = SBITMAP_SIZE (can_duplicate_bb)((can_duplicate_bb)->n_bits); | |||
104 | if ((unsigned int)bb->index < size) | |||
105 | return bitmap_bit_p (can_duplicate_bb, bb->index); | |||
106 | ||||
107 | /* Assume added bb's should not be duplicated. */ | |||
108 | return false; | |||
109 | } | |||
110 | ||||
111 | return can_duplicate_block_p (bb); | |||
112 | } | |||
113 | ||||
114 | /* Return true if we should ignore the basic block for purposes of tracing. */ | |||
115 | bool | |||
116 | ignore_bb_p (const_basic_block bb) | |||
117 | { | |||
118 | if (bb->index < NUM_FIXED_BLOCKS(2)) | |||
119 | return true; | |||
120 | if (optimize_bb_for_size_p (bb)) | |||
121 | return true; | |||
122 | ||||
123 | return !cached_can_duplicate_bb_p (bb); | |||
124 | } | |||
125 | ||||
126 | /* Return number of instructions in the block. */ | |||
127 | ||||
128 | static void | |||
129 | analyze_bb (basic_block bb, int *count) | |||
130 | { | |||
131 | gimple_stmt_iterator gsi; | |||
132 | gimple *stmt; | |||
133 | int n = 0; | |||
134 | ||||
135 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |||
136 | { | |||
137 | stmt = gsi_stmt (gsi); | |||
138 | n += estimate_num_insns (stmt, &eni_size_weights); | |||
139 | } | |||
140 | *count = n; | |||
141 | ||||
142 | cache_can_duplicate_bb_p (bb, can_duplicate_block_p (CONST_CAST_BB (bb)(const_cast<struct basic_block_def *> (((bb)))))); | |||
143 | } | |||
144 | ||||
145 | /* Return true if E1 is more frequent than E2. */ | |||
146 | static bool | |||
147 | better_p (const_edge e1, const_edge e2) | |||
148 | { | |||
149 | if ((e1->count () > e2->count ()) || (e1->count () < e2->count ())) | |||
150 | return e1->count () > e2->count (); | |||
151 | /* This is needed to avoid changes in the decision after | |||
152 | CFG is modified. */ | |||
153 | if (e1->src != e2->src) | |||
154 | return e1->src->index > e2->src->index; | |||
155 | return e1->dest->index > e2->dest->index; | |||
156 | } | |||
157 | ||||
158 | /* Return most frequent successor of basic block BB. */ | |||
159 | ||||
160 | static edge | |||
161 | find_best_successor (basic_block bb) | |||
162 | { | |||
163 | edge e; | |||
164 | edge best = NULLnullptr; | |||
165 | edge_iterator ei; | |||
166 | ||||
167 | FOR_EACH_EDGE (e, ei, bb->succs)for ((ei) = ei_start_1 (&((bb->succs))); ei_cond ((ei) , &(e)); ei_next (&(ei))) | |||
168 | { | |||
169 | if (!e->count ().initialized_p ()) | |||
170 | return NULLnullptr; | |||
171 | if (!best || better_p (e, best)) | |||
172 | best = e; | |||
173 | } | |||
174 | if (!best || ignore_bb_p (best->dest)) | |||
175 | return NULLnullptr; | |||
176 | if (!best->probability.initialized_p () | |||
177 | || best->probability.to_reg_br_prob_base () <= probability_cutoff) | |||
178 | return NULLnullptr; | |||
179 | return best; | |||
180 | } | |||
181 | ||||
182 | /* Return most frequent predecessor of basic block BB. */ | |||
183 | ||||
184 | static edge | |||
185 | find_best_predecessor (basic_block bb) | |||
186 | { | |||
187 | edge e; | |||
188 | edge best = NULLnullptr; | |||
189 | edge_iterator ei; | |||
190 | ||||
191 | FOR_EACH_EDGE (e, ei, bb->preds)for ((ei) = ei_start_1 (&((bb->preds))); ei_cond ((ei) , &(e)); ei_next (&(ei))) | |||
192 | { | |||
193 | if (!e->count ().initialized_p ()) | |||
194 | return NULLnullptr; | |||
195 | if (!best || better_p (e, best)) | |||
196 | best = e; | |||
197 | } | |||
198 | if (!best || ignore_bb_p (best->src)) | |||
199 | return NULLnullptr; | |||
200 | if (bb->count.initialized_p () | |||
201 | && (best->count ().to_frequency (cfun(cfun + 0)) * REG_BR_PROB_BASE10000 | |||
202 | < bb->count.to_frequency (cfun(cfun + 0)) * branch_ratio_cutoff)) | |||
203 | return NULLnullptr; | |||
204 | return best; | |||
205 | } | |||
206 | ||||
207 | /* Find the trace using bb and record it in the TRACE array. | |||
208 | Return number of basic blocks recorded. */ | |||
209 | ||||
210 | static int | |||
211 | find_trace (basic_block bb, basic_block *trace) | |||
212 | { | |||
213 | int i = 0; | |||
214 | edge e; | |||
215 | ||||
216 | if (dump_file) | |||
217 | fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun(cfun + 0))); | |||
218 | ||||
219 | while ((e = find_best_predecessor (bb)) != NULLnullptr) | |||
220 | { | |||
221 | basic_block bb2 = e->src; | |||
222 | if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX(EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_EH | EDGE_PRESERVE ))) | |||
223 | || find_best_successor (bb2) != e) | |||
224 | break; | |||
225 | if (dump_file) | |||
226 | fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun(cfun + 0))); | |||
227 | bb = bb2; | |||
228 | } | |||
229 | if (dump_file) | |||
230 | fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun(cfun + 0))); | |||
231 | trace[i++] = bb; | |||
232 | ||||
233 | /* Follow the trace in forward direction. */ | |||
234 | while ((e = find_best_successor (bb)) != NULLnullptr) | |||
235 | { | |||
236 | bb = e->dest; | |||
237 | if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX(EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_EH | EDGE_PRESERVE ))) | |||
238 | || find_best_predecessor (bb) != e) | |||
239 | break; | |||
240 | if (dump_file) | |||
241 | fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun(cfun + 0))); | |||
242 | trace[i++] = bb; | |||
243 | } | |||
244 | if (dump_file) | |||
245 | fprintf (dump_file, "\n"); | |||
246 | return i; | |||
247 | } | |||
248 | ||||
249 | /* Duplicate block BB2, placing it after BB in the CFG. Return the | |||
250 | newly created block. */ | |||
251 | basic_block | |||
252 | transform_duplicate (basic_block bb, basic_block bb2) | |||
253 | { | |||
254 | edge e; | |||
255 | basic_block copy; | |||
256 | ||||
257 | e = find_edge (bb, bb2); | |||
258 | ||||
259 | copy = duplicate_block (bb2, e, bb); | |||
260 | flush_pending_stmts (e); | |||
261 | ||||
262 | add_phi_args_after_copy (©, 1, NULLnullptr); | |||
263 | ||||
264 | return (copy); | |||
265 | } | |||
266 | ||||
267 | /* Look for basic blocks in frequency order, construct traces and tail duplicate | |||
268 | if profitable. */ | |||
269 | ||||
270 | static bool | |||
271 | tail_duplicate (void) | |||
272 | { | |||
273 | auto_vec<fibonacci_node<long, basic_block_def>*> blocks; | |||
274 | blocks.safe_grow_cleared (last_basic_block_for_fn (cfun)(((cfun + 0))->cfg->x_last_basic_block), true); | |||
275 | ||||
276 | basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun))((basic_block *) xmalloc (sizeof (basic_block) * ((((cfun + 0 ))->cfg->x_n_basic_blocks)))); | |||
277 | int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun))((int *) xmalloc (sizeof (int) * ((((cfun + 0))->cfg->x_last_basic_block )))); | |||
278 | int ninsns = 0, nduplicated = 0; | |||
279 | gcov_type weighted_insns = 0, traced_insns = 0; | |||
280 | fibonacci_heap<long, basic_block_def> heap (LONG_MIN(-9223372036854775807L -1L)); | |||
281 | gcov_type cover_insns; | |||
282 | int max_dup_insns; | |||
283 | basic_block bb; | |||
284 | bool changed = false; | |||
285 | ||||
286 | /* Create an oversized sbitmap to reduce the chance that we need to | |||
287 | resize it. */ | |||
288 | bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun)(((cfun + 0))->cfg->x_last_basic_block) * 2); | |||
289 | bitmap_clear (bb_seen); | |||
290 | can_duplicate_bb = sbitmap_alloc (last_basic_block_for_fn (cfun)(((cfun + 0))->cfg->x_last_basic_block)); | |||
291 | bitmap_clear (can_duplicate_bb); | |||
292 | initialize_original_copy_tables (); | |||
293 | ||||
294 | if (profile_info && profile_status_for_fn (cfun)(((cfun + 0))->cfg->x_profile_status) == PROFILE_READ) | |||
295 | probability_cutoff = param_tracer_min_branch_probability_feedbackglobal_options.x_param_tracer_min_branch_probability_feedback; | |||
296 | else | |||
297 | probability_cutoff = param_tracer_min_branch_probabilityglobal_options.x_param_tracer_min_branch_probability; | |||
298 | probability_cutoff = REG_BR_PROB_BASE10000 / 100 * probability_cutoff; | |||
299 | ||||
300 | branch_ratio_cutoff = | |||
301 | (REG_BR_PROB_BASE10000 / 100 * param_tracer_min_branch_ratioglobal_options.x_param_tracer_min_branch_ratio); | |||
302 | ||||
303 | FOR_EACH_BB_FN (bb, cfun)for (bb = ((cfun + 0))->cfg->x_entry_block_ptr->next_bb ; bb != ((cfun + 0))->cfg->x_exit_block_ptr; bb = bb-> next_bb) | |||
304 | { | |||
305 | int n; | |||
306 | analyze_bb (bb, &n); | |||
307 | if (!ignore_bb_p (bb)) | |||
308 | blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun(cfun + 0)), bb); | |||
309 | ||||
310 | counts [bb->index] = n; | |||
311 | ninsns += n; | |||
312 | weighted_insns += n * bb->count.to_frequency (cfun(cfun + 0)); | |||
313 | } | |||
314 | ||||
315 | if (profile_info
| |||
316 | cover_insns = param_tracer_dynamic_coverage_feedbackglobal_options.x_param_tracer_dynamic_coverage_feedback; | |||
317 | else | |||
318 | cover_insns = param_tracer_dynamic_coverageglobal_options.x_param_tracer_dynamic_coverage; | |||
319 | cover_insns = (weighted_insns * cover_insns + 50) / 100; | |||
320 | max_dup_insns = (ninsns * param_tracer_max_code_growthglobal_options.x_param_tracer_max_code_growth + 50) / 100; | |||
321 | ||||
322 | while (traced_insns
| |||
323 | && !heap.empty ()) | |||
324 | { | |||
325 | basic_block bb = heap.extract_min (); | |||
326 | int n, pos; | |||
327 | ||||
328 | if (!bb) | |||
329 | break; | |||
330 | ||||
331 | blocks[bb->index] = NULLnullptr; | |||
332 | ||||
333 | if (ignore_bb_p (bb)) | |||
334 | continue; | |||
335 | gcc_assert (!bb_seen_p (bb))((void)(!(!bb_seen_p (bb)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tracer.cc" , 335, __FUNCTION__), 0 : 0)); | |||
336 | ||||
337 | n = find_trace (bb, trace); | |||
338 | ||||
339 | bb = trace[0]; | |||
340 | traced_insns += bb->count.to_frequency (cfun(cfun + 0)) * counts [bb->index]; | |||
341 | if (blocks[bb->index]) | |||
342 | { | |||
343 | heap.delete_node (blocks[bb->index]); | |||
344 | blocks[bb->index] = NULLnullptr; | |||
345 | } | |||
346 | ||||
347 | for (pos = 1; pos < n; pos++) | |||
348 | { | |||
349 | basic_block bb2 = trace[pos]; | |||
350 | ||||
351 | if (blocks[bb2->index]) | |||
352 | { | |||
353 | heap.delete_node (blocks[bb2->index]); | |||
354 | blocks[bb2->index] = NULLnullptr; | |||
355 | } | |||
356 | traced_insns += bb2->count.to_frequency (cfun(cfun + 0)) * counts [bb2->index]; | |||
357 | if (EDGE_COUNT (bb2->preds)vec_safe_length (bb2->preds) > 1 | |||
358 | && can_duplicate_block_p (bb2) | |||
359 | /* We have the tendency to duplicate the loop header | |||
360 | of all do { } while loops. Do not do that - it is | |||
361 | not profitable and it might create a loop with multiple | |||
362 | entries or at least rotate the loop. */ | |||
363 | && bb2->loop_father->header != bb2) | |||
364 | { | |||
365 | nduplicated += counts [bb2->index]; | |||
366 | basic_block copy = transform_duplicate (bb, bb2); | |||
367 | ||||
368 | /* Reconsider the original copy of block we've duplicated. | |||
369 | Removing the most common predecessor may make it to be | |||
370 | head. */ | |||
371 | blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun(cfun + 0)), bb2); | |||
372 | ||||
373 | if (dump_file) | |||
374 | fprintf (dump_file, "Duplicated %i as %i [%i]\n", | |||
375 | bb2->index, copy->index, copy->count.to_frequency (cfun(cfun + 0))); | |||
376 | ||||
377 | bb2 = copy; | |||
378 | changed = true; | |||
379 | } | |||
380 | mark_bb_seen (bb2); | |||
381 | bb = bb2; | |||
382 | /* In case the trace became infrequent, stop duplicating. */ | |||
383 | if (ignore_bb_p (bb)) | |||
384 | break; | |||
385 | } | |||
386 | if (dump_file) | |||
387 | fprintf (dump_file, " covered now %.1f\n\n", | |||
388 | traced_insns * 100.0 / weighted_insns); | |||
389 | } | |||
390 | if (dump_file) | |||
391 | fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, | |||
392 | nduplicated * 100 / ninsns); | |||
| ||||
393 | ||||
394 | free_original_copy_tables (); | |||
395 | sbitmap_free (bb_seen); | |||
396 | sbitmap_free (can_duplicate_bb); | |||
397 | can_duplicate_bb = NULLnullptr; | |||
398 | free (trace); | |||
399 | free (counts); | |||
400 | ||||
401 | return changed; | |||
402 | } | |||
403 | ||||
404 | namespace { | |||
405 | ||||
406 | const pass_data pass_data_tracer = | |||
407 | { | |||
408 | GIMPLE_PASS, /* type */ | |||
409 | "tracer", /* name */ | |||
410 | OPTGROUP_NONE, /* optinfo_flags */ | |||
411 | TV_TRACER, /* tv_id */ | |||
412 | 0, /* properties_required */ | |||
413 | 0, /* properties_provided */ | |||
414 | 0, /* properties_destroyed */ | |||
415 | 0, /* todo_flags_start */ | |||
416 | TODO_update_ssa(1 << 11), /* todo_flags_finish */ | |||
417 | }; | |||
418 | ||||
419 | class pass_tracer : public gimple_opt_pass | |||
420 | { | |||
421 | public: | |||
422 | pass_tracer (gcc::context *ctxt) | |||
423 | : gimple_opt_pass (pass_data_tracer, ctxt) | |||
424 | {} | |||
425 | ||||
426 | /* opt_pass methods: */ | |||
427 | bool gate (function *) final override | |||
428 | { | |||
429 | return (optimizeglobal_options.x_optimize > 0 && flag_tracerglobal_options.x_flag_tracer && flag_reorder_blocksglobal_options.x_flag_reorder_blocks); | |||
430 | } | |||
431 | ||||
432 | unsigned int execute (function *) final override; | |||
433 | ||||
434 | }; // class pass_tracer | |||
435 | ||||
436 | unsigned int | |||
437 | pass_tracer::execute (function *fun) | |||
438 | { | |||
439 | bool changed; | |||
440 | ||||
441 | if (n_basic_blocks_for_fn (fun)((fun)->cfg->x_n_basic_blocks) <= NUM_FIXED_BLOCKS(2) + 1) | |||
| ||||
442 | return 0; | |||
443 | ||||
444 | mark_dfs_back_edges (); | |||
445 | if (dump_file) | |||
446 | brief_dump_cfg (dump_file, dump_flags); | |||
447 | ||||
448 | /* Trace formation is done on the fly inside tail_duplicate */ | |||
449 | changed = tail_duplicate (); | |||
450 | if (changed) | |||
451 | { | |||
452 | free_dominance_info (CDI_DOMINATORS); | |||
453 | /* If we changed the CFG schedule loops for fixup by cleanup_cfg. */ | |||
454 | loops_state_set (LOOPS_NEED_FIXUP); | |||
455 | } | |||
456 | ||||
457 | if (dump_file) | |||
458 | brief_dump_cfg (dump_file, dump_flags); | |||
459 | ||||
460 | return changed ? TODO_cleanup_cfg(1 << 5) : 0; | |||
461 | } | |||
462 | } // anon namespace | |||
463 | ||||
464 | gimple_opt_pass * | |||
465 | make_pass_tracer (gcc::context *ctxt) | |||
466 | { | |||
467 | return new pass_tracer (ctxt); | |||
468 | } |