File: build/gcc/tree-ssa-loop-ivcanon.cc
Warning: line 1434, column 7: Value stored to 'changed' is never read
1 | /* Induction variable canonicalization and loop peeling. |
2 | Copyright (C) 2004-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | /* This pass detects the loops that iterate a constant number of times, |
21 | adds a canonical induction variable (step -1, tested against 0) |
22 | and replaces the exit test. This enables the less powerful rtl |
23 | level analysis to use this information. |
24 | |
25 | This might spoil the code in some cases (by increasing register pressure). |
26 | Note that in the case the new variable is not needed, ivopts will get rid |
27 | of it, so it might only be a problem when there are no other linear induction |
28 | variables. In that case the created optimization possibilities are likely |
29 | to pay up. |
30 | |
31 | We also perform |
32 | - complete unrolling (or peeling) when the loop is rolling few enough |
33 | times |
34 | - simple peeling (i.e. copying a few initial iterations prior to the loop) |
35 | when the number of iterations estimate is known (typically from the profile |
36 | info). */ |
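(Illustrative sketch, not part of the file shown above; the identifiers below are invented. It shows, in hand-written C, the kind of rewrite the comment describes: the original counted exit test is replaced by a canonical IV that starts at the iteration count, steps by -1 and is compared against 0. The real transformation happens on GIMPLE, not on C source.)

    /* A loop before canonicalization.  */
    void before (int *a, unsigned n)
    {
      for (unsigned i = 0; i < n; i++)   /* exit test: i < n */
        a[i] = 0;
    }

    /* Roughly how the loop behaves after a canonical IV is added.  */
    void after_sketch (int *a, unsigned n)
    {
      unsigned ivtmp = n;                /* remaining iterations */
      for (unsigned i = 0; ivtmp != 0; i++, ivtmp--)  /* exit test: ivtmp != 0 */
        a[i] = 0;
    }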
37 | |
38 | #include "config.h" |
39 | #include "system.h" |
40 | #include "coretypes.h" |
41 | #include "backend.h" |
42 | #include "tree.h" |
43 | #include "gimple.h" |
44 | #include "cfghooks.h" |
45 | #include "tree-pass.h" |
46 | #include "ssa.h" |
47 | #include "cgraph.h" |
48 | #include "gimple-pretty-print.h" |
49 | #include "fold-const.h" |
50 | #include "profile.h" |
51 | #include "gimple-iterator.h" |
52 | #include "gimple-fold.h" |
53 | #include "tree-eh.h" |
54 | #include "tree-cfg.h" |
55 | #include "tree-ssa-loop-manip.h" |
56 | #include "tree-ssa-loop-niter.h" |
57 | #include "tree-ssa-loop.h" |
58 | #include "tree-into-ssa.h" |
59 | #include "cfgloop.h" |
60 | #include "tree-chrec.h" |
61 | #include "tree-scalar-evolution.h" |
62 | #include "tree-inline.h" |
63 | #include "tree-cfgcleanup.h" |
64 | #include "builtins.h" |
65 | #include "tree-ssa-sccvn.h" |
66 | #include "dbgcnt.h" |
67 | |
68 | /* Specifies types of loops that may be unrolled. */ |
69 | |
70 | enum unroll_level |
71 | { |
72 | UL_SINGLE_ITER, /* Only loops that exit immediately in the first |
73 | iteration. */ |
74 | UL_NO_GROWTH, /* Only loops whose unrolling will not cause increase |
75 | of code size. */ |
76 | UL_ALL /* All suitable loops. */ |
77 | }; |
78 | |
79 | /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT |
80 | is the exit edge whose condition is replaced. The ssa versions of the new |
81 | IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER |
82 | if they are not NULL. */ |
83 | |
84 | void |
85 | create_canonical_iv (class loop *loop, edge exit, tree niter, |
86 | tree *var_before = NULL, tree *var_after = NULL) |
87 | { |
88 | edge in; |
89 | tree type, var; |
90 | gcond *cond; |
91 | gimple_stmt_iterator incr_at; |
92 | enum tree_code cmp; |
93 | |
94 | if (dump_file && (dump_flags & TDF_DETAILS)) |
95 | { |
96 | fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num); |
97 | print_generic_expr (dump_file, niter, TDF_SLIM); |
98 | fprintf (dump_file, " iterations.\n"); |
99 | } |
100 | |
101 | cond = as_a <gcond *> (last_stmt (exit->src)); |
102 | in = EDGE_SUCC (exit->src, 0); |
103 | if (in == exit) |
104 | in = EDGE_SUCC (exit->src, 1); |
105 | |
106 | /* Note that we do not need to worry about overflows, since |
107 | type of niter is always unsigned and all comparisons are |
108 | just for equality/nonequality -- i.e. everything works |
109 | with a modulo arithmetics. */ |
110 | |
111 | type = TREE_TYPE (niter); |
112 | niter = fold_build2 (PLUS_EXPR, type, |
113 | niter, |
114 | build_int_cst (type, 1)); |
115 | incr_at = gsi_last_bb (in->src); |
116 | create_iv (niter, |
117 | build_int_cst (type, -1), |
118 | NULL_TREE, loop, |
119 | &incr_at, false, var_before, &var); |
120 | if (var_after) |
121 | *var_after = var; |
122 | |
123 | cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR; |
124 | gimple_cond_set_code (cond, cmp); |
125 | gimple_cond_set_lhs (cond, var); |
126 | gimple_cond_set_rhs (cond, build_int_cst (type, 0)); |
127 | update_stmt (cond); |
128 | } |
129 | |
130 | /* Describe size of loop as detected by tree_estimate_loop_size. */ |
131 | struct loop_size |
132 | { |
133 | /* Number of instructions in the loop. */ |
134 | int overall; |
135 | |
136 | /* Number of instructions that will be likely optimized out in |
137 | peeled iterations of loop (i.e. computation based on induction |
138 | variable where induction variable starts at known constant.) */ |
139 | int eliminated_by_peeling; |
140 | |
141 | /* Same statistics for last iteration of loop: it is smaller because |
142 | instructions after exit are not executed. */ |
143 | int last_iteration; |
144 | int last_iteration_eliminated_by_peeling; |
145 | |
146 | /* If some IV computation will become constant. */ |
147 | bool constant_iv; |
148 | |
149 | /* Number of call stmts that are not a builtin and are pure or const |
150 | present on the hot path. */ |
151 | int num_pure_calls_on_hot_path; |
152 | /* Number of call stmts that are not a builtin and are not pure nor const |
153 | present on the hot path. */ |
154 | int num_non_pure_calls_on_hot_path; |
155 | /* Number of statements other than calls in the loop. */ |
156 | int non_call_stmts_on_hot_path; |
157 | /* Number of branches seen on the hot path. */ |
158 | int num_branches_on_hot_path; |
159 | }; |
160 | |
161 | /* Return true if OP in STMT will be constant after peeling LOOP. */ |
162 | |
163 | static bool |
164 | constant_after_peeling (tree op, gimple *stmt, class loop *loop) |
165 | { |
166 | if (CONSTANT_CLASS_P (op)) |
167 | return true; |
168 | |
169 | /* We can still fold accesses to constant arrays when index is known. */ |
170 | if (TREE_CODE (op) != SSA_NAME) |
171 | { |
172 | tree base = op; |
173 | |
174 | /* First make fast look if we see constant array inside. */ |
175 | while (handled_component_p (base)) |
176 | base = TREE_OPERAND (base, 0); |
177 | if ((DECL_P (base) |
178 | && ctor_for_folding (base) != error_mark_node) |
179 | || CONSTANT_CLASS_P (base)) |
180 | { |
181 | /* If so, see if we understand all the indices. */ |
182 | base = op; |
183 | while (handled_component_p (base)) |
184 | { |
185 | if (TREE_CODE (base) == ARRAY_REF |
186 | && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop)) |
187 | return false; |
188 | base = TREE_OPERAND (base, 0); |
189 | } |
190 | return true; |
191 | } |
192 | return false; |
193 | } |
194 | |
195 | /* Induction variables are constants when defined in loop. */ |
196 | if (loop_containing_stmt (stmt) != loop) |
197 | return false; |
198 | tree ev = analyze_scalar_evolution (loop, op); |
199 | if (chrec_contains_undetermined (ev) |
200 | || chrec_contains_symbols (ev)) |
201 | return false; |
202 | return true; |
203 | } |
204 | |
205 | /* Computes an estimated number of insns in LOOP. |
206 | EXIT (if non-NULL) is an exit edge that will be eliminated in all but the last |
207 | iteration of the loop. |
208 | EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last iteration |
209 | of loop. |
210 | Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT. |
211 | Stop estimating after UPPER_BOUND is met. Return true in this case. */ |
212 | |
213 | static bool |
214 | tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel, |
215 | struct loop_size *size, int upper_bound) |
216 | { |
217 | basic_block *body = get_loop_body (loop); |
218 | gimple_stmt_iterator gsi; |
219 | unsigned int i; |
220 | bool after_exit; |
221 | auto_vec<basic_block> path = get_loop_hot_path (loop); |
222 | |
223 | size->overall = 0; |
224 | size->eliminated_by_peeling = 0; |
225 | size->last_iteration = 0; |
226 | size->last_iteration_eliminated_by_peeling = 0; |
227 | size->num_pure_calls_on_hot_path = 0; |
228 | size->num_non_pure_calls_on_hot_path = 0; |
229 | size->non_call_stmts_on_hot_path = 0; |
230 | size->num_branches_on_hot_path = 0; |
231 | size->constant_iv = 0; |
232 | |
233 | if (dump_file && (dump_flags & TDF_DETAILS)) |
234 | fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num); |
235 | for (i = 0; i < loop->num_nodes; i++) |
236 | { |
237 | if (edge_to_cancel && body[i] != edge_to_cancel->src |
238 | && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src)) |
239 | after_exit = true; |
240 | else |
241 | after_exit = false; |
242 | if (dump_file && (dump_flags & TDF_DETAILS)) |
243 | fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, |
244 | after_exit); |
245 | |
246 | for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) |
247 | { |
248 | gimple *stmt = gsi_stmt (gsi); |
249 | int num = estimate_num_insns (stmt, &eni_size_weights); |
250 | bool likely_eliminated = false; |
251 | bool likely_eliminated_last = false; |
252 | bool likely_eliminated_peeled = false; |
253 | |
254 | if (dump_file && (dump_flags & TDF_DETAILS)) |
255 | { |
256 | fprintf (dump_file, " size: %3i ", num); |
257 | print_gimple_stmt (dump_file, gsi_stmt (gsi), 0); |
258 | } |
259 | |
260 | /* Look for reasons why we might optimize this stmt away. */ |
261 | |
262 | if (!gimple_has_side_effects (stmt)) |
263 | { |
264 | /* Exit conditional. */ |
265 | if (exit && body[i] == exit->src |
266 | && stmt == last_stmt (exit->src)) |
267 | { |
268 | if (dump_file && (dump_flags & TDF_DETAILS)) |
269 | fprintf (dump_file, " Exit condition will be eliminated " |
270 | "in peeled copies.\n"); |
271 | likely_eliminated_peeled = true; |
272 | } |
273 | if (edge_to_cancel && body[i] == edge_to_cancel->src |
274 | && stmt == last_stmt (edge_to_cancel->src)) |
275 | { |
276 | if (dump_file && (dump_flags & TDF_DETAILS)) |
277 | fprintf (dump_file, " Exit condition will be eliminated " |
278 | "in last copy.\n"); |
279 | likely_eliminated_last = true; |
280 | } |
281 | /* Sets of IV variables */ |
282 | if (gimple_code (stmt) == GIMPLE_ASSIGN |
283 | && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop)) |
284 | { |
285 | if (dump_file && (dump_flags & TDF_DETAILS)) |
286 | fprintf (dump_file, " Induction variable computation will" |
287 | " be folded away.\n"); |
288 | likely_eliminated = true; |
289 | } |
290 | /* Assignments of IV variables. */ |
291 | else if (gimple_code (stmt) == GIMPLE_ASSIGN |
292 | && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME |
293 | && constant_after_peeling (gimple_assign_rhs1 (stmt), |
294 | stmt, loop) |
295 | && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS |
296 | || constant_after_peeling (gimple_assign_rhs2 (stmt), |
297 | stmt, loop)) |
298 | && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS) |
299 | { |
300 | size->constant_iv = true; |
301 | if (dump_file && (dump_flags & TDF_DETAILS)) |
302 | fprintf (dump_file, |
303 | " Constant expression will be folded away.\n"); |
304 | likely_eliminated = true; |
305 | } |
306 | /* Conditionals. */ |
307 | else if ((gimple_code (stmt) == GIMPLE_COND |
308 | && constant_after_peeling (gimple_cond_lhs (stmt), stmt, |
309 | loop) |
310 | && constant_after_peeling (gimple_cond_rhs (stmt), stmt, |
311 | loop) |
312 | /* We don't simplify all constant compares so make sure |
313 | they are not both constant already. See PR70288. */ |
314 | && (! is_gimple_min_invariant (gimple_cond_lhs (stmt)) |
315 | || ! is_gimple_min_invariant |
316 | (gimple_cond_rhs (stmt)))) |
317 | || (gimple_code (stmt) == GIMPLE_SWITCH |
318 | && constant_after_peeling (gimple_switch_index ( |
319 | as_a <gswitch *> |
320 | (stmt)), |
321 | stmt, loop) |
322 | && ! is_gimple_min_invariant |
323 | (gimple_switch_index |
324 | (as_a <gswitch *> (stmt))))) |
325 | { |
326 | if (dump_file && (dump_flags & TDF_DETAILS)) |
327 | fprintf (dump_file, " Constant conditional.\n"); |
328 | likely_eliminated = true; |
329 | } |
330 | } |
331 | |
332 | size->overall += num; |
333 | if (likely_eliminated || likely_eliminated_peeled) |
334 | size->eliminated_by_peeling += num; |
335 | if (!after_exit) |
336 | { |
337 | size->last_iteration += num; |
338 | if (likely_eliminated || likely_eliminated_last) |
339 | size->last_iteration_eliminated_by_peeling += num; |
340 | } |
341 | if ((size->overall * 3 / 2 - size->eliminated_by_peeling |
342 | - size->last_iteration_eliminated_by_peeling) > upper_bound) |
343 | { |
344 | free (body); |
345 | return true; |
346 | } |
347 | } |
348 | } |
349 | while (path.length ()) |
350 | { |
351 | basic_block bb = path.pop (); |
352 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
353 | { |
354 | gimple *stmt = gsi_stmt (gsi); |
355 | if (gimple_code (stmt) == GIMPLE_CALL |
356 | && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))) |
357 | { |
358 | int flags = gimple_call_flags (stmt); |
359 | if (flags & (ECF_PURE | ECF_CONST)) |
360 | size->num_pure_calls_on_hot_path++; |
361 | else |
362 | size->num_non_pure_calls_on_hot_path++; |
363 | size->num_branches_on_hot_path ++; |
364 | } |
365 | /* Count inexpensive calls as non-calls, because they will likely |
366 | expand inline. */ |
367 | else if (gimple_code (stmt) != GIMPLE_DEBUG) |
368 | size->non_call_stmts_on_hot_path++; |
369 | if (((gimple_code (stmt) == GIMPLE_COND |
370 | && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop) |
371 | || !constant_after_peeling (gimple_cond_rhs (stmt), stmt, |
372 | loop))) |
373 | || (gimple_code (stmt) == GIMPLE_SWITCH |
374 | && !constant_after_peeling (gimple_switch_index ( |
375 | as_a <gswitch *> (stmt)), |
376 | stmt, loop))) |
377 | && (!exit || bb != exit->src)) |
378 | size->num_branches_on_hot_path++; |
379 | } |
380 | } |
381 | |
382 | if (dump_file && (dump_flags & TDF_DETAILS)) |
383 | fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall, |
384 | size->eliminated_by_peeling, size->last_iteration, |
385 | size->last_iteration_eliminated_by_peeling); |
386 | |
387 | free (body); |
388 | return false; |
389 | } |
390 | |
391 | /* Estimate number of insns of completely unrolled loop. |
392 | It is (NUNROLL + 1) * size of loop body with taking into account |
393 | the fact that in last copy everything after exit conditional |
394 | is dead and that some instructions will be eliminated after |
395 | peeling. |
396 | |
397 | Loop body is likely going to simplify further, this is difficult |
398 | to guess, we just decrease the result by 1/3. */ |
399 | |
400 | static unsigned HOST_WIDE_INT |
401 | estimated_unrolled_size (struct loop_size *size, |
402 | unsigned HOST_WIDE_INT nunroll) |
403 | { |
404 | HOST_WIDE_INT unr_insns = ((nunroll) |
405 | * (HOST_WIDE_INT) (size->overall |
406 | - size->eliminated_by_peeling)); |
407 | if (!nunroll) |
408 | unr_insns = 0; |
409 | unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling; |
410 | |
411 | unr_insns = unr_insns * 2 / 3; |
412 | if (unr_insns <= 0) |
413 | unr_insns = 1; |
414 | |
415 | return unr_insns; |
416 | } |
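(Worked example with made-up numbers for the function above: with SIZE->overall = 30, SIZE->eliminated_by_peeling = 10, SIZE->last_iteration = 20, SIZE->last_iteration_eliminated_by_peeling = 10 and NUNROLL = 4, the body computes 4 * (30 - 10) = 80, adds 20 - 10 = 10 for the last copy, and scales the sum 90 by 2/3, returning 60.)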
417 | |
418 | /* Loop LOOP is known to not loop. See if there is an edge in the loop |
419 | body that can be removed to make the loop always exit and at |
420 | the same time not make any code potentially executed |
421 | during the last iteration dead. |
422 | |
423 | After complete unrolling we still may get rid of the conditional |
424 | on the exit in the last copy even if we have no idea what it does. |
425 | This is quite common case for loops of form |
426 | |
427 | int a[5]; |
428 | for (i=0;i<b;i++) |
429 | a[i]=0; |
430 | |
431 | Here we prove the loop to iterate 5 times but we do not know |
432 | it from induction variable. |
433 | |
434 | For now we handle only simple case where there is exit condition |
435 | just before the latch block and the latch block contains no statements |
436 | with side effect that may otherwise terminate the execution of loop |
437 | (such as by EH or by terminating the program or longjmp). |
438 | |
439 | In the general case we may want to cancel the paths leading to statements |
440 | loop-niter identified as having undefined effect in the last iteration. |
441 | The other cases are hopefully rare and will be cleaned up later. */ |
442 | |
443 | static edge |
444 | loop_edge_to_cancel (class loop *loop) |
445 | { |
446 | unsigned i; |
447 | edge edge_to_cancel; |
448 | gimple_stmt_iterator gsi; |
449 | |
450 | /* We want only one predecessor of the loop. */ |
451 | if (EDGE_COUNT (loop->latch->preds) > 1) |
452 | return NULL; |
453 | |
454 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
455 | |
456 | FOR_EACH_VEC_ELT (exits, i, edge_to_cancel) |
457 | { |
458 | /* Find the other edge than the loop exit |
459 | leaving the conditional. */ |
460 | if (EDGE_COUNT (edge_to_cancel->src->succs) != 2) |
461 | continue; |
462 | if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel) |
463 | edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1); |
464 | else |
465 | edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0); |
466 | |
467 | /* We only can handle conditionals. */ |
468 | if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) |
469 | continue; |
470 | |
471 | /* We should never have conditionals in the loop latch. */ |
472 | gcc_assert (edge_to_cancel->dest != loop->header); |
473 | |
474 | /* Check that it leads to loop latch. */ |
475 | if (edge_to_cancel->dest != loop->latch) |
476 | continue; |
477 | |
478 | /* Verify that the code in loop latch does nothing that may end program |
479 | execution without really reaching the exit. This may include |
480 | non-pure/const function calls, EH statements, volatile ASMs etc. */ |
481 | for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi)) |
482 | if (gimple_has_side_effects (gsi_stmt (gsi))) |
483 | return NULL; |
484 | return edge_to_cancel; |
485 | } |
486 | return NULL; |
487 | } |
488 | |
489 | /* Remove all tests for exits that are known to be taken after LOOP was |
490 | peeled NPEELED times. Put gcc_unreachable before every statement |
491 | known to not be executed. */ |
492 | |
493 | static bool |
494 | remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled) |
495 | { |
496 | class nb_iter_bound *elt; |
497 | bool changed = false; |
498 | |
499 | for (elt = loop->bounds; elt; elt = elt->next) |
500 | { |
501 | /* If statement is known to be undefined after peeling, turn it |
502 | into unreachable (or trap when debugging experience is supposed |
503 | to be good). */ |
504 | if (!elt->is_exit |
505 | && wi::ltu_p (elt->bound, npeeled)) |
506 | { |
507 | gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt); |
508 | location_t loc = gimple_location (elt->stmt); |
509 | gcall *stmt = gimple_build_builtin_unreachable (loc); |
510 | gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); |
511 | split_block (gimple_bb (stmt), stmt); |
512 | changed = true; |
513 | if (dump_file && (dump_flags & TDF_DETAILS)) |
514 | { |
515 | fprintf (dump_file, "Forced statement unreachable: "); |
516 | print_gimple_stmt (dump_file, elt->stmt, 0); |
517 | } |
518 | } |
519 | /* If we know the exit will be taken after peeling, update. */ |
520 | else if (elt->is_exit |
521 | && wi::leu_p (elt->bound, npeeled)) |
522 | { |
523 | basic_block bb = gimple_bb (elt->stmt); |
524 | edge exit_edge = EDGE_SUCC (bb, 0); |
525 | |
526 | if (dump_file && (dump_flags & TDF_DETAILS)) |
527 | { |
528 | fprintf (dump_file, "Forced exit to be taken: "); |
529 | print_gimple_stmt (dump_file, elt->stmt, 0); |
530 | } |
531 | if (!loop_exit_edge_p (loop, exit_edge)) |
532 | exit_edge = EDGE_SUCC (bb, 1); |
533 | exit_edge->probability = profile_probability::always (); |
534 | gcc_checking_assert (loop_exit_edge_p (loop, exit_edge)); |
535 | gcond *cond_stmt = as_a <gcond *> (elt->stmt); |
536 | if (exit_edge->flags & EDGE_TRUE_VALUE) |
537 | gimple_cond_make_true (cond_stmt); |
538 | else |
539 | gimple_cond_make_false (cond_stmt); |
540 | update_stmt (cond_stmt); |
541 | changed = true; |
542 | } |
543 | } |
544 | return changed; |
545 | } |
546 | |
547 | /* Remove all exits that are known to be never taken because of the loop bound |
548 | discovered. */ |
549 | |
550 | static bool |
551 | remove_redundant_iv_tests (class loop *loop) |
552 | { |
553 | class nb_iter_bound *elt; |
554 | bool changed = false; |
555 | |
556 | if (!loop->any_upper_bound) |
557 | return false; |
558 | for (elt = loop->bounds; elt; elt = elt->next) |
559 | { |
560 | /* Exit is pointless if it won't be taken before loop reaches |
561 | upper bound. */ |
562 | if (elt->is_exit && loop->any_upper_bound |
563 | && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound)) |
564 | { |
565 | basic_block bb = gimple_bb (elt->stmt); |
566 | edge exit_edge = EDGE_SUCC (bb, 0); |
567 | class tree_niter_desc niter; |
568 | |
569 | if (!loop_exit_edge_p (loop, exit_edge)) |
570 | exit_edge = EDGE_SUCC (bb, 1); |
571 | |
572 | /* Only when we know the actual number of iterations, not |
573 | just a bound, we can remove the exit. */ |
574 | if (!number_of_iterations_exit (loop, exit_edge, |
575 | &niter, false, false) |
576 | || !integer_onep (niter.assumptions) |
577 | || !integer_zerop (niter.may_be_zero) |
578 | || !niter.niter |
579 | || TREE_CODE (niter.niter) != INTEGER_CST |
580 | || !wi::ltu_p (loop->nb_iterations_upper_bound, |
581 | wi::to_widest (niter.niter))) |
582 | continue; |
583 | |
584 | if (dump_file && (dump_flags & TDF_DETAILS)) |
585 | { |
586 | fprintf (dump_file, "Removed pointless exit: "); |
587 | print_gimple_stmt (dump_file, elt->stmt, 0); |
588 | } |
589 | gcond *cond_stmt = as_a <gcond *> (elt->stmt); |
590 | if (exit_edge->flags & EDGE_TRUE_VALUE) |
591 | gimple_cond_make_false (cond_stmt); |
592 | else |
593 | gimple_cond_make_true (cond_stmt); |
594 | update_stmt (cond_stmt); |
595 | changed = true; |
596 | } |
597 | } |
598 | return changed; |
599 | } |
600 | |
601 | /* Stores loops that will be unlooped and edges that will be removed |
602 | after we process whole loop tree. */ |
603 | static vec<loop_p> loops_to_unloop; |
604 | static vec<int> loops_to_unloop_nunroll; |
605 | static vec<edge> edges_to_remove; |
606 | /* Stores loops that have been peeled. */ |
607 | static bitmap peeled_loops; |
608 | |
609 | /* Cancel all fully unrolled loops by putting __builtin_unreachable |
610 | on the latch edge. |
611 | We do it after all unrolling since unlooping moves basic blocks |
612 | across loop boundaries trashing loop closed SSA form as well |
613 | as SCEV info needed to be intact during unrolling. |
614 | |
615 | IRRED_INVALIDATED is used to bookkeep if information about |
616 | irreducible regions may become invalid as a result |
617 | of the transformation. |
618 | LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case |
619 | when we need to go into loop closed SSA form. */ |
620 | |
621 | static void |
622 | unloop_loops (bitmap loop_closed_ssa_invalidated, |
623 | bool *irred_invalidated) |
624 | { |
625 | while (loops_to_unloop.length ()) |
626 | { |
627 | class loop *loop = loops_to_unloop.pop (); |
628 | int n_unroll = loops_to_unloop_nunroll.pop (); |
629 | basic_block latch = loop->latch; |
630 | edge latch_edge = loop_latch_edge (loop); |
631 | int flags = latch_edge->flags; |
632 | location_t locus = latch_edge->goto_locus; |
633 | gcall *stmt; |
634 | gimple_stmt_iterator gsi; |
635 | |
636 | remove_exits_and_undefined_stmts (loop, n_unroll); |
637 | |
638 | /* Unloop destroys the latch edge. */ |
639 | unloop (loop, irred_invalidated, loop_closed_ssa_invalidated); |
640 | |
641 | /* Create new basic block for the latch edge destination and wire |
642 | it in. */ |
643 | stmt = gimple_build_builtin_unreachable (locus); |
644 | latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags); |
645 | latch_edge->probability = profile_probability::never (); |
646 | latch_edge->flags |= flags; |
647 | latch_edge->goto_locus = locus; |
648 | |
649 | add_bb_to_loop (latch_edge->dest, current_loops->tree_root); |
650 | latch_edge->dest->count = profile_count::zero (); |
651 | set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); |
652 | |
653 | gsi = gsi_start_bb (latch_edge->dest); |
654 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); |
655 | } |
656 | loops_to_unloop.release (); |
657 | loops_to_unloop_nunroll.release (); |
658 | |
659 | /* Remove edges in peeled copies. Given remove_path removes dominated |
660 | regions we need to cope with removal of already removed paths. */ |
661 | unsigned i; |
662 | edge e; |
663 | auto_vec<int, 20> src_bbs; |
664 | src_bbs.reserve_exact (edges_to_remove.length ()); |
665 | FOR_EACH_VEC_ELT (edges_to_remove, i, e) |
666 | src_bbs.quick_push (e->src->index); |
667 | FOR_EACH_VEC_ELT (edges_to_remove, i, e) |
668 | if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i])) |
669 | { |
670 | bool ok = remove_path (e, irred_invalidated, |
671 | loop_closed_ssa_invalidated); |
672 | gcc_assert (ok); |
673 | } |
674 | edges_to_remove.release (); |
675 | } |
676 | |
677 | /* Tries to unroll LOOP completely, i.e. NITER times. |
678 | UL determines which loops we are allowed to unroll. |
679 | EXIT is the exit of the loop that should be eliminated. |
680 | MAXITER specifies a bound on the number of iterations, -1 if it is |
681 | not known or too large for HOST_WIDE_INT. The location |
682 | LOCUS corresponding to the loop is used when emitting |
683 | a summary of the unroll to the dump file. */ |
684 | |
685 | static bool |
686 | try_unroll_loop_completely (class loop *loop, |
687 | edge exit, tree niter, bool may_be_zero, |
688 | enum unroll_level ul, |
689 | HOST_WIDE_INT maxiter, |
690 | dump_user_location_t locus, bool allow_peel) |
691 | { |
692 | unsigned HOST_WIDE_INT n_unroll = 0; |
693 | bool n_unroll_found = false; |
694 | edge edge_to_cancel = NULL; |
695 | |
696 | /* See if we proved number of iterations to be low constant. |
697 | |
698 | EXIT is an edge that will be removed in all but last iteration of |
699 | the loop. |
700 | |
701 | EDGE_TO_CANCEL is an edge that will be removed from the last iteration |
702 | of the unrolled sequence and is expected to make the final loop not |
703 | rolling. |
704 | |
705 | If the number of executions of the loop is determined by a standard induction |
706 | variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving |
707 | from the iv test. */ |
708 | if (tree_fits_uhwi_p (niter)) |
709 | { |
710 | n_unroll = tree_to_uhwi (niter); |
711 | n_unroll_found = true; |
712 | edge_to_cancel = EDGE_SUCC (exit->src, 0); |
713 | if (edge_to_cancel == exit) |
714 | edge_to_cancel = EDGE_SUCC (exit->src, 1); |
715 | } |
716 | /* We do not know the number of iterations and thus we cannot eliminate |
717 | the EXIT edge. */ |
718 | else |
719 | exit = NULL; |
720 | |
721 | /* See if we can improve our estimate by using recorded loop bounds. */ |
722 | if ((maxiter == 0 || ul != UL_SINGLE_ITER) |
723 | && maxiter >= 0 |
724 | && (!n_unroll_found || (unsigned HOST_WIDE_INT) maxiter < n_unroll)) |
725 | { |
726 | n_unroll = maxiter; |
727 | n_unroll_found = true; |
728 | /* Loop terminates before the IV variable test, so we cannot |
729 | remove it in the last iteration. */ |
730 | edge_to_cancel = NULL; |
731 | /* If we do not allow peeling and the loop still iterates, only |
732 | allow cases that do not grow code. */ |
733 | if (!allow_peel && maxiter != 0) |
734 | ul = UL_NO_GROWTH; |
735 | } |
736 | |
737 | if (!n_unroll_found) |
738 | return false; |
739 | |
740 | if (!loop->unroll |
741 | && n_unroll > (unsigned) param_max_completely_peel_times) |
742 | { |
743 | if (dump_file && (dump_flags & TDF_DETAILS)) |
744 | fprintf (dump_file, "Not unrolling loop %d " |
745 | "(--param max-completely-peel-times limit reached).\n", |
746 | loop->num); |
747 | return false; |
748 | } |
749 | |
750 | if (!edge_to_cancel) |
751 | edge_to_cancel = loop_edge_to_cancel (loop); |
752 | |
753 | if (n_unroll) |
754 | { |
755 | if (ul == UL_SINGLE_ITER) |
756 | return false; |
757 | |
758 | if (loop->unroll) |
759 | { |
760 | /* If the unrolling factor is too large, bail out. */ |
761 | if (n_unroll > (unsigned)loop->unroll) |
762 | { |
763 | if (dump_file && (dump_flags & TDF_DETAILS)) |
764 | fprintf (dump_file, |
765 | "Not unrolling loop %d: " |
766 | "user didn't want it unrolled completely.\n", |
767 | loop->num); |
768 | return false; |
769 | } |
770 | } |
771 | else |
772 | { |
773 | struct loop_size size; |
774 | /* EXIT can be removed only if we are sure it passes first N_UNROLL |
775 | iterations. */ |
776 | bool remove_exit = (exit && niter |
777 | && TREE_CODE (niter) == INTEGER_CST |
778 | && wi::leu_p (n_unroll, wi::to_widest (niter))); |
779 | bool large |
780 | = tree_estimate_loop_size |
781 | (loop, remove_exit ? exit : NULL, edge_to_cancel, &size, |
782 | param_max_completely_peeled_insns); |
783 | if (large) |
784 | { |
785 | if (dump_file && (dump_flags & TDF_DETAILS)) |
786 | fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", |
787 | loop->num); |
788 | return false; |
789 | } |
790 | |
791 | unsigned HOST_WIDE_INT ninsns = size.overall; |
792 | unsigned HOST_WIDE_INT unr_insns |
793 | = estimated_unrolled_size (&size, n_unroll); |
794 | if (dump_file && (dump_flags & TDF_DETAILS)) |
795 | { |
796 | fprintf (dump_file, " Loop size: %d\n", (int) ninsns); |
797 | fprintf (dump_file, " Estimated size after unrolling: %d\n", |
798 | (int) unr_insns); |
799 | } |
800 | |
801 | /* If the code is going to shrink, we don't need to be extra |
802 | cautious on guessing if the unrolling is going to be |
803 | profitable. */ |
804 | if (unr_insns |
805 | /* If there is an IV variable that will become constant, we |
806 | save one instruction in the loop prologue that we do not |
807 | account for otherwise. */ |
808 | <= ninsns + (size.constant_iv != false)) |
809 | ; |
810 | /* We unroll only inner loops, because we do not consider it |
811 | profitable otherwise. We still can cancel the loopback edge |
812 | of a loop that does not roll; this is always a good idea. */ |
813 | else if (ul == UL_NO_GROWTH) |
814 | { |
815 | if (dump_file && (dump_flags & TDF_DETAILS)) |
816 | fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", |
817 | loop->num); |
818 | return false; |
819 | } |
820 | /* Outer loops tend to be less interesting candidates for |
821 | complete unrolling unless we can do a lot of propagation |
822 | into the inner loop body. For now we disable outer loop |
823 | unrolling when the code would grow. */ |
824 | else if (loop->inner) |
825 | { |
826 | if (dump_file && (dump_flags & TDF_DETAILS)) |
827 | fprintf (dump_file, "Not unrolling loop %d: " |
828 | "it is not innermost and code would grow.\n", |
829 | loop->num); |
830 | return false; |
831 | } |
832 | /* If there is a call on a hot path through the loop, then |
833 | there is most probably not much to optimize. */ |
834 | else if (size.num_non_pure_calls_on_hot_path) |
835 | { |
836 | if (dump_file && (dump_flags & TDF_DETAILS)) |
837 | fprintf (dump_file, "Not unrolling loop %d: " |
838 | "contains call and code would grow.\n", |
839 | loop->num); |
840 | return false; |
841 | } |
842 | /* If there is pure/const call in the function, then we can |
843 | still optimize the unrolled loop body if it contains some |
844 | other interesting code than the calls and code storing or |
845 | cumulating the return value. */ |
846 | else if (size.num_pure_calls_on_hot_path |
847 | /* One IV increment, one test, one ivtmp store and |
848 | one useful stmt. That is about minimal loop |
849 | doing pure call. */ |
850 | && (size.non_call_stmts_on_hot_path |
851 | <= 3 + size.num_pure_calls_on_hot_path)) |
852 | { |
853 | if (dump_file && (dump_flags & TDF_DETAILS)) |
854 | fprintf (dump_file, "Not unrolling loop %d: " |
855 | "contains just pure calls and code would grow.\n", |
856 | loop->num); |
857 | return false; |
858 | } |
859 | /* Complete unrolling is major win when control flow is |
860 | removed and one big basic block is created. If the loop |
861 | contains control flow the optimization may still be a win |
862 | because of eliminating the loop overhead but it also may |
863 | blow the branch predictor tables. Limit number of |
864 | branches on the hot path through the peeled sequence. */ |
865 | else if (size.num_branches_on_hot_path * (int)n_unroll |
866 | > param_max_peel_branches) |
867 | { |
868 | if (dump_file && (dump_flags & TDF_DETAILS)) |
869 | fprintf (dump_file, "Not unrolling loop %d: " |
870 | "number of branches on hot path in the unrolled " |
871 | "sequence reaches --param max-peel-branches limit.\n", |
872 | loop->num); |
873 | return false; |
874 | } |
875 | else if (unr_insns |
876 | > (unsigned) param_max_completely_peeled_insns) |
877 | { |
878 | if (dump_file && (dump_flags & TDF_DETAILS)) |
879 | fprintf (dump_file, "Not unrolling loop %d: " |
880 | "number of insns in the unrolled sequence reaches " |
881 | "--param max-completely-peeled-insns limit.\n", |
882 | loop->num); |
883 | return false; |
884 | } |
885 | } |
886 | |
887 | if (!dbg_cnt (gimple_unroll)) |
888 | return false; |
889 | |
890 | initialize_original_copy_tables (); |
891 | auto_sbitmap wont_exit (n_unroll + 1); |
892 | if (exit && niter |
893 | && TREE_CODE (niter) == INTEGER_CST |
894 | && wi::leu_p (n_unroll, wi::to_widest (niter))) |
895 | { |
896 | bitmap_ones (wont_exit); |
897 | if (wi::eq_p (wi::to_widest (niter), n_unroll) |
898 | || edge_to_cancel) |
899 | bitmap_clear_bit (wont_exit, 0); |
900 | } |
901 | else |
902 | { |
903 | exit = NULL; |
904 | bitmap_clear (wont_exit); |
905 | } |
906 | if (may_be_zero) |
907 | bitmap_clear_bit (wont_exit, 1); |
908 | |
909 | if (!gimple_duplicate_loop_body_to_header_edge ( |
910 | loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit, |
911 | &edges_to_remove, |
912 | DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL)) |
913 | { |
914 | free_original_copy_tables (); |
915 | if (dump_file && (dump_flags & TDF_DETAILS)) |
916 | fprintf (dump_file, "Failed to duplicate the loop\n"); |
917 | return false; |
918 | } |
919 | |
920 | free_original_copy_tables (); |
921 | } |
922 | |
923 | /* Remove the conditional from the last copy of the loop. */ |
924 | if (edge_to_cancel) |
925 | { |
926 | gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src)); |
927 | force_edge_cold (edge_to_cancel, true); |
928 | if (edge_to_cancel->flags & EDGE_TRUE_VALUE) |
929 | gimple_cond_make_false (cond); |
930 | else |
931 | gimple_cond_make_true (cond); |
932 | update_stmt (cond); |
933 | /* Do not remove the path, as doing so may remove outer loop and |
934 | confuse bookkeeping code in tree_unroll_loops_completely. */ |
935 | } |
936 | |
937 | /* Store the loop for later unlooping and exit removal. */ |
938 | loops_to_unloop.safe_push (loop); |
939 | loops_to_unloop_nunroll.safe_push (n_unroll); |
940 | |
941 | if (dump_enabled_p ()) |
942 | { |
943 | if (!n_unroll) |
944 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, |
945 | "loop turned into non-loop; it never loops\n"); |
946 | else |
947 | { |
948 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, |
949 | "loop with %d iterations completely unrolled", |
950 | (int) n_unroll); |
951 | if (loop->header->count.initialized_p ()) |
952 | dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, |
953 | " (header execution count %d)", |
954 | (int)loop->header->count.to_gcov_type ()); |
955 | dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n"); |
956 | } |
957 | } |
958 | |
959 | if (dump_file && (dump_flags & TDF_DETAILS)) |
960 | { |
961 | if (exit) |
962 | fprintf (dump_file, "Exit condition of peeled iterations was " |
963 | "eliminated.\n"); |
964 | if (edge_to_cancel) |
965 | fprintf (dump_file, "Last iteration exit edge was proved true.\n"); |
966 | else |
967 | fprintf (dump_file, "Latch of last iteration was marked by " |
968 | "__builtin_unreachable ().\n"); |
969 | } |
970 | |
971 | return true; |
972 | } |
973 | |
974 | /* Return number of instructions after peeling. */ |
975 | static unsigned HOST_WIDE_INT |
976 | estimated_peeled_sequence_size (struct loop_size *size, |
977 | unsigned HOST_WIDE_INT npeel) |
978 | { |
979 | return MAX (npeel * (HOST_WIDE_INT) (size->overall |
980 | - size->eliminated_by_peeling), 1); |
981 | } |
982 | |
983 | /* If the loop is expected to iterate N times and is |
984 | small enough, duplicate the loop body N+1 times before |
985 | the loop itself. This way the hot path will never |
986 | enter the loop. |
987 | Parameters are the same as for try_unroll_loops_completely */ |
988 | |
989 | static bool |
990 | try_peel_loop (class loop *loop, |
991 | edge exit, tree niter, bool may_be_zero, |
992 | HOST_WIDE_INT maxiter) |
993 | { |
994 | HOST_WIDE_INT npeel; |
995 | struct loop_size size; |
996 | int peeled_size; |
997 | |
998 | if (!flag_peel_loops |
999 | || param_max_peel_times <= 0 |
1000 | || !peeled_loops) |
1001 | return false; |
1002 | |
1003 | if (bitmap_bit_p (peeled_loops, loop->num)) |
1004 | { |
1005 | if (dump_file) |
1006 | fprintf (dump_file, "Not peeling: loop is already peeled\n"); |
1007 | return false; |
1008 | } |
1009 | |
1010 | /* We don't peel loops that will be unrolled as this can duplicate a |
1011 | loop more times than the user requested. */ |
1012 | if (loop->unroll) |
1013 | { |
1014 | if (dump_file) |
1015 | fprintf (dump_file, "Not peeling: user didn't want it peeled.\n"); |
1016 | return false; |
1017 | } |
1018 | |
1019 | /* Peel only innermost loops. |
1020 | While the code is perfectly capable of peeling non-innermost loops, |
1021 | the heuristics would probably need some improvements. */ |
1022 | if (loop->inner) |
1023 | { |
1024 | if (dump_file) |
1025 | fprintf (dump_file, "Not peeling: outer loop\n"); |
1026 | return false; |
1027 | } |
1028 | |
1029 | if (!optimize_loop_for_speed_p (loop)) |
1030 | { |
1031 | if (dump_file) |
1032 | fprintf (dump_file, "Not peeling: cold loop\n"); |
1033 | return false; |
1034 | } |
1035 | |
1036 | /* Check if there is an estimate on the number of iterations. */ |
1037 | npeel = estimated_loop_iterations_int (loop); |
1038 | if (npeel < 0) |
1039 | npeel = likely_max_loop_iterations_int (loop); |
1040 | if (npeel < 0) |
1041 | { |
1042 | if (dump_file) |
1043 | fprintf (dump_file, "Not peeling: number of iterations is not " |
1044 | "estimated\n"); |
1045 | return false; |
1046 | } |
1047 | if (maxiter >= 0 && maxiter <= npeel) |
1048 | { |
1049 | if (dump_file) |
1050 | fprintf (dump_file, "Not peeling: upper bound is known so can " |
1051 | "unroll completely\n"); |
1052 | return false; |
1053 | } |
1054 | |
1055 | /* We want to peel estimated number of iterations + 1 (so we never |
1056 | enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES |
1057 | and be sure to avoid overflows. */ |
1058 | if (npeel > param_max_peel_times - 1) |
1059 | { |
1060 | if (dump_file) |
1061 | fprintf (dump_file, "Not peeling: rolls too much " |
1062 | "(%i + 1 > --param max-peel-times)\n", (int) npeel); |
1063 | return false; |
1064 | } |
1065 | npeel++; |
1066 | |
1067 | /* Check peeled loops size. */ |
1068 | tree_estimate_loop_size (loop, exit, NULL, &size, |
1069 | param_max_peeled_insns); |
1070 | if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel)) |
1071 | > param_max_peeled_insns) |
1072 | { |
1073 | if (dump_file) |
1074 | fprintf (dump_file, "Not peeling: peeled sequence size is too large " |
1075 | "(%i insns > --param max-peel-insns)", peeled_size); |
1076 | return false; |
1077 | } |
1078 | |
1079 | if (!dbg_cnt (gimple_unroll)) |
1080 | return false; |
1081 | |
1082 | /* Duplicate possibly eliminating the exits. */ |
1083 | initialize_original_copy_tables (); |
1084 | auto_sbitmap wont_exit (npeel + 1); |
1085 | if (exit && niter |
1086 | && TREE_CODE (niter) == INTEGER_CST |
1087 | && wi::leu_p (npeel, wi::to_widest (niter))) |
1088 | { |
1089 | bitmap_ones (wont_exit); |
1090 | bitmap_clear_bit (wont_exit, 0); |
1091 | } |
1092 | else |
1093 | { |
1094 | exit = NULL; |
1095 | bitmap_clear (wont_exit); |
1096 | } |
1097 | if (may_be_zero) |
1098 | bitmap_clear_bit (wont_exit, 1); |
1099 | if (!gimple_duplicate_loop_body_to_header_edge ( |
1100 | loop, loop_preheader_edge (loop), npeel, wont_exit, exit, |
1101 | &edges_to_remove, DLTHE_FLAG_UPDATE_FREQ)) |
1102 | { |
1103 | free_original_copy_tables (); |
1104 | return false; |
1105 | } |
1106 | free_original_copy_tables (); |
1107 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1108 | { |
1109 | fprintf (dump_file, "Peeled loop %d, %i times.\n", |
1110 | loop->num, (int) npeel); |
1111 | } |
1112 | if (loop->any_estimate) |
1113 | { |
1114 | if (wi::ltu_p (npeel, loop->nb_iterations_estimate)) |
1115 | loop->nb_iterations_estimate -= npeel; |
1116 | else |
1117 | loop->nb_iterations_estimate = 0; |
1118 | } |
1119 | if (loop->any_upper_bound) |
1120 | { |
1121 | if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound)) |
1122 | loop->nb_iterations_upper_bound -= npeel; |
1123 | else |
1124 | loop->nb_iterations_upper_bound = 0; |
1125 | } |
1126 | if (loop->any_likely_upper_bound) |
1127 | { |
1128 | if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound)) |
1129 | loop->nb_iterations_likely_upper_bound -= npeel; |
1130 | else |
1131 | { |
1132 | loop->any_estimate = true; |
1133 | loop->nb_iterations_estimate = 0; |
1134 | loop->nb_iterations_likely_upper_bound = 0; |
1135 | } |
1136 | } |
1137 | profile_count entry_count = profile_count::zero (); |
1138 | |
1139 | edge e; |
1140 | edge_iterator ei; |
1141 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
1142 | if (e->src != loop->latch) |
1143 | { |
1144 | if (e->src->count.initialized_p ()) |
1145 | entry_count += e->src->count; |
1146 | gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); |
1147 | } |
1148 | profile_probability p; |
1149 | p = entry_count.probability_in (loop->header->count); |
1150 | scale_loop_profile (loop, p, 0); |
1151 | bitmap_set_bit (peeled_loops, loop->num); |
1152 | return true; |
1153 | } |
1154 | /* Adds a canonical induction variable to LOOP if suitable. |
1155 | CREATE_IV is true if we may create a new iv. UL determines |
1156 | which loops we are allowed to completely unroll. If TRY_EVAL is true, we try |
1157 | to determine the number of iterations of a loop by direct evaluation. |
1158 | Returns true if cfg is changed. */ |
1159 | |
1160 | static bool |
1161 | canonicalize_loop_induction_variables (class loop *loop, |
1162 | bool create_iv, enum unroll_level ul, |
1163 | bool try_eval, bool allow_peel) |
1164 | { |
1165 | edge exit = NULL; |
1166 | tree niter; |
1167 | HOST_WIDE_INT maxiter; |
1168 | bool modified = false; |
1169 | dump_user_location_t locus; |
1170 | class tree_niter_desc niter_desc; |
1171 | bool may_be_zero = false; |
1172 | |
1173 | /* For unrolling allow conditional constant or zero iterations, thus |
1174 | perform loop-header copying on-the-fly. */ |
1175 | exit = single_exit (loop); |
1176 | niter = chrec_dont_know; |
1177 | if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false)) |
1178 | { |
1179 | niter = niter_desc.niter; |
1180 | may_be_zero |
1181 | = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero); |
1182 | } |
1183 | if (TREE_CODE (niter) == INTEGER_CST) |
1184 | locus = last_stmt (exit->src); |
1185 | else |
1186 | { |
1187 | /* For non-constant niter fold may_be_zero into niter again. */ |
1188 | if (may_be_zero) |
1189 | { |
1190 | if (COMPARISON_CLASS_P (niter_desc.may_be_zero)) |
1191 | niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), |
1192 | niter_desc.may_be_zero, |
1193 | build_int_cst (TREE_TYPE (niter), 0), niter); |
1194 | else |
1195 | niter = chrec_dont_know; |
1196 | may_be_zero = false; |
1197 | } |
1198 | |
1199 | /* If the loop has more than one exit, try checking all of them |
1200 | for # of iterations determinable through scev. */ |
1201 | if (!exit) |
1202 | niter = find_loop_niter (loop, &exit); |
1203 | |
1204 | /* Finally if everything else fails, try brute force evaluation. */ |
1205 | if (try_eval |
1206 | && (chrec_contains_undetermined (niter) |
1207 | || TREE_CODE (niter) != INTEGER_CST)) |
1208 | niter = find_loop_niter_by_eval (loop, &exit); |
1209 | |
1210 | if (exit) |
1211 | locus = last_stmt (exit->src); |
1212 | |
1213 | if (TREE_CODE (niter) != INTEGER_CST) |
1214 | exit = NULL; |
1215 | } |
1216 | |
1217 | /* We work exceptionally hard here to estimate the bound |
1218 | by find_loop_niter_by_eval. Be sure to keep it for future. */ |
1219 | if (niter && TREE_CODE (niter) == INTEGER_CST) |
1220 | { |
1221 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
1222 | record_niter_bound (loop, wi::to_widest (niter), |
1223 | exit == single_likely_exit (loop, exits), true); |
1224 | } |
1225 | |
1226 | /* Force re-computation of loop bounds so we can remove redundant exits. */ |
1227 | maxiter = max_loop_iterations_int (loop); |
1228 | |
1229 | if (dump_file && (dump_flags & TDF_DETAILS) |
1230 | && TREE_CODE (niter) == INTEGER_CST) |
1231 | { |
1232 | fprintf (dump_file, "Loop %d iterates ", loop->num); |
1233 | print_generic_expr (dump_file, niter, TDF_SLIM); |
1234 | fprintf (dump_file, " times.\n"); |
1235 | } |
1236 | if (dump_file && (dump_flags & TDF_DETAILS) |
1237 | && maxiter >= 0) |
1238 | { |
1239 | fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num, |
1240 | (int)maxiter); |
1241 | } |
1242 | if (dump_file && (dump_flags & TDF_DETAILS) |
1243 | && likely_max_loop_iterations_int (loop) >= 0) |
1244 | { |
1245 | fprintf (dump_file, "Loop %d likely iterates at most %i times.\n", |
1246 | loop->num, (int)likely_max_loop_iterations_int (loop)); |
1247 | } |
1248 | |
1249 | /* Remove exits that are known to be never taken based on loop bound. |
1250 | Needs to be called after the call to max_loop_iterations_int that |
1251 | populates the loop bounds. */ |
1252 | modified |= remove_redundant_iv_tests (loop); |
1253 | |
1254 | if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul, |
1255 | maxiter, locus, allow_peel)) |
1256 | return true; |
1257 | |
1258 | if (create_iv |
1259 | && niter && !chrec_contains_undetermined (niter) |
1260 | && exit && just_once_each_iteration_p (loop, exit->src)) |
1261 | { |
1262 | tree iv_niter = niter; |
1263 | if (may_be_zero) |
1264 | { |
1265 | if (COMPARISON_CLASS_P (niter_desc.may_be_zero)) |
1266 | iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter), |
1267 | niter_desc.may_be_zero, |
1268 | build_int_cst (TREE_TYPE (iv_niter), 0), |
1269 | iv_niter); |
1270 | else |
1271 | iv_niter = NULL_TREE; |
1272 | } |
1273 | if (iv_niter) |
1274 | create_canonical_iv (loop, exit, iv_niter); |
1275 | } |
1276 | |
1277 | if (ul == UL_ALL) |
1278 | modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter); |
1279 | |
1280 | return modified; |
1281 | } |
1282 | |
1283 | /* The main entry point of the pass. Adds canonical induction variables |
1284 | to the suitable loops. */ |
1285 | |
1286 | unsigned int |
1287 | canonicalize_induction_variables (void) |
1288 | { |
1289 | bool changed = false; |
1290 | bool irred_invalidated = false; |
1291 | bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); |
1292 | |
1293 | estimate_numbers_of_iterations (cfun); |
1294 | |
1295 | for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) |
1296 | { |
1297 | changed |= canonicalize_loop_induction_variables (loop, |
1298 | true, UL_SINGLE_ITER, |
1299 | true, false); |
1300 | } |
1301 | gcc_assert (!need_ssa_update_p (cfun)); |
1302 | |
1303 | unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated); |
1304 | if (irred_invalidated |
1305 | && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) |
1306 | mark_irreducible_loops (); |
1307 | |
1308 | /* Clean up the information about numbers of iterations, since brute force |
1309 | evaluation could reveal new information. */ |
1310 | free_numbers_of_iterations_estimates (cfun); |
1311 | scev_reset (); |
1312 | |
1313 | if (!bitmap_empty_p (loop_closed_ssa_invalidated)) |
1314 | { |
1315 | gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA)); |
1316 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); |
1317 | } |
1318 | BITMAP_FREE (loop_closed_ssa_invalidated);
1319 | |
1320 | if (changed) |
1321 | return TODO_cleanup_cfg;
1322 | return 0; |
1323 | } |
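
The UL_SINGLE_ITER request above only permits complete unrolling of loops whose latch is never executed, i.e. loops that exit in their first iteration. A minimal example of the kind of input this covers (illustrative only; whether it actually fires depends on what the niter analysis proves):

/* Illustrative only.  The body runs exactly once, so "unrolling" merely
   turns the loop into straight-line code.  */
int runs_once (const int *a)
{
  int s = 0;
  for (int i = 0; i < 1; i++)   /* latch is never taken */
    s += a[i];
  return s;
}
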
1324 | |
1325 | /* Process loops from innermost to outer, stopping at the innermost |
1326 | loop we unrolled. */ |
1327 | |
1328 | static bool |
1329 | tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer, |
1330 | bitmap father_bbs, class loop *loop) |
1331 | { |
1332 | class loop *loop_father; |
1333 | bool changed = false; |
1334 | class loop *inner; |
1335 | enum unroll_level ul; |
1336 | unsigned num = number_of_loops (cfun);
1337 | |
1338 | /* Process inner loops first. Don't walk loops added by the recursive |
1339 | calls because SSA form is not up-to-date. They can be handled in the |
1340 | next iteration. */ |
1341 | bitmap child_father_bbs = NULL;
1342 | for (inner = loop->inner; inner != NULL; inner = inner->next)
1343 | if ((unsigned) inner->num < num) |
1344 | { |
1345 | if (!child_father_bbs) |
1346 | child_father_bbs = BITMAP_ALLOC (NULL);
1347 | if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer, |
1348 | child_father_bbs, inner)) |
1349 | { |
1350 | bitmap_ior_into (father_bbs, child_father_bbs); |
1351 | bitmap_clear (child_father_bbs); |
1352 | changed = true; |
1353 | } |
1354 | } |
1355 | if (child_father_bbs) |
1356 | BITMAP_FREE (child_father_bbs);
1357 | |
1358 | /* If we changed an inner loop we cannot process outer loops in this |
1359 | iteration because SSA form is not up-to-date. Continue with |
1360 | siblings of outer loops instead. */ |
1361 | if (changed) |
1362 | { |
1363 | /* If we are recorded as father clear all other fathers that |
1364 | are necessarily covered already to avoid redundant work. */ |
1365 | if (bitmap_bit_p (father_bbs, loop->header->index)) |
1366 | { |
1367 | bitmap_clear (father_bbs); |
1368 | bitmap_set_bit (father_bbs, loop->header->index); |
1369 | } |
1370 | return true; |
1371 | } |
1372 | |
1373 | /* Don't unroll #pragma omp simd loops until the vectorizer |
1374 | attempts to vectorize those. */ |
1375 | if (loop->force_vectorize) |
1376 | return false; |
1377 | |
1378 | /* Try to unroll this loop. */ |
1379 | loop_father = loop_outer (loop); |
1380 | if (!loop_father) |
1381 | return false; |
1382 | |
1383 | if (loop->unroll > 1) |
1384 | ul = UL_ALL; |
1385 | else if (may_increase_size && optimize_loop_nest_for_speed_p (loop) |
1386 | /* Unroll outermost loops only if asked to do so or they do |
1387 | not cause code growth. */ |
1388 | && (unroll_outer || loop_outer (loop_father))) |
1389 | ul = UL_ALL; |
1390 | else |
1391 | ul = UL_NO_GROWTH; |
1392 | |
1393 | if (canonicalize_loop_induction_variables |
1394 | (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
1395 | { |
1396 | /* If we'll continue unrolling, we need to propagate constants |
1397 | within the new basic blocks to fold away induction variable |
1398 | computations; otherwise, the size might blow up before the |
1399 | iteration is complete and the IR eventually cleaned up. */ |
1400 | if (loop_outer (loop_father)) |
1401 | { |
1402 | /* Once we process our father we will have processed |
1403 | the fathers of our children as well, so avoid doing |
1404 | redundant work and clear fathers we've gathered so far. */
1405 | bitmap_clear (father_bbs); |
1406 | bitmap_set_bit (father_bbs, loop_father->header->index); |
1407 | } |
1408 | else if (unroll_outer) |
1409 | /* Trigger scalar cleanup once any outermost loop gets unrolled. */ |
1410 | cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
1411 | |
1412 | return true; |
1413 | } |
1414 | |
1415 | return false; |
1416 | } |
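
The traversal above is easier to see in isolation: children are processed first, children created by the recursion itself are skipped, and as soon as anything below the current loop changed the walk gives up on this loop for the current iteration. A condensed model with hypothetical types that merely mirror the inner/next links of GCC's loop tree:

/* Toy model of the walk; `toy_loop' and `walk_innermost_first' are made up.  */
struct toy_loop
{
  int num;              /* index; loops added during the walk get indices >= the count at start */
  toy_loop *inner;      /* first child */
  toy_loop *next;       /* next sibling */
};

static bool
walk_innermost_first (toy_loop *l, unsigned num_at_start,
                      bool (*process) (toy_loop *))
{
  bool changed = false;
  for (toy_loop *c = l->inner; c; c = c->next)
    if ((unsigned) c->num < num_at_start)
      changed |= walk_innermost_first (c, num_at_start, process);
  if (changed)
    return true;        /* SSA form is stale; retry the outer loops later.  */
  return process (l);
}
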
1417 | |
1418 | /* Unroll LOOPS completely if they iterate just a few times. Unless
1419 | MAY_INCREASE_SIZE is true, perform the unrolling only if the |
1420 | size of the code does not increase. */ |
1421 | |
1422 | static unsigned int |
1423 | tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) |
1424 | { |
1425 | bitmap father_bbs = BITMAP_ALLOC (NULL);
1426 | bool changed; |
1427 | int iteration = 0; |
1428 | bool irred_invalidated = false; |
1429 | |
1430 | estimate_numbers_of_iterations (cfun);
1431 | |
1432 | do |
1433 | { |
1434 | changed = false; |
Value stored to 'changed' is never read | |
1435 | bitmap loop_closed_ssa_invalidated = NULL;
1436 | |
1437 | if (loops_state_satisfies_p (LOOP_CLOSED_SSA)) |
1439 | loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1439 | |
1440 | free_numbers_of_iterations_estimates (cfun);
1441 | estimate_numbers_of_iterations (cfun);
1442 | |
1443 | changed = tree_unroll_loops_completely_1 (may_increase_size, |
1444 | unroll_outer, father_bbs, |
1445 | current_loops->tree_root);
1446 | if (changed) |
1447 | { |
1448 | unsigned i; |
1449 | |
1450 | unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated); |
1451 | |
1452 | /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused. */ |
1453 | if (loop_closed_ssa_invalidated |
1454 | && !bitmap_empty_p (loop_closed_ssa_invalidated)) |
1455 | rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated, |
1456 | TODO_update_ssa);
1457 | else |
1458 | update_ssa (TODO_update_ssa);
1459 | |
1460 | /* father_bbs is a bitmap of loop father header BB indices. |
1461 | Translate that to what non-root loops these BBs belong to now. */ |
1462 | bitmap_iterator bi; |
1463 | bitmap fathers = BITMAP_ALLOC (NULL);
1464 | EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1465 | { |
1466 | basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1467 | if (! unrolled_loop_bb) |
1468 | continue; |
1469 | if (loop_outer (unrolled_loop_bb->loop_father)) |
1470 | bitmap_set_bit (fathers, |
1471 | unrolled_loop_bb->loop_father->num); |
1472 | } |
1473 | bitmap_clear (father_bbs); |
1474 | /* Propagate the constants within the new basic blocks. */ |
1475 | EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1476 | { |
1477 | loop_p father = get_loop (cfun, i);
1478 | bitmap exit_bbs = BITMAP_ALLOC (NULL);
1479 | loop_exit *exit = father->exits->next; |
1480 | while (exit->e) |
1481 | { |
1482 | bitmap_set_bit (exit_bbs, exit->e->dest->index); |
1483 | exit = exit->next; |
1484 | } |
1485 | do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1486 | } |
1487 | BITMAP_FREE (fathers);
1488 | |
1489 | /* This will take care of removing completely unrolled loops |
1490 | from the loop structures so we can continue unrolling the
1491 | now-innermost loops. */
1492 | if (cleanup_tree_cfg ()) |
1493 | update_ssa (TODO_update_ssa_only_virtuals);
1494 | |
1495 | /* Clean up the information about numbers of iterations, since |
1496 | complete unrolling might have invalidated it. */ |
1497 | scev_reset (); |
1498 | if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1499 | verify_loop_closed_ssa (true); |
1500 | } |
1501 | if (loop_closed_ssa_invalidated) |
1502 | BITMAP_FREE (loop_closed_ssa_invalidated);
1503 | } |
1504 | while (changed |
1505 | && ++iteration <= param_max_unroll_iterations);
1506 | |
1507 | BITMAP_FREE (father_bbs);
1508 | |
1509 | if (irred_invalidated |
1510 | && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) |
1511 | mark_irreducible_loops (); |
1512 | |
1513 | return 0; |
1514 | } |
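
At the source level, the end result of a successful run of this driver is simply that short constant-trip-count loops disappear; for instance (illustrative only, not taken from this file):

/* What complete unrolling amounts to for a tiny constant-count loop.  */
int rolled (const int *a)
{
  int s = 0;
  for (int i = 0; i < 4; i++)
    s += a[i];
  return s;
}

int unrolled (const int *a)
{
  return a[0] + a[1] + a[2] + a[3];
}
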
1515 | |
1516 | /* Canonical induction variable creation pass. */ |
1517 | |
1518 | namespace { |
1519 | |
1520 | const pass_data pass_data_iv_canon = |
1521 | { |
1522 | GIMPLE_PASS, /* type */ |
1523 | "ivcanon", /* name */ |
1524 | OPTGROUP_LOOP, /* optinfo_flags */ |
1525 | TV_TREE_LOOP_IVCANON, /* tv_id */ |
1526 | ( PROP_cfg | PROP_ssa ), /* properties_required */
1527 | 0, /* properties_provided */ |
1528 | 0, /* properties_destroyed */ |
1529 | 0, /* todo_flags_start */ |
1530 | 0, /* todo_flags_finish */ |
1531 | }; |
1532 | |
1533 | class pass_iv_canon : public gimple_opt_pass |
1534 | { |
1535 | public: |
1536 | pass_iv_canon (gcc::context *ctxt) |
1537 | : gimple_opt_pass (pass_data_iv_canon, ctxt) |
1538 | {} |
1539 | |
1540 | /* opt_pass methods: */ |
1541 | bool gate (function *) final override { return flag_tree_loop_ivcanon != 0; }
1542 | unsigned int execute (function *fun) final override; |
1543 | |
1544 | }; // class pass_iv_canon |
1545 | |
1546 | unsigned int |
1547 | pass_iv_canon::execute (function *fun) |
1548 | { |
1549 | if (number_of_loops (fun) <= 1) |
1550 | return 0; |
1551 | |
1552 | return canonicalize_induction_variables (); |
1553 | } |
1554 | |
1555 | } // anon namespace |
1556 | |
1557 | gimple_opt_pass * |
1558 | make_pass_iv_canon (gcc::context *ctxt) |
1559 | { |
1560 | return new pass_iv_canon (ctxt); |
1561 | } |
1562 | |
1563 | /* Complete unrolling of loops. */ |
1564 | |
1565 | namespace { |
1566 | |
1567 | const pass_data pass_data_complete_unroll = |
1568 | { |
1569 | GIMPLE_PASS, /* type */ |
1570 | "cunroll", /* name */ |
1571 | OPTGROUP_LOOP, /* optinfo_flags */ |
1572 | TV_COMPLETE_UNROLL, /* tv_id */ |
1573 | ( PROP_cfg | PROP_ssa ), /* properties_required */
1574 | 0, /* properties_provided */ |
1575 | 0, /* properties_destroyed */ |
1576 | 0, /* todo_flags_start */ |
1577 | 0, /* todo_flags_finish */ |
1578 | }; |
1579 | |
1580 | class pass_complete_unroll : public gimple_opt_pass |
1581 | { |
1582 | public: |
1583 | pass_complete_unroll (gcc::context *ctxt) |
1584 | : gimple_opt_pass (pass_data_complete_unroll, ctxt) |
1585 | {} |
1586 | |
1587 | /* opt_pass methods: */ |
1588 | unsigned int execute (function *) final override; |
1589 | |
1590 | }; // class pass_complete_unroll |
1591 | |
1592 | unsigned int |
1593 | pass_complete_unroll::execute (function *fun) |
1594 | { |
1595 | if (number_of_loops (fun) <= 1) |
1596 | return 0; |
1597 | |
1598 | /* If we ever decide to run loop peeling more than once, we will need to |
1599 | track loops already peeled in loop structures themselves to avoid |
1600 | re-peeling the same loop multiple times. */ |
1601 | if (flag_peel_loops)
1602 | peeled_loops = BITMAP_ALLOC (NULL);
1603 | unsigned int val = tree_unroll_loops_completely (flag_cunroll_grow_size,
1604 | true); |
1605 | if (peeled_loops) |
1606 | { |
1607 | BITMAP_FREE (peeled_loops);
1608 | peeled_loops = NULL;
1609 | } |
1610 | return val; |
1611 | } |
1612 | |
1613 | } // anon namespace |
1614 | |
1615 | gimple_opt_pass * |
1616 | make_pass_complete_unroll (gcc::context *ctxt) |
1617 | { |
1618 | return new pass_complete_unroll (ctxt); |
1619 | } |
1620 | |
1621 | /* Complete unrolling of inner loops. */ |
1622 | |
1623 | namespace { |
1624 | |
1625 | const pass_data pass_data_complete_unrolli = |
1626 | { |
1627 | GIMPLE_PASS, /* type */ |
1628 | "cunrolli", /* name */ |
1629 | OPTGROUP_LOOP, /* optinfo_flags */ |
1630 | TV_COMPLETE_UNROLL, /* tv_id */ |
1631 | ( PROP_cfg | PROP_ssa ), /* properties_required */
1632 | 0, /* properties_provided */ |
1633 | 0, /* properties_destroyed */ |
1634 | 0, /* todo_flags_start */ |
1635 | 0, /* todo_flags_finish */ |
1636 | }; |
1637 | |
1638 | class pass_complete_unrolli : public gimple_opt_pass |
1639 | { |
1640 | public: |
1641 | pass_complete_unrolli (gcc::context *ctxt) |
1642 | : gimple_opt_pass (pass_data_complete_unrolli, ctxt) |
1643 | {} |
1644 | |
1645 | /* opt_pass methods: */ |
1646 | bool gate (function *) final override { return optimize >= 2; }
1647 | unsigned int execute (function *) final override; |
1648 | |
1649 | }; // class pass_complete_unrolli |
1650 | |
1651 | unsigned int |
1652 | pass_complete_unrolli::execute (function *fun) |
1653 | { |
1654 | unsigned ret = 0; |
1655 | |
1656 | loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1657 | if (number_of_loops (fun) > 1) |
1658 | { |
1659 | scev_initialize (); |
1660 | ret = tree_unroll_loops_completely (optimize >= 3, false);
1661 | scev_finalize (); |
1662 | } |
1663 | loop_optimizer_finalize (); |
1664 | |
1665 | return ret; |
1666 | } |
1667 | |
1668 | } // anon namespace |
1669 | |
1670 | gimple_opt_pass * |
1671 | make_pass_complete_unrolli (gcc::context *ctxt) |
1672 | { |
1673 | return new pass_complete_unrolli (ctxt); |
1674 | } |
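
cunrolli is the early instance of complete unrolling: unroll_outer is false here, so outermost loops are only unrolled when that does not grow the code, and the main payoff is flattening a tiny fixed-count inner loop so that later passes, the vectorizer in particular, see a simpler nest. A sketch of the kind of nest this is aimed at (illustrative only):

/* The k-loop has a known trip count of 3 and is a natural candidate for
   early complete unrolling, leaving only the i-loop behind.  */
void scale (float (*v)[3], int n)
{
  for (int i = 0; i < n; i++)
    for (int k = 0; k < 3; k++)
      v[i][k] *= 2.0f;
}
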
1675 | |
1676 |