File: | build/gcc/tree-vect-loop.cc |
Warning: | line 6305, column 7 Value stored to 'epilog_stmt' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* Loop Vectorization |
2 | Copyright (C) 2003-2023 Free Software Foundation, Inc. |
3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> and |
4 | Ira Rosen <irar@il.ibm.com> |
5 | |
6 | This file is part of GCC. |
7 | |
8 | GCC is free software; you can redistribute it and/or modify it under |
9 | the terms of the GNU General Public License as published by the Free |
10 | Software Foundation; either version 3, or (at your option) any later |
11 | version. |
12 | |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
16 | for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ |
21 | |
22 | #define INCLUDE_ALGORITHM |
23 | #include "config.h" |
24 | #include "system.h" |
25 | #include "coretypes.h" |
26 | #include "backend.h" |
27 | #include "target.h" |
28 | #include "rtl.h" |
29 | #include "tree.h" |
30 | #include "gimple.h" |
31 | #include "cfghooks.h" |
32 | #include "tree-pass.h" |
33 | #include "ssa.h" |
34 | #include "optabs-tree.h" |
35 | #include "diagnostic-core.h" |
36 | #include "fold-const.h" |
37 | #include "stor-layout.h" |
38 | #include "cfganal.h" |
39 | #include "gimplify.h" |
40 | #include "gimple-iterator.h" |
41 | #include "gimplify-me.h" |
42 | #include "tree-ssa-loop-ivopts.h" |
43 | #include "tree-ssa-loop-manip.h" |
44 | #include "tree-ssa-loop-niter.h" |
45 | #include "tree-ssa-loop.h" |
46 | #include "cfgloop.h" |
47 | #include "tree-scalar-evolution.h" |
48 | #include "tree-vectorizer.h" |
49 | #include "gimple-fold.h" |
50 | #include "cgraph.h" |
51 | #include "tree-cfg.h" |
52 | #include "tree-if-conv.h" |
53 | #include "internal-fn.h" |
54 | #include "tree-vector-builder.h" |
55 | #include "vec-perm-indices.h" |
56 | #include "tree-eh.h" |
57 | #include "case-cfn-macros.h" |
58 | |
59 | /* Loop Vectorization Pass. |
60 | |
61 | This pass tries to vectorize loops. |
62 | |
63 | For example, the vectorizer transforms the following simple loop: |
64 | |
65 | short a[N]; short b[N]; short c[N]; int i; |
66 | |
67 | for (i=0; i<N; i++){ |
68 | a[i] = b[i] + c[i]; |
69 | } |
70 | |
71 | as if it was manually vectorized by rewriting the source code into: |
72 | |
73 | typedef int __attribute__((mode(V8HI))) v8hi; |
74 | short a[N]; short b[N]; short c[N]; int i; |
75 | v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c; |
76 | v8hi va, vb, vc; |
77 | |
78 | for (i=0; i<N/8; i++){ |
79 | vb = pb[i]; |
80 | vc = pc[i]; |
81 | va = vb + vc; |
82 | pa[i] = va; |
83 | } |
84 | |
85 | The main entry to this pass is vectorize_loops(), in which |
86 | the vectorizer applies a set of analyses on a given set of loops, |
87 | followed by the actual vectorization transformation for the loops that |
88 | had successfully passed the analysis phase. |
89 | Throughout this pass we make a distinction between two types of |
90 | data: scalars (which are represented by SSA_NAMES), and memory references |
91 | ("data-refs"). These two types of data require different handling both |
92 | during analysis and transformation. The types of data-refs that the |
93 | vectorizer currently supports are ARRAY_REFS which base is an array DECL |
94 | (not a pointer), and INDIRECT_REFS through pointers; both array and pointer |
95 | accesses are required to have a simple (consecutive) access pattern. |
96 | |
97 | Analysis phase: |
98 | =============== |
99 | The driver for the analysis phase is vect_analyze_loop(). |
100 | It applies a set of analyses, some of which rely on the scalar evolution |
101 | analyzer (scev) developed by Sebastian Pop. |
102 | |
103 | During the analysis phase the vectorizer records some information |
104 | per stmt in a "stmt_vec_info" struct which is attached to each stmt in the |
105 | loop, as well as general information about the loop as a whole, which is |
106 | recorded in a "loop_vec_info" struct attached to each loop. |
107 | |
108 | Transformation phase: |
109 | ===================== |
110 | The loop transformation phase scans all the stmts in the loop, and |
111 | creates a vector stmt (or a sequence of stmts) for each scalar stmt S in |
112 | the loop that needs to be vectorized. It inserts the vector code sequence |
113 | just before the scalar stmt S, and records a pointer to the vector code |
114 | in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct |
115 | attached to S). This pointer will be used for the vectorization of following |
116 | stmts which use the def of stmt S. Stmt S is removed if it writes to memory; |
117 | otherwise, we rely on dead code elimination for removing it. |
118 | |
119 | For example, say stmt S1 was vectorized into stmt VS1: |
120 | |
121 | VS1: vb = px[i]; |
122 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 |
123 | S2: a = b; |
124 | |
125 | To vectorize stmt S2, the vectorizer first finds the stmt that defines |
126 | the operand 'b' (S1), and gets the relevant vector def 'vb' from the |
127 | vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The |
128 | resulting sequence would be: |
129 | |
130 | VS1: vb = px[i]; |
131 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 |
132 | VS2: va = vb; |
133 | S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2 |
134 | |
135 | Operands that are not SSA_NAMEs, are data-refs that appear in |
136 | load/store operations (like 'x[i]' in S1), and are handled differently. |
137 | |
138 | Target modeling: |
139 | ================= |
140 | Currently the only target specific information that is used is the |
141 | size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". |
142 | Targets that can support different sizes of vectors, for now will need |
143 | to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More |
144 | flexibility will be added in the future. |
145 | |
146 | Since we only vectorize operations which vector form can be |
147 | expressed using existing tree codes, to verify that an operation is |
148 | supported, the vectorizer checks the relevant optab at the relevant |
149 | machine_mode (e.g, optab_handler (add_optab, V8HImode)). If |
150 | the value found is CODE_FOR_nothing, then there's no target support, and |
151 | we can't vectorize the stmt. |
152 | |
153 | For additional information on this project see: |
154 | http://gcc.gnu.org/projects/tree-ssa/vectorization.html |
155 | */ |
156 | |
157 | static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *, |
158 | unsigned *); |
159 | static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, |
160 | bool *, bool *, bool); |
161 | |
162 | /* Subroutine of vect_determine_vf_for_stmt that handles only one |
163 | statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE |
164 | may already be set for general statements (not just data refs). */ |
165 | |
166 | static opt_result |
167 | vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info, |
168 | bool vectype_maybe_set_p, |
169 | poly_uint64 *vf) |
170 | { |
171 | gimple *stmt = stmt_info->stmt; |
172 | |
173 | if ((!STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope) |
174 | && !STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live) |
175 | || gimple_clobber_p (stmt)) |
176 | { |
177 | if (dump_enabled_p ()) |
178 | dump_printf_loc (MSG_NOTE, vect_location, "skip.\n"); |
179 | return opt_result::success (); |
180 | } |
181 | |
182 | tree stmt_vectype, nunits_vectype; |
183 | opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info, |
184 | &stmt_vectype, |
185 | &nunits_vectype); |
186 | if (!res) |
187 | return res; |
188 | |
189 | if (stmt_vectype) |
190 | { |
191 | if (STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype) |
192 | /* The only case when a vectype had been already set is for stmts |
193 | that contain a data ref, or for "pattern-stmts" (stmts generated |
194 | by the vectorizer to represent/replace a certain idiom). */ |
195 | gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)((void)(!((((stmt_info)->dr_aux.dr + 0) || vectype_maybe_set_p ) && (stmt_info)->vectype == stmt_vectype) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 197, __FUNCTION__), 0 : 0)) |
196 | || vectype_maybe_set_p)((void)(!((((stmt_info)->dr_aux.dr + 0) || vectype_maybe_set_p ) && (stmt_info)->vectype == stmt_vectype) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 197, __FUNCTION__), 0 : 0)) |
197 | && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype)((void)(!((((stmt_info)->dr_aux.dr + 0) || vectype_maybe_set_p ) && (stmt_info)->vectype == stmt_vectype) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 197, __FUNCTION__), 0 : 0)); |
198 | else |
199 | STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype = stmt_vectype; |
200 | } |
201 | |
202 | if (nunits_vectype) |
203 | vect_update_max_nunits (vf, nunits_vectype); |
204 | |
205 | return opt_result::success (); |
206 | } |
207 | |
208 | /* Subroutine of vect_determine_vectorization_factor. Set the vector |
209 | types of STMT_INFO and all attached pattern statements and update |
210 | the vectorization factor VF accordingly. Return true on success |
211 | or false if something prevented vectorization. */ |
212 | |
213 | static opt_result |
214 | vect_determine_vf_for_stmt (vec_info *vinfo, |
215 | stmt_vec_info stmt_info, poly_uint64 *vf) |
216 | { |
217 | if (dump_enabled_p ()) |
218 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G", |
219 | stmt_info->stmt); |
220 | opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf); |
221 | if (!res) |
222 | return res; |
223 | |
224 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)(stmt_info)->in_pattern_p |
225 | && STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt) |
226 | { |
227 | gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)(stmt_info)->pattern_def_seq; |
228 | stmt_info = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt; |
229 | |
230 | /* If a pattern statement has def stmts, analyze them too. */ |
231 | for (gimple_stmt_iterator si = gsi_start (pattern_def_seq); |
232 | !gsi_end_p (si); gsi_next (&si)) |
233 | { |
234 | stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); |
235 | if (dump_enabled_p ()) |
236 | dump_printf_loc (MSG_NOTE, vect_location, |
237 | "==> examining pattern def stmt: %G", |
238 | def_stmt_info->stmt); |
239 | res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf); |
240 | if (!res) |
241 | return res; |
242 | } |
243 | |
244 | if (dump_enabled_p ()) |
245 | dump_printf_loc (MSG_NOTE, vect_location, |
246 | "==> examining pattern statement: %G", |
247 | stmt_info->stmt); |
248 | res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf); |
249 | if (!res) |
250 | return res; |
251 | } |
252 | |
253 | return opt_result::success (); |
254 | } |
255 | |
256 | /* Function vect_determine_vectorization_factor |
257 | |
258 | Determine the vectorization factor (VF). VF is the number of data elements |
259 | that are operated upon in parallel in a single iteration of the vectorized |
260 | loop. For example, when vectorizing a loop that operates on 4byte elements, |
261 | on a target with vector size (VS) 16byte, the VF is set to 4, since 4 |
262 | elements can fit in a single vector register. |
263 | |
264 | We currently support vectorization of loops in which all types operated upon |
265 | are of the same size. Therefore this function currently sets VF according to |
266 | the size of the types operated upon, and fails if there are multiple sizes |
267 | in the loop. |
268 | |
269 | VF is also the factor by which the loop iterations are strip-mined, e.g.: |
270 | original loop: |
271 | for (i=0; i<N; i++){ |
272 | a[i] = b[i] + c[i]; |
273 | } |
274 | |
275 | vectorized loop: |
276 | for (i=0; i<N; i+=VF){ |
277 | a[i:VF] = b[i:VF] + c[i:VF]; |
278 | } |
279 | */ |
280 | |
281 | static opt_result |
282 | vect_determine_vectorization_factor (loop_vec_info loop_vinfo) |
283 | { |
284 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; |
285 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs; |
286 | unsigned nbbs = loop->num_nodes; |
287 | poly_uint64 vectorization_factor = 1; |
288 | tree scalar_type = NULL_TREE(tree) nullptr; |
289 | gphi *phi; |
290 | tree vectype; |
291 | stmt_vec_info stmt_info; |
292 | unsigned i; |
293 | |
294 | DUMP_VECT_SCOPE ("vect_determine_vectorization_factor")auto_dump_scope scope ("vect_determine_vectorization_factor", vect_location); |
295 | |
296 | for (i = 0; i < nbbs; i++) |
297 | { |
298 | basic_block bb = bbs[i]; |
299 | |
300 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); |
301 | gsi_next (&si)) |
302 | { |
303 | phi = si.phi (); |
304 | stmt_info = loop_vinfo->lookup_stmt (phi); |
305 | if (dump_enabled_p ()) |
306 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G", |
307 | (gimple *) phi); |
308 | |
309 | gcc_assert (stmt_info)((void)(!(stmt_info) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 309, __FUNCTION__), 0 : 0)); |
310 | |
311 | if (STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope) |
312 | || STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live) |
313 | { |
314 | gcc_assert (!STMT_VINFO_VECTYPE (stmt_info))((void)(!(!(stmt_info)->vectype) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 314, __FUNCTION__), 0 : 0)); |
315 | scalar_type = TREE_TYPE (PHI_RESULT (phi))((contains_struct_check ((get_def_from_ptr (gimple_phi_result_ptr (phi))), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 315, __FUNCTION__))->typed.type); |
316 | |
317 | if (dump_enabled_p ()) |
318 | dump_printf_loc (MSG_NOTE, vect_location, |
319 | "get vectype for scalar type: %T\n", |
320 | scalar_type); |
321 | |
322 | vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); |
323 | if (!vectype) |
324 | return opt_result::failure_at (phi, |
325 | "not vectorized: unsupported " |
326 | "data-type %T\n", |
327 | scalar_type); |
328 | STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype = vectype; |
329 | |
330 | if (dump_enabled_p ()) |
331 | dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", |
332 | vectype); |
333 | |
334 | if (dump_enabled_p ()) |
335 | { |
336 | dump_printf_loc (MSG_NOTE, vect_location, "nunits = "); |
337 | dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype)); |
338 | dump_printf (MSG_NOTE, "\n"); |
339 | } |
340 | |
341 | vect_update_max_nunits (&vectorization_factor, vectype); |
342 | } |
343 | } |
344 | |
345 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); |
346 | gsi_next (&si)) |
347 | { |
348 | if (is_gimple_debug (gsi_stmt (si))) |
349 | continue; |
350 | stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
351 | opt_result res |
352 | = vect_determine_vf_for_stmt (loop_vinfo, |
353 | stmt_info, &vectorization_factor); |
354 | if (!res) |
355 | return res; |
356 | } |
357 | } |
358 | |
359 | /* TODO: Analyze cost. Decide if worth while to vectorize. */ |
360 | if (dump_enabled_p ()) |
361 | { |
362 | dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = "); |
363 | dump_dec (MSG_NOTE, vectorization_factor); |
364 | dump_printf (MSG_NOTE, "\n"); |
365 | } |
366 | |
367 | if (known_le (vectorization_factor, 1U)(!maybe_lt (1U, vectorization_factor))) |
368 | return opt_result::failure_at (vect_location, |
369 | "not vectorized: unsupported data-type\n"); |
370 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor = vectorization_factor; |
371 | return opt_result::success (); |
372 | } |
373 | |
374 | |
375 | /* Function vect_is_simple_iv_evolution. |
376 | |
377 | FORNOW: A simple evolution of an induction variables in the loop is |
378 | considered a polynomial evolution. */ |
379 | |
380 | static bool |
381 | vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, |
382 | tree * step) |
383 | { |
384 | tree init_expr; |
385 | tree step_expr; |
386 | tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); |
387 | basic_block bb; |
388 | |
389 | /* When there is no evolution in this loop, the evolution function |
390 | is not "simple". */ |
391 | if (evolution_part == NULL_TREE(tree) nullptr) |
392 | return false; |
393 | |
394 | /* When the evolution is a polynomial of degree >= 2 |
395 | the evolution function is not "simple". */ |
396 | if (tree_is_chrec (evolution_part)) |
397 | return false; |
398 | |
399 | step_expr = evolution_part; |
400 | init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb)); |
401 | |
402 | if (dump_enabled_p ()) |
403 | dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n", |
404 | step_expr, init_expr); |
405 | |
406 | *init = init_expr; |
407 | *step = step_expr; |
408 | |
409 | if (TREE_CODE (step_expr)((enum tree_code) (step_expr)->base.code) != INTEGER_CST |
410 | && (TREE_CODE (step_expr)((enum tree_code) (step_expr)->base.code) != SSA_NAME |
411 | || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)(tree_check ((step_expr), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 411, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt)) |
412 | && flow_bb_inside_loop_p (get_loop (cfun(cfun + 0), loop_nb), bb)) |
413 | || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))(((enum tree_code) (((contains_struct_check ((step_expr), (TS_TYPED ), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 413, __FUNCTION__))->typed.type))->base.code) == ENUMERAL_TYPE || ((enum tree_code) (((contains_struct_check ((step_expr), ( TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 413, __FUNCTION__))->typed.type))->base.code) == BOOLEAN_TYPE || ((enum tree_code) (((contains_struct_check ((step_expr), ( TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 413, __FUNCTION__))->typed.type))->base.code) == INTEGER_TYPE ) |
414 | && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))(((enum tree_code) (((contains_struct_check ((step_expr), (TS_TYPED ), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 414, __FUNCTION__))->typed.type))->base.code) == REAL_TYPE ) |
415 | || !flag_associative_mathglobal_options.x_flag_associative_math))) |
416 | && (TREE_CODE (step_expr)((enum tree_code) (step_expr)->base.code) != REAL_CST |
417 | || !flag_associative_mathglobal_options.x_flag_associative_math)) |
418 | { |
419 | if (dump_enabled_p ()) |
420 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
421 | "step unknown.\n"); |
422 | return false; |
423 | } |
424 | |
425 | return true; |
426 | } |
427 | |
428 | /* Function vect_is_nonlinear_iv_evolution |
429 | |
430 | Only support nonlinear induction for integer type |
431 | 1. neg |
432 | 2. mul by constant |
433 | 3. lshift/rshift by constant. |
434 | |
435 | For neg induction, return a fake step as integer -1. */ |
436 | static bool |
437 | vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info, |
438 | gphi* loop_phi_node, tree *init, tree *step) |
439 | { |
440 | tree init_expr, ev_expr, result, op1, op2; |
441 | gimple* def; |
442 | |
443 | if (gimple_phi_num_args (loop_phi_node) != 2) |
444 | return false; |
445 | |
446 | init_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_preheader_edge (loop))gimple_phi_arg_def (((loop_phi_node)), ((loop_preheader_edge ( loop))->dest_idx)); |
447 | ev_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_latch_edge (loop))gimple_phi_arg_def (((loop_phi_node)), ((loop_latch_edge (loop ))->dest_idx)); |
448 | |
449 | /* Support nonlinear induction only for integer type. */ |
450 | if (!INTEGRAL_TYPE_P (TREE_TYPE (init_expr))(((enum tree_code) (((contains_struct_check ((init_expr), (TS_TYPED ), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 450, __FUNCTION__))->typed.type))->base.code) == ENUMERAL_TYPE || ((enum tree_code) (((contains_struct_check ((init_expr), ( TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 450, __FUNCTION__))->typed.type))->base.code) == BOOLEAN_TYPE || ((enum tree_code) (((contains_struct_check ((init_expr), ( TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 450, __FUNCTION__))->typed.type))->base.code) == INTEGER_TYPE )) |
451 | return false; |
452 | |
453 | *init = init_expr; |
454 | result = PHI_RESULT (loop_phi_node)get_def_from_ptr (gimple_phi_result_ptr (loop_phi_node)); |
455 | |
456 | if (TREE_CODE (ev_expr)((enum tree_code) (ev_expr)->base.code) != SSA_NAME |
457 | || ((def = SSA_NAME_DEF_STMT (ev_expr)(tree_check ((ev_expr), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 457, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt), false) |
458 | || !is_gimple_assign (def)) |
459 | return false; |
460 | |
461 | enum tree_code t_code = gimple_assign_rhs_code (def); |
462 | switch (t_code) |
463 | { |
464 | case NEGATE_EXPR: |
465 | if (gimple_assign_rhs1 (def) != result) |
466 | return false; |
467 | *step = build_int_cst (TREE_TYPE (init_expr)((contains_struct_check ((init_expr), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 467, __FUNCTION__))->typed.type), -1); |
468 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info)(stmt_info)->loop_phi_evolution_type = vect_step_op_neg; |
469 | break; |
470 | |
471 | case RSHIFT_EXPR: |
472 | case LSHIFT_EXPR: |
473 | case MULT_EXPR: |
474 | op1 = gimple_assign_rhs1 (def); |
475 | op2 = gimple_assign_rhs2 (def); |
476 | if (TREE_CODE (op2)((enum tree_code) (op2)->base.code) != INTEGER_CST |
477 | || op1 != result) |
478 | return false; |
479 | *step = op2; |
480 | if (t_code == LSHIFT_EXPR) |
481 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info)(stmt_info)->loop_phi_evolution_type = vect_step_op_shl; |
482 | else if (t_code == RSHIFT_EXPR) |
483 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info)(stmt_info)->loop_phi_evolution_type = vect_step_op_shr; |
484 | /* NEGATE_EXPR and MULT_EXPR are both vect_step_op_mul. */ |
485 | else |
486 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info)(stmt_info)->loop_phi_evolution_type = vect_step_op_mul; |
487 | break; |
488 | |
489 | default: |
490 | return false; |
491 | } |
492 | |
493 | STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info)(stmt_info)->loop_phi_evolution_base_unchanged = *init; |
494 | STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info)(stmt_info)->loop_phi_evolution_part = *step; |
495 | |
496 | return true; |
497 | } |
498 | |
499 | /* Return true if PHI, described by STMT_INFO, is the inner PHI in |
500 | what we are assuming is a double reduction. For example, given |
501 | a structure like this: |
502 | |
503 | outer1: |
504 | x_1 = PHI <x_4(outer2), ...>; |
505 | ... |
506 | |
507 | inner: |
508 | x_2 = PHI <x_1(outer1), ...>; |
509 | ... |
510 | x_3 = ...; |
511 | ... |
512 | |
513 | outer2: |
514 | x_4 = PHI <x_3(inner)>; |
515 | ... |
516 | |
517 | outer loop analysis would treat x_1 as a double reduction phi and |
518 | this function would then return true for x_2. */ |
519 | |
520 | static bool |
521 | vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi) |
522 | { |
523 | use_operand_p use_p; |
524 | ssa_op_iter op_iter; |
525 | FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE)for ((use_p) = op_iter_init_phiuse (&(op_iter), phi, 0x01 ); !op_iter_done (&(op_iter)); (use_p) = op_iter_next_use (&(op_iter))) |
526 | if (stmt_vec_info def_info = loop_vinfo->lookup_def (USE_FROM_PTR (use_p)get_use_from_ptr (use_p))) |
527 | if (STMT_VINFO_DEF_TYPE (def_info)(def_info)->def_type == vect_double_reduction_def) |
528 | return true; |
529 | return false; |
530 | } |
531 | |
532 | /* Returns true if Phi is a first-order recurrence. A first-order |
533 | recurrence is a non-reduction recurrence relation in which the value of |
534 | the recurrence in the current loop iteration equals a value defined in |
535 | the previous iteration. */ |
536 | |
537 | static bool |
538 | vect_phi_first_order_recurrence_p (loop_vec_info loop_vinfo, class loop *loop, |
539 | gphi *phi) |
540 | { |
541 | /* A nested cycle isn't vectorizable as first order recurrence. */ |
542 | if (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop != loop) |
543 | return false; |
544 | |
545 | /* Ensure the loop latch definition is from within the loop. */ |
546 | edge latch = loop_latch_edge (loop); |
547 | tree ldef = PHI_ARG_DEF_FROM_EDGE (phi, latch)gimple_phi_arg_def (((phi)), ((latch)->dest_idx)); |
548 | if (TREE_CODE (ldef)((enum tree_code) (ldef)->base.code) != SSA_NAME |
549 | || SSA_NAME_IS_DEFAULT_DEF (ldef)(tree_check ((ldef), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 549, __FUNCTION__, (SSA_NAME)))->base.default_def_flag |
550 | || is_a <gphi *> (SSA_NAME_DEF_STMT (ldef)(tree_check ((ldef), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 550, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt) |
551 | || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (ldef)(tree_check ((ldef), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 551, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt))) |
552 | return false; |
553 | |
554 | tree def = gimple_phi_result (phi); |
555 | |
556 | /* Ensure every use_stmt of the phi node is dominated by the latch |
557 | definition. */ |
558 | imm_use_iterator imm_iter; |
559 | use_operand_p use_p; |
560 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)for ((use_p) = first_readonly_imm_use (&(imm_iter), (def) ); !end_readonly_imm_use_p (&(imm_iter)); (void) ((use_p) = next_readonly_imm_use (&(imm_iter)))) |
561 | if (!is_gimple_debug (USE_STMT (use_p)(use_p)->loc.stmt) |
562 | && (SSA_NAME_DEF_STMT (ldef)(tree_check ((ldef), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 562, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt == USE_STMT (use_p)(use_p)->loc.stmt |
563 | || !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (ldef)(tree_check ((ldef), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 563, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt, |
564 | USE_STMT (use_p)(use_p)->loc.stmt))) |
565 | return false; |
566 | |
567 | /* First-order recurrence autovectorization needs shuffle vector. */ |
568 | tree scalar_type = TREE_TYPE (def)((contains_struct_check ((def), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 568, __FUNCTION__))->typed.type); |
569 | tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); |
570 | if (!vectype) |
571 | return false; |
572 | |
573 | return true; |
574 | } |
575 | |
576 | /* Function vect_analyze_scalar_cycles_1. |
577 | |
578 | Examine the cross iteration def-use cycles of scalar variables |
579 | in LOOP. LOOP_VINFO represents the loop that is now being |
580 | considered for vectorization (can be LOOP, or an outer-loop |
581 | enclosing LOOP). SLP indicates there will be some subsequent |
582 | slp analyses or not. */ |
583 | |
584 | static void |
585 | vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop, |
586 | bool slp) |
587 | { |
588 | basic_block bb = loop->header; |
589 | tree init, step; |
590 | auto_vec<stmt_vec_info, 64> worklist; |
591 | gphi_iterator gsi; |
592 | bool double_reduc, reduc_chain; |
593 | |
594 | DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles")auto_dump_scope scope ("vect_analyze_scalar_cycles", vect_location ); |
595 | |
596 | /* First - identify all inductions. Reduction detection assumes that all the |
597 | inductions have been identified, therefore, this order must not be |
598 | changed. */ |
599 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
600 | { |
601 | gphi *phi = gsi.phi (); |
602 | tree access_fn = NULLnullptr; |
603 | tree def = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi)); |
604 | stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (phi); |
605 | |
606 | if (dump_enabled_p ()) |
607 | dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", |
608 | (gimple *) phi); |
609 | |
610 | /* Skip virtual phi's. The data dependences that are associated with |
611 | virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ |
612 | if (virtual_operand_p (def)) |
613 | continue; |
614 | |
615 | STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_unknown_def_type; |
616 | |
617 | /* Analyze the evolution function. */ |
618 | access_fn = analyze_scalar_evolution (loop, def); |
619 | if (access_fn) |
620 | { |
621 | STRIP_NOPS (access_fn)(access_fn) = tree_strip_nop_conversions ((const_cast<union tree_node *> (((access_fn))))); |
622 | if (dump_enabled_p ()) |
623 | dump_printf_loc (MSG_NOTE, vect_location, |
624 | "Access function of PHI: %T\n", access_fn); |
625 | STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)(stmt_vinfo)->loop_phi_evolution_base_unchanged |
626 | = initial_condition_in_loop_num (access_fn, loop->num); |
627 | STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)(stmt_vinfo)->loop_phi_evolution_part |
628 | = evolution_part_in_loop_num (access_fn, loop->num); |
629 | } |
630 | |
631 | if ((!access_fn |
632 | || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi) |
633 | || !vect_is_simple_iv_evolution (loop->num, access_fn, |
634 | &init, &step) |
635 | || (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop != loop |
636 | && TREE_CODE (step)((enum tree_code) (step)->base.code) != INTEGER_CST)) |
637 | /* Only handle nonlinear iv for same loop. */ |
638 | && (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop != loop |
639 | || !vect_is_nonlinear_iv_evolution (loop, stmt_vinfo, |
640 | phi, &init, &step))) |
641 | { |
642 | worklist.safe_push (stmt_vinfo); |
643 | continue; |
644 | } |
645 | |
646 | gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)((void)(!((stmt_vinfo)->loop_phi_evolution_base_unchanged != (tree) nullptr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 647, __FUNCTION__), 0 : 0)) |
647 | != NULL_TREE)((void)(!((stmt_vinfo)->loop_phi_evolution_base_unchanged != (tree) nullptr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 647, __FUNCTION__), 0 : 0)); |
648 | gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE)((void)(!((stmt_vinfo)->loop_phi_evolution_part != (tree) nullptr ) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 648, __FUNCTION__), 0 : 0)); |
649 | |
650 | if (dump_enabled_p ()) |
651 | dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n"); |
652 | STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_induction_def; |
653 | } |
654 | |
655 | |
656 | /* Second - identify all reductions and nested cycles. */ |
657 | while (worklist.length () > 0) |
658 | { |
659 | stmt_vec_info stmt_vinfo = worklist.pop (); |
660 | gphi *phi = as_a <gphi *> (stmt_vinfo->stmt); |
661 | tree def = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi)); |
662 | |
663 | if (dump_enabled_p ()) |
664 | dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", |
665 | (gimple *) phi); |
666 | |
667 | gcc_assert (!virtual_operand_p (def)((void)(!(!virtual_operand_p (def) && (stmt_vinfo)-> def_type == vect_unknown_def_type) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 668, __FUNCTION__), 0 : 0)) |
668 | && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type)((void)(!(!virtual_operand_p (def) && (stmt_vinfo)-> def_type == vect_unknown_def_type) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 668, __FUNCTION__), 0 : 0)); |
669 | |
670 | stmt_vec_info reduc_stmt_info |
671 | = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc, |
672 | &reduc_chain, slp); |
673 | if (reduc_stmt_info) |
674 | { |
675 | STMT_VINFO_REDUC_DEF (stmt_vinfo)(stmt_vinfo)->reduc_def = reduc_stmt_info; |
676 | STMT_VINFO_REDUC_DEF (reduc_stmt_info)(reduc_stmt_info)->reduc_def = stmt_vinfo; |
677 | if (double_reduc) |
678 | { |
679 | if (dump_enabled_p ()) |
680 | dump_printf_loc (MSG_NOTE, vect_location, |
681 | "Detected double reduction.\n"); |
682 | |
683 | STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_double_reduction_def; |
684 | STMT_VINFO_DEF_TYPE (reduc_stmt_info)(reduc_stmt_info)->def_type = vect_double_reduction_def; |
685 | } |
686 | else |
687 | { |
688 | if (loop != LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop) |
689 | { |
690 | if (dump_enabled_p ()) |
691 | dump_printf_loc (MSG_NOTE, vect_location, |
692 | "Detected vectorizable nested cycle.\n"); |
693 | |
694 | STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_nested_cycle; |
695 | } |
696 | else |
697 | { |
698 | if (dump_enabled_p ()) |
699 | dump_printf_loc (MSG_NOTE, vect_location, |
700 | "Detected reduction.\n"); |
701 | |
702 | STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_reduction_def; |
703 | STMT_VINFO_DEF_TYPE (reduc_stmt_info)(reduc_stmt_info)->def_type = vect_reduction_def; |
704 | /* Store the reduction cycles for possible vectorization in |
705 | loop-aware SLP if it was not detected as reduction |
706 | chain. */ |
707 | if (! reduc_chain) |
708 | LOOP_VINFO_REDUCTIONS (loop_vinfo)(loop_vinfo)->reductions.safe_push |
709 | (reduc_stmt_info); |
710 | } |
711 | } |
712 | } |
713 | else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi)) |
714 | STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_first_order_recurrence; |
715 | else |
716 | if (dump_enabled_p ()) |
717 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
718 | "Unknown def-use cycle pattern.\n"); |
719 | } |
720 | } |
721 | |
722 | |
723 | /* Function vect_analyze_scalar_cycles. |
724 | |
725 | Examine the cross iteration def-use cycles of scalar variables, by |
726 | analyzing the loop-header PHIs of scalar variables. Classify each |
727 | cycle as one of the following: invariant, induction, reduction, unknown. |
728 | We do that for the loop represented by LOOP_VINFO, and also to its |
729 | inner-loop, if exists. |
730 | Examples for scalar cycles: |
731 | |
732 | Example1: reduction: |
733 | |
734 | loop1: |
735 | for (i=0; i<N; i++) |
736 | sum += a[i]; |
737 | |
738 | Example2: induction: |
739 | |
740 | loop2: |
741 | for (i=0; i<N; i++) |
742 | a[i] = i; */ |
743 | |
744 | static void |
745 | vect_analyze_scalar_cycles (loop_vec_info loop_vinfo, bool slp) |
746 | { |
747 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; |
748 | |
749 | vect_analyze_scalar_cycles_1 (loop_vinfo, loop, slp); |
750 | |
751 | /* When vectorizing an outer-loop, the inner-loop is executed sequentially. |
752 | Reductions in such inner-loop therefore have different properties than |
753 | the reductions in the nest that gets vectorized: |
754 | 1. When vectorized, they are executed in the same order as in the original |
755 | scalar loop, so we can't change the order of computation when |
756 | vectorizing them. |
757 | 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the |
758 | current checks are too strict. */ |
759 | |
760 | if (loop->inner) |
761 | vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner, slp); |
762 | } |
763 | |
764 | /* Transfer group and reduction information from STMT_INFO to its |
765 | pattern stmt. */ |
766 | |
767 | static void |
768 | vect_fixup_reduc_chain (stmt_vec_info stmt_info) |
769 | { |
770 | stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt; |
771 | stmt_vec_info stmtp; |
772 | gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp)((void)(!(!(((void)(!(!(firstp)->dr_aux.dr) ? fancy_abort ( "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 772, __FUNCTION__), 0 : 0)), (firstp)->first_element) && (((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 773, __FUNCTION__), 0 : 0)), (stmt_info)->first_element) ) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 773, __FUNCTION__), 0 : 0)) |
773 | && REDUC_GROUP_FIRST_ELEMENT (stmt_info))((void)(!(!(((void)(!(!(firstp)->dr_aux.dr) ? fancy_abort ( "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 772, __FUNCTION__), 0 : 0)), (firstp)->first_element) && (((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 773, __FUNCTION__), 0 : 0)), (stmt_info)->first_element) ) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 773, __FUNCTION__), 0 : 0)); |
774 | REDUC_GROUP_SIZE (firstp)(((void)(!(!(firstp)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 774, __FUNCTION__), 0 : 0)), (firstp)->size) = REDUC_GROUP_SIZE (stmt_info)(((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 774, __FUNCTION__), 0 : 0)), (stmt_info)->size); |
775 | do |
776 | { |
777 | stmtp = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt; |
778 | gcc_checking_assert (STMT_VINFO_DEF_TYPE (stmtp)((void)(!((stmtp)->def_type == (stmt_info)->def_type) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 779, __FUNCTION__), 0 : 0)) |
779 | == STMT_VINFO_DEF_TYPE (stmt_info))((void)(!((stmtp)->def_type == (stmt_info)->def_type) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 779, __FUNCTION__), 0 : 0)); |
780 | REDUC_GROUP_FIRST_ELEMENT (stmtp)(((void)(!(!(stmtp)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 780, __FUNCTION__), 0 : 0)), (stmtp)->first_element) = firstp; |
781 | stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info)(((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 781, __FUNCTION__), 0 : 0)), (stmt_info)->next_element); |
782 | if (stmt_info) |
783 | REDUC_GROUP_NEXT_ELEMENT (stmtp)(((void)(!(!(stmtp)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 783, __FUNCTION__), 0 : 0)), (stmtp)->next_element) |
784 | = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt; |
785 | } |
786 | while (stmt_info); |
787 | } |
788 | |
789 | /* Fixup scalar cycles that now have their stmts detected as patterns. */ |
790 | |
791 | static void |
792 | vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo) |
793 | { |
794 | stmt_vec_info first; |
795 | unsigned i; |
796 | |
797 | FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)for (i = 0; ((loop_vinfo)->reduction_chains).iterate ((i), &(first)); ++(i)) |
798 | { |
799 | stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first)(((void)(!(!(first)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 799, __FUNCTION__), 0 : 0)), (first)->next_element); |
800 | while (next) |
801 | { |
802 | if ((STMT_VINFO_IN_PATTERN_P (next)(next)->in_pattern_p |
803 | != STMT_VINFO_IN_PATTERN_P (first)(first)->in_pattern_p) |
804 | || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next))(vect_stmt_to_vectorize (next))->reduc_idx == -1) |
805 | break; |
806 | next = REDUC_GROUP_NEXT_ELEMENT (next)(((void)(!(!(next)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 806, __FUNCTION__), 0 : 0)), (next)->next_element); |
807 | } |
808 | /* If all reduction chain members are well-formed patterns adjust |
809 | the group to group the pattern stmts instead. */ |
810 | if (! next |
811 | && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first))(vect_stmt_to_vectorize (first))->reduc_idx != -1) |
812 | { |
813 | if (STMT_VINFO_IN_PATTERN_P (first)(first)->in_pattern_p) |
814 | { |
815 | vect_fixup_reduc_chain (first); |
816 | LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)(loop_vinfo)->reduction_chains[i] |
817 | = STMT_VINFO_RELATED_STMT (first)(first)->related_stmt; |
818 | } |
819 | } |
820 | /* If not all stmt in the chain are patterns or if we failed |
821 | to update STMT_VINFO_REDUC_IDX dissolve the chain and handle |
822 | it as regular reduction instead. */ |
823 | else |
824 | { |
825 | stmt_vec_info vinfo = first; |
826 | stmt_vec_info last = NULLnullptr; |
827 | while (vinfo) |
828 | { |
829 | next = REDUC_GROUP_NEXT_ELEMENT (vinfo)(((void)(!(!(vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 829, __FUNCTION__), 0 : 0)), (vinfo)->next_element); |
830 | REDUC_GROUP_FIRST_ELEMENT (vinfo)(((void)(!(!(vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 830, __FUNCTION__), 0 : 0)), (vinfo)->first_element) = NULLnullptr; |
831 | REDUC_GROUP_NEXT_ELEMENT (vinfo)(((void)(!(!(vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 831, __FUNCTION__), 0 : 0)), (vinfo)->next_element) = NULLnullptr; |
832 | last = vinfo; |
833 | vinfo = next; |
834 | } |
835 | STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first))(vect_stmt_to_vectorize (first))->def_type |
836 | = vect_internal_def; |
837 | loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last)); |
838 | LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)(loop_vinfo)->reduction_chains.unordered_remove (i); |
839 | --i; |
840 | } |
841 | } |
842 | } |
843 | |
844 | /* Function vect_get_loop_niters. |
845 | |
846 | Determine how many iterations the loop is executed and place it |
847 | in NUMBER_OF_ITERATIONS. Place the number of latch iterations |
848 | in NUMBER_OF_ITERATIONSM1. Place the condition under which the |
849 | niter information holds in ASSUMPTIONS. |
850 | |
851 | Return the loop exit condition. */ |
852 | |
853 | |
854 | static gcond * |
855 | vect_get_loop_niters (class loop *loop, tree *assumptions, |
856 | tree *number_of_iterations, tree *number_of_iterationsm1) |
857 | { |
858 | edge exit = single_exit (loop); |
859 | class tree_niter_desc niter_desc; |
860 | tree niter_assumptions, niter, may_be_zero; |
861 | gcond *cond = get_loop_exit_condition (loop); |
862 | |
863 | *assumptions = boolean_true_nodeglobal_trees[TI_BOOLEAN_TRUE]; |
864 | *number_of_iterationsm1 = chrec_dont_knowglobal_trees[TI_CHREC_DONT_KNOW]; |
865 | *number_of_iterations = chrec_dont_knowglobal_trees[TI_CHREC_DONT_KNOW]; |
866 | DUMP_VECT_SCOPE ("get_loop_niters")auto_dump_scope scope ("get_loop_niters", vect_location); |
867 | |
868 | if (!exit) |
869 | return cond; |
870 | |
871 | may_be_zero = NULL_TREE(tree) nullptr; |
872 | if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULLnullptr) |
873 | || chrec_contains_undetermined (niter_desc.niter)) |
874 | return cond; |
875 | |
876 | niter_assumptions = niter_desc.assumptions; |
877 | may_be_zero = niter_desc.may_be_zero; |
878 | niter = niter_desc.niter; |
879 | |
880 | if (may_be_zero && integer_zerop (may_be_zero)) |
881 | may_be_zero = NULL_TREE(tree) nullptr; |
882 | |
883 | if (may_be_zero) |
884 | { |
885 | if (COMPARISON_CLASS_P (may_be_zero)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code ) (may_be_zero)->base.code))] == tcc_comparison)) |
886 | { |
887 | /* Try to combine may_be_zero with assumptions, this can simplify |
888 | computation of niter expression. */ |
889 | if (niter_assumptions && !integer_nonzerop (niter_assumptions)) |
890 | niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,fold_build2_loc (((location_t) 0), TRUTH_AND_EXPR, global_trees [TI_BOOLEAN_TYPE], niter_assumptions, fold_build1_loc (((location_t ) 0), TRUTH_NOT_EXPR, global_trees[TI_BOOLEAN_TYPE], may_be_zero ) ) |
891 | niter_assumptions,fold_build2_loc (((location_t) 0), TRUTH_AND_EXPR, global_trees [TI_BOOLEAN_TYPE], niter_assumptions, fold_build1_loc (((location_t ) 0), TRUTH_NOT_EXPR, global_trees[TI_BOOLEAN_TYPE], may_be_zero ) ) |
892 | fold_build1 (TRUTH_NOT_EXPR,fold_build2_loc (((location_t) 0), TRUTH_AND_EXPR, global_trees [TI_BOOLEAN_TYPE], niter_assumptions, fold_build1_loc (((location_t ) 0), TRUTH_NOT_EXPR, global_trees[TI_BOOLEAN_TYPE], may_be_zero ) ) |
893 | boolean_type_node,fold_build2_loc (((location_t) 0), TRUTH_AND_EXPR, global_trees [TI_BOOLEAN_TYPE], niter_assumptions, fold_build1_loc (((location_t ) 0), TRUTH_NOT_EXPR, global_trees[TI_BOOLEAN_TYPE], may_be_zero ) ) |
894 | may_be_zero))fold_build2_loc (((location_t) 0), TRUTH_AND_EXPR, global_trees [TI_BOOLEAN_TYPE], niter_assumptions, fold_build1_loc (((location_t ) 0), TRUTH_NOT_EXPR, global_trees[TI_BOOLEAN_TYPE], may_be_zero ) ); |
895 | else |
896 | niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,fold_build3_loc (((location_t) 0), COND_EXPR, ((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 896, __FUNCTION__))->typed.type), may_be_zero, build_int_cst (((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 897, __FUNCTION__))->typed.type), 0), rewrite_to_non_trapping_overflow (niter) ) |
897 | build_int_cst (TREE_TYPE (niter), 0),fold_build3_loc (((location_t) 0), COND_EXPR, ((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 896, __FUNCTION__))->typed.type), may_be_zero, build_int_cst (((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 897, __FUNCTION__))->typed.type), 0), rewrite_to_non_trapping_overflow (niter) ) |
898 | rewrite_to_non_trapping_overflow (niter))fold_build3_loc (((location_t) 0), COND_EXPR, ((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 896, __FUNCTION__))->typed.type), may_be_zero, build_int_cst (((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 897, __FUNCTION__))->typed.type), 0), rewrite_to_non_trapping_overflow (niter) ); |
899 | |
900 | may_be_zero = NULL_TREE(tree) nullptr; |
901 | } |
902 | else if (integer_nonzerop (may_be_zero)) |
903 | { |
904 | *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter)((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 904, __FUNCTION__))->typed.type), 0); |
905 | *number_of_iterations = build_int_cst (TREE_TYPE (niter)((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 905, __FUNCTION__))->typed.type), 1); |
906 | return cond; |
907 | } |
908 | else |
909 | return cond; |
910 | } |
911 | |
912 | *assumptions = niter_assumptions; |
913 | *number_of_iterationsm1 = niter; |
914 | |
915 | /* We want the number of loop header executions which is the number |
916 | of latch executions plus one. |
917 | ??? For UINT_MAX latch executions this number overflows to zero |
918 | for loops like do { n++; } while (n != 0); */ |
919 | if (niter && !chrec_contains_undetermined (niter)) |
920 | niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),fold_build2_loc (((location_t) 0), PLUS_EXPR, ((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 920, __FUNCTION__))->typed.type), unshare_expr (niter), build_int_cst (((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 921, __FUNCTION__))->typed.type), 1) ) |
921 | build_int_cst (TREE_TYPE (niter), 1))fold_build2_loc (((location_t) 0), PLUS_EXPR, ((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 920, __FUNCTION__))->typed.type), unshare_expr (niter), build_int_cst (((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 921, __FUNCTION__))->typed.type), 1) ); |
922 | *number_of_iterations = niter; |
923 | |
924 | return cond; |
925 | } |
926 | |
927 | /* Function bb_in_loop_p |
928 | |
929 | Used as predicate for dfs order traversal of the loop bbs. */ |
930 | |
931 | static bool |
932 | bb_in_loop_p (const_basic_block bb, const void *data) |
933 | { |
934 | const class loop *const loop = (const class loop *)data; |
935 | if (flow_bb_inside_loop_p (loop, bb)) |
936 | return true; |
937 | return false; |
938 | } |
939 | |
940 | |
941 | /* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as |
942 | stmt_vec_info structs for all the stmts in LOOP_IN. */ |
943 | |
944 | _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) |
945 | : vec_info (vec_info::loop, shared), |
946 | loop (loop_in), |
947 | bbs (XCNEWVEC (basic_block, loop->num_nodes)((basic_block *) xcalloc ((loop->num_nodes), sizeof (basic_block )))), |
948 | num_itersm1 (NULL_TREE(tree) nullptr), |
949 | num_iters (NULL_TREE(tree) nullptr), |
950 | num_iters_unchanged (NULL_TREE(tree) nullptr), |
951 | num_iters_assumptions (NULL_TREE(tree) nullptr), |
952 | vector_costs (nullptr), |
953 | scalar_costs (nullptr), |
954 | th (0), |
955 | versioning_threshold (0), |
956 | vectorization_factor (0), |
957 | main_loop_edge (nullptr), |
958 | skip_main_loop_edge (nullptr), |
959 | skip_this_loop_edge (nullptr), |
960 | reusable_accumulators (), |
961 | suggested_unroll_factor (1), |
962 | max_vectorization_factor (0), |
963 | mask_skip_niters (NULL_TREE(tree) nullptr), |
964 | rgroup_compare_type (NULL_TREE(tree) nullptr), |
965 | simd_if_cond (NULL_TREE(tree) nullptr), |
966 | unaligned_dr (NULLnullptr), |
967 | peeling_for_alignment (0), |
968 | ptr_mask (0), |
969 | ivexpr_map (NULLnullptr), |
970 | scan_map (NULLnullptr), |
971 | slp_unrolling_factor (1), |
972 | inner_loop_cost_factor (param_vect_inner_loop_cost_factorglobal_options.x_param_vect_inner_loop_cost_factor), |
973 | vectorizable (false), |
974 | can_use_partial_vectors_p (param_vect_partial_vector_usageglobal_options.x_param_vect_partial_vector_usage != 0), |
975 | using_partial_vectors_p (false), |
976 | epil_using_partial_vectors_p (false), |
977 | partial_load_store_bias (0), |
978 | peeling_for_gaps (false), |
979 | peeling_for_niter (false), |
980 | no_data_dependencies (false), |
981 | has_mask_store (false), |
982 | scalar_loop_scaling (profile_probability::uninitialized ()), |
983 | scalar_loop (NULLnullptr), |
984 | orig_loop_info (NULLnullptr) |
985 | { |
986 | /* CHECKME: We want to visit all BBs before their successors (except for |
987 | latch blocks, for which this assertion wouldn't hold). In the simple |
988 | case of the loop forms we allow, a dfs order of the BBs would the same |
989 | as reversed postorder traversal, so we are safe. */ |
990 | |
991 | unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p, |
992 | bbs, loop->num_nodes, loop); |
993 | gcc_assert (nbbs == loop->num_nodes)((void)(!(nbbs == loop->num_nodes) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 993, __FUNCTION__), 0 : 0)); |
994 | |
995 | for (unsigned int i = 0; i < nbbs; i++) |
996 | { |
997 | basic_block bb = bbs[i]; |
998 | gimple_stmt_iterator si; |
999 | |
1000 | for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) |
1001 | { |
1002 | gimple *phi = gsi_stmt (si); |
1003 | gimple_set_uid (phi, 0); |
1004 | add_stmt (phi); |
1005 | } |
1006 | |
1007 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) |
1008 | { |
1009 | gimple *stmt = gsi_stmt (si); |
1010 | gimple_set_uid (stmt, 0); |
1011 | if (is_gimple_debug (stmt)) |
1012 | continue; |
1013 | add_stmt (stmt); |
1014 | /* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the |
1015 | third argument is the #pragma omp simd if (x) condition, when 0, |
1016 | loop shouldn't be vectorized, when non-zero constant, it should |
1017 | be vectorized normally, otherwise versioned with vectorized loop |
1018 | done if the condition is non-zero at runtime. */ |
1019 | if (loop_in->simduid |
1020 | && is_gimple_call (stmt) |
1021 | && gimple_call_internal_p (stmt) |
1022 | && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE |
1023 | && gimple_call_num_args (stmt) >= 3 |
1024 | && TREE_CODE (gimple_call_arg (stmt, 0))((enum tree_code) (gimple_call_arg (stmt, 0))->base.code) == SSA_NAME |
1025 | && (loop_in->simduid |
1026 | == SSA_NAME_VAR (gimple_call_arg (stmt, 0))((tree_check ((gimple_call_arg (stmt, 0)), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1026, __FUNCTION__, (SSA_NAME)))->ssa_name.var == (tree) nullptr || ((enum tree_code) ((gimple_call_arg (stmt, 0))-> ssa_name.var)->base.code) == IDENTIFIER_NODE ? (tree) nullptr : (gimple_call_arg (stmt, 0))->ssa_name.var))) |
1027 | { |
1028 | tree arg = gimple_call_arg (stmt, 2); |
1029 | if (integer_zerop (arg) || TREE_CODE (arg)((enum tree_code) (arg)->base.code) == SSA_NAME) |
1030 | simd_if_cond = arg; |
1031 | else |
1032 | gcc_assert (integer_nonzerop (arg))((void)(!(integer_nonzerop (arg)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1032, __FUNCTION__), 0 : 0)); |
1033 | } |
1034 | } |
1035 | } |
1036 | |
1037 | epilogue_vinfos.create (6); |
1038 | } |
1039 | |
1040 | /* Free all levels of rgroup CONTROLS. */ |
1041 | |
1042 | void |
1043 | release_vec_loop_controls (vec<rgroup_controls> *controls) |
1044 | { |
1045 | rgroup_controls *rgc; |
1046 | unsigned int i; |
1047 | FOR_EACH_VEC_ELT (*controls, i, rgc)for (i = 0; (*controls).iterate ((i), &(rgc)); ++(i)) |
1048 | rgc->controls.release (); |
1049 | controls->release (); |
1050 | } |
1051 | |
1052 | /* Free all memory used by the _loop_vec_info, as well as all the |
1053 | stmt_vec_info structs of all the stmts in the loop. */ |
1054 | |
1055 | _loop_vec_info::~_loop_vec_info () |
1056 | { |
1057 | free (bbs); |
1058 | |
1059 | release_vec_loop_controls (&masks); |
1060 | release_vec_loop_controls (&lens); |
1061 | delete ivexpr_map; |
1062 | delete scan_map; |
1063 | epilogue_vinfos.release (); |
1064 | delete scalar_costs; |
1065 | delete vector_costs; |
1066 | |
1067 | /* When we release an epiloge vinfo that we do not intend to use |
1068 | avoid clearing AUX of the main loop which should continue to |
1069 | point to the main loop vinfo since otherwise we'll leak that. */ |
1070 | if (loop->aux == this) |
1071 | loop->aux = NULLnullptr; |
1072 | } |
1073 | |
1074 | /* Return an invariant or register for EXPR and emit necessary |
1075 | computations in the LOOP_VINFO loop preheader. */ |
1076 | |
1077 | tree |
1078 | cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr) |
1079 | { |
1080 | if (is_gimple_reg (expr) |
1081 | || is_gimple_min_invariant (expr)) |
1082 | return expr; |
1083 | |
1084 | if (! loop_vinfo->ivexpr_map) |
1085 | loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>; |
1086 | tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr); |
1087 | if (! cached) |
1088 | { |
1089 | gimple_seq stmts = NULLnullptr; |
1090 | cached = force_gimple_operand (unshare_expr (expr), |
1091 | &stmts, true, NULL_TREE(tree) nullptr); |
1092 | if (stmts) |
1093 | { |
1094 | edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop); |
1095 | gsi_insert_seq_on_edge_immediate (e, stmts); |
1096 | } |
1097 | } |
1098 | return cached; |
1099 | } |
1100 | |
1101 | /* Return true if we can use CMP_TYPE as the comparison type to produce |
1102 | all masks required to mask LOOP_VINFO. */ |
1103 | |
1104 | static bool |
1105 | can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type) |
1106 | { |
1107 | rgroup_controls *rgm; |
1108 | unsigned int i; |
1109 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)for (i = 0; ((loop_vinfo)->masks).iterate ((i), &(rgm) ); ++(i)) |
1110 | if (rgm->type != NULL_TREE(tree) nullptr |
1111 | && !direct_internal_fn_supported_p (IFN_WHILE_ULT, |
1112 | cmp_type, rgm->type, |
1113 | OPTIMIZE_FOR_SPEED)) |
1114 | return false; |
1115 | return true; |
1116 | } |
1117 | |
1118 | /* Calculate the maximum number of scalars per iteration for every |
1119 | rgroup in LOOP_VINFO. */ |
1120 | |
1121 | static unsigned int |
1122 | vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo) |
1123 | { |
1124 | unsigned int res = 1; |
1125 | unsigned int i; |
1126 | rgroup_controls *rgm; |
1127 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)for (i = 0; ((loop_vinfo)->masks).iterate ((i), &(rgm) ); ++(i)) |
1128 | res = MAX (res, rgm->max_nscalars_per_iter)((res) > (rgm->max_nscalars_per_iter) ? (res) : (rgm-> max_nscalars_per_iter)); |
1129 | return res; |
1130 | } |
1131 | |
1132 | /* Calculate the minimum precision necessary to represent: |
1133 | |
1134 | MAX_NITERS * FACTOR |
1135 | |
1136 | as an unsigned integer, where MAX_NITERS is the maximum number of |
1137 | loop header iterations for the original scalar form of LOOP_VINFO. */ |
1138 | |
1139 | static unsigned |
1140 | vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor) |
1141 | { |
1142 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; |
1143 | |
1144 | /* Get the maximum number of iterations that is representable |
1145 | in the counter type. */ |
1146 | tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo))((contains_struct_check (((loop_vinfo)->num_itersm1), (TS_TYPED ), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1146, __FUNCTION__))->typed.type); |
1147 | widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)((tree_check5 ((ni_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1147, __FUNCTION__, (INTEGER_TYPE), (ENUMERAL_TYPE), (BOOLEAN_TYPE ), (REAL_TYPE), (FIXED_POINT_TYPE)))->type_non_common.maxval )) + 1; |
1148 | |
1149 | /* Get a more refined estimate for the number of iterations. */ |
1150 | widest_int max_back_edges; |
1151 | if (max_loop_iterations (loop, &max_back_edges)) |
1152 | max_ni = wi::smin (max_ni, max_back_edges + 1); |
1153 | |
1154 | /* Work out how many bits we need to represent the limit. */ |
1155 | return wi::min_precision (max_ni * factor, UNSIGNED); |
1156 | } |
1157 | |
1158 | /* True if the loop needs peeling or partial vectors when vectorized. */ |
1159 | |
1160 | static bool |
1161 | vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo) |
1162 | { |
1163 | unsigned HOST_WIDE_INTlong const_vf; |
1164 | HOST_WIDE_INTlong max_niter |
1165 | = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop); |
1166 | |
1167 | unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th; |
1168 | if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info) |
1169 | th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO((loop_vinfo)->orig_loop_info)->th |
1170 | (loop_vinfo))((loop_vinfo)->orig_loop_info)->th; |
1171 | |
1172 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) |
1173 | && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment >= 0) |
1174 | { |
1175 | /* Work out the (constant) number of iterations that need to be |
1176 | peeled for reasons other than niters. */ |
1177 | unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment; |
1178 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps) |
1179 | peel_niter += 1; |
1180 | if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)-> num_iters), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1180, __FUNCTION__)))) - peel_niter, |
1181 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor)) |
1182 | return true; |
1183 | } |
1184 | else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment |
1185 | /* ??? When peeling for gaps but not alignment, we could |
1186 | try to check whether the (variable) niters is known to be |
1187 | VF * N + 1. That's something of a niche case though. */ |
1188 | || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps |
1189 | || !LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor.is_constant (&const_vf) |
1190 | || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)(loop_vinfo)->num_iters) |
1191 | < (unsigned) exact_log2 (const_vf)) |
1192 | /* In case of versioning, check if the maximum number of |
1193 | iterations is greater than th. If they are identical, |
1194 | the epilogue is unnecessary. */ |
1195 | && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || (( loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond)) |
1196 | || ((unsigned HOST_WIDE_INTlong) max_niter |
1197 | > (th / const_vf) * const_vf)))) |
1198 | return true; |
1199 | |
1200 | return false; |
1201 | } |
1202 | |
1203 | /* Each statement in LOOP_VINFO can be masked where necessary. Check |
1204 | whether we can actually generate the masks required. Return true if so, |
1205 | storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */ |
1206 | |
1207 | static bool |
1208 | vect_verify_full_masking (loop_vec_info loop_vinfo) |
1209 | { |
1210 | unsigned int min_ni_width; |
1211 | unsigned int max_nscalars_per_iter |
1212 | = vect_get_max_nscalars_per_iter (loop_vinfo); |
1213 | |
1214 | /* Use a normal loop if there are no statements that need masking. |
1215 | This only happens in rare degenerate cases: it means that the loop |
1216 | has no loads, no stores, and no live-out values. */ |
1217 | if (LOOP_VINFO_MASKS (loop_vinfo)(loop_vinfo)->masks.is_empty ()) |
1218 | return false; |
1219 | |
1220 | /* Work out how many bits we need to represent the limit. */ |
1221 | min_ni_width |
1222 | = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter); |
1223 | |
1224 | /* Find a scalar mode for which WHILE_ULT is supported. */ |
1225 | opt_scalar_int_mode cmp_mode_iter; |
1226 | tree cmp_type = NULL_TREE(tree) nullptr; |
1227 | tree iv_type = NULL_TREE(tree) nullptr; |
1228 | widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo); |
1229 | unsigned int iv_precision = UINT_MAX(2147483647 *2U +1U); |
1230 | |
1231 | if (iv_limit != -1) |
1232 | iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter, |
1233 | UNSIGNED); |
1234 | |
1235 | FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)for (mode_iterator::start (&(cmp_mode_iter), MODE_INT); mode_iterator ::iterate_p (&(cmp_mode_iter)); mode_iterator::get_next ( &(cmp_mode_iter))) |
1236 | { |
1237 | unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ()); |
1238 | if (cmp_bits >= min_ni_width |
1239 | && targetm.scalar_mode_supported_p (cmp_mode_iter.require ())) |
1240 | { |
1241 | tree this_type = build_nonstandard_integer_type (cmp_bits, true); |
1242 | if (this_type |
1243 | && can_produce_all_loop_masks_p (loop_vinfo, this_type)) |
1244 | { |
1245 | /* Although we could stop as soon as we find a valid mode, |
1246 | there are at least two reasons why that's not always the |
1247 | best choice: |
1248 | |
1249 | - An IV that's Pmode or wider is more likely to be reusable |
1250 | in address calculations than an IV that's narrower than |
1251 | Pmode. |
1252 | |
1253 | - Doing the comparison in IV_PRECISION or wider allows |
1254 | a natural 0-based IV, whereas using a narrower comparison |
1255 | type requires mitigations against wrap-around. |
1256 | |
1257 | Conversely, if the IV limit is variable, doing the comparison |
1258 | in a wider type than the original type can introduce |
1259 | unnecessary extensions, so picking the widest valid mode |
1260 | is not always a good choice either. |
1261 | |
1262 | Here we prefer the first IV type that's Pmode or wider, |
1263 | and the first comparison type that's IV_PRECISION or wider. |
1264 | (The comparison type must be no wider than the IV type, |
1265 | to avoid extensions in the vector loop.) |
1266 | |
1267 | ??? We might want to try continuing beyond Pmode for ILP32 |
1268 | targets if CMP_BITS < IV_PRECISION. */ |
1269 | iv_type = this_type; |
1270 | if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type)((tree_class_check ((cmp_type), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1270, __FUNCTION__))->type_common.precision)) |
1271 | cmp_type = this_type; |
1272 | if (cmp_bits >= GET_MODE_BITSIZE (Pmode(global_options.x_ix86_pmode == PMODE_DI ? (scalar_int_mode ( (scalar_int_mode::from_int) E_DImode)) : (scalar_int_mode ((scalar_int_mode ::from_int) E_SImode))))) |
1273 | break; |
1274 | } |
1275 | } |
1276 | } |
1277 | |
1278 | if (!cmp_type) |
1279 | return false; |
1280 | |
1281 | LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo)(loop_vinfo)->rgroup_compare_type = cmp_type; |
1282 | LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo)(loop_vinfo)->rgroup_iv_type = iv_type; |
1283 | return true; |
1284 | } |
1285 | |
1286 | /* Check whether we can use vector access with length based on precison |
1287 | comparison. So far, to keep it simple, we only allow the case that the |
1288 | precision of the target supported length is larger than the precision |
1289 | required by loop niters. */ |
1290 | |
1291 | static bool |
1292 | vect_verify_loop_lens (loop_vec_info loop_vinfo) |
1293 | { |
1294 | if (LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens.is_empty ()) |
1295 | return false; |
1296 | |
1297 | machine_mode len_load_mode = get_len_load_store_mode |
1298 | (loop_vinfo->vector_mode, true).require (); |
1299 | machine_mode len_store_mode = get_len_load_store_mode |
1300 | (loop_vinfo->vector_mode, false).require (); |
1301 | |
1302 | signed char partial_load_bias = internal_len_load_store_bias |
1303 | (IFN_LEN_LOAD, len_load_mode); |
1304 | |
1305 | signed char partial_store_bias = internal_len_load_store_bias |
1306 | (IFN_LEN_STORE, len_store_mode); |
1307 | |
1308 | gcc_assert (partial_load_bias == partial_store_bias)((void)(!(partial_load_bias == partial_store_bias) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1308, __FUNCTION__), 0 : 0)); |
1309 | |
1310 | if (partial_load_bias == VECT_PARTIAL_BIAS_UNSUPPORTED127) |
1311 | return false; |
1312 | |
1313 | /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit |
1314 | len_loads with a length of zero. In order to avoid that we prohibit |
1315 | more than one loop length here. */ |
1316 | if (partial_load_bias == -1 |
1317 | && LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens.length () > 1) |
1318 | return false; |
1319 | |
1320 | LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo)(loop_vinfo)->partial_load_store_bias = partial_load_bias; |
1321 | |
1322 | unsigned int max_nitems_per_iter = 1; |
1323 | unsigned int i; |
1324 | rgroup_controls *rgl; |
1325 | /* Find the maximum number of items per iteration for every rgroup. */ |
1326 | FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), i, rgl)for (i = 0; ((loop_vinfo)->lens).iterate ((i), &(rgl)) ; ++(i)) |
1327 | { |
1328 | unsigned nitems_per_iter = rgl->max_nscalars_per_iter * rgl->factor; |
1329 | max_nitems_per_iter = MAX (max_nitems_per_iter, nitems_per_iter)((max_nitems_per_iter) > (nitems_per_iter) ? (max_nitems_per_iter ) : (nitems_per_iter)); |
1330 | } |
1331 | |
1332 | /* Work out how many bits we need to represent the length limit. */ |
1333 | unsigned int min_ni_prec |
1334 | = vect_min_prec_for_max_niters (loop_vinfo, max_nitems_per_iter); |
1335 | |
1336 | /* Now use the maximum of below precisions for one suitable IV type: |
1337 | - the IV's natural precision |
1338 | - the precision needed to hold: the maximum number of scalar |
1339 | iterations multiplied by the scale factor (min_ni_prec above) |
1340 | - the Pmode precision |
1341 | |
1342 | If min_ni_prec is less than the precision of the current niters, |
1343 | we perfer to still use the niters type. Prefer to use Pmode and |
1344 | wider IV to avoid narrow conversions. */ |
1345 | |
1346 | unsigned int ni_prec |
1347 | = TYPE_PRECISION (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)))((tree_class_check ((((contains_struct_check (((loop_vinfo)-> num_iters), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1347, __FUNCTION__))->typed.type)), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1347, __FUNCTION__))->type_common.precision); |
1348 | min_ni_prec = MAX (min_ni_prec, ni_prec)((min_ni_prec) > (ni_prec) ? (min_ni_prec) : (ni_prec)); |
1349 | min_ni_prec = MAX (min_ni_prec, GET_MODE_BITSIZE (Pmode))((min_ni_prec) > (GET_MODE_BITSIZE ((global_options.x_ix86_pmode == PMODE_DI ? (scalar_int_mode ((scalar_int_mode::from_int) E_DImode )) : (scalar_int_mode ((scalar_int_mode::from_int) E_SImode)) ))) ? (min_ni_prec) : (GET_MODE_BITSIZE ((global_options.x_ix86_pmode == PMODE_DI ? (scalar_int_mode ((scalar_int_mode::from_int) E_DImode )) : (scalar_int_mode ((scalar_int_mode::from_int) E_SImode)) )))); |
1350 | |
1351 | tree iv_type = NULL_TREE(tree) nullptr; |
1352 | opt_scalar_int_mode tmode_iter; |
1353 | FOR_EACH_MODE_IN_CLASS (tmode_iter, MODE_INT)for (mode_iterator::start (&(tmode_iter), MODE_INT); mode_iterator ::iterate_p (&(tmode_iter)); mode_iterator::get_next (& (tmode_iter))) |
1354 | { |
1355 | scalar_mode tmode = tmode_iter.require (); |
1356 | unsigned int tbits = GET_MODE_BITSIZE (tmode); |
1357 | |
1358 | /* ??? Do we really want to construct one IV whose precision exceeds |
1359 | BITS_PER_WORD? */ |
1360 | if (tbits > BITS_PER_WORD((8) * (((global_options.x_ix86_isa_flags & (1UL << 1)) != 0) ? 8 : 4))) |
1361 | break; |
1362 | |
1363 | /* Find the first available standard integral type. */ |
1364 | if (tbits >= min_ni_prec && targetm.scalar_mode_supported_p (tmode)) |
1365 | { |
1366 | iv_type = build_nonstandard_integer_type (tbits, true); |
1367 | break; |
1368 | } |
1369 | } |
1370 | |
1371 | if (!iv_type) |
1372 | { |
1373 | if (dump_enabled_p ()) |
1374 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1375 | "can't vectorize with length-based partial vectors" |
1376 | " because there is no suitable iv type.\n"); |
1377 | return false; |
1378 | } |
1379 | |
1380 | LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo)(loop_vinfo)->rgroup_compare_type = iv_type; |
1381 | LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo)(loop_vinfo)->rgroup_iv_type = iv_type; |
1382 | |
1383 | return true; |
1384 | } |
1385 | |
1386 | /* Calculate the cost of one scalar iteration of the loop. */ |
1387 | static void |
1388 | vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) |
1389 | { |
1390 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; |
1391 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs; |
1392 | int nbbs = loop->num_nodes, factor; |
1393 | int innerloop_iters, i; |
1394 | |
1395 | DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost")auto_dump_scope scope ("vect_compute_single_scalar_iteration_cost" , vect_location); |
1396 | |
1397 | /* Gather costs for statements in the scalar loop. */ |
1398 | |
1399 | /* FORNOW. */ |
1400 | innerloop_iters = 1; |
1401 | if (loop->inner) |
1402 | innerloop_iters = LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo)(loop_vinfo)->inner_loop_cost_factor; |
1403 | |
1404 | for (i = 0; i < nbbs; i++) |
1405 | { |
1406 | gimple_stmt_iterator si; |
1407 | basic_block bb = bbs[i]; |
1408 | |
1409 | if (bb->loop_father == loop->inner) |
1410 | factor = innerloop_iters; |
1411 | else |
1412 | factor = 1; |
1413 | |
1414 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) |
1415 | { |
1416 | gimple *stmt = gsi_stmt (si); |
1417 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); |
1418 | |
1419 | if (!is_gimple_assign (stmt) && !is_gimple_call (stmt)) |
1420 | continue; |
1421 | |
1422 | /* Skip stmts that are not vectorized inside the loop. */ |
1423 | stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info); |
1424 | if (!STMT_VINFO_RELEVANT_P (vstmt_info)((vstmt_info)->relevant != vect_unused_in_scope) |
1425 | && (!STMT_VINFO_LIVE_P (vstmt_info)(vstmt_info)->live |
1426 | || !VECTORIZABLE_CYCLE_DEF((((vstmt_info)->def_type) == vect_reduction_def) || (((vstmt_info )->def_type) == vect_double_reduction_def) || (((vstmt_info )->def_type) == vect_nested_cycle)) |
1427 | (STMT_VINFO_DEF_TYPE (vstmt_info))((((vstmt_info)->def_type) == vect_reduction_def) || (((vstmt_info )->def_type) == vect_double_reduction_def) || (((vstmt_info )->def_type) == vect_nested_cycle)))) |
1428 | continue; |
1429 | |
1430 | vect_cost_for_stmt kind; |
1431 | if (STMT_VINFO_DATA_REF (stmt_info)((stmt_info)->dr_aux.dr + 0)) |
1432 | { |
1433 | if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))(((stmt_info)->dr_aux.dr + 0))->is_read) |
1434 | kind = scalar_load; |
1435 | else |
1436 | kind = scalar_store; |
1437 | } |
1438 | else if (vect_nop_conversion_p (stmt_info)) |
1439 | continue; |
1440 | else |
1441 | kind = scalar_stmt; |
1442 | |
1443 | /* We are using vect_prologue here to avoid scaling twice |
1444 | by the inner loop factor. */ |
1445 | record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo)(loop_vinfo)->scalar_cost_vec, |
1446 | factor, kind, stmt_info, 0, vect_prologue); |
1447 | } |
1448 | } |
1449 | |
1450 | /* Now accumulate cost. */ |
1451 | loop_vinfo->scalar_costs = init_cost (loop_vinfo, true); |
1452 | add_stmt_costs (loop_vinfo->scalar_costs, |
1453 | &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo)(loop_vinfo)->scalar_cost_vec); |
1454 | loop_vinfo->scalar_costs->finish_cost (nullptr); |
1455 | } |
1456 | |
1457 | |
1458 | /* Function vect_analyze_loop_form. |
1459 | |
1460 | Verify that certain CFG restrictions hold, including: |
1461 | - the loop has a pre-header |
1462 | - the loop has a single entry and exit |
1463 | - the loop exit condition is simple enough |
1464 | - the number of iterations can be analyzed, i.e, a countable loop. The |
1465 | niter could be analyzed under some assumptions. */ |
1466 | |
1467 | opt_result |
1468 | vect_analyze_loop_form (class loop *loop, vect_loop_form_info *info) |
1469 | { |
1470 | DUMP_VECT_SCOPE ("vect_analyze_loop_form")auto_dump_scope scope ("vect_analyze_loop_form", vect_location ); |
1471 | |
1472 | /* Different restrictions apply when we are considering an inner-most loop, |
1473 | vs. an outer (nested) loop. |
1474 | (FORNOW. May want to relax some of these restrictions in the future). */ |
1475 | |
1476 | info->inner_loop_cond = NULLnullptr; |
1477 | if (!loop->inner) |
1478 | { |
1479 | /* Inner-most loop. We currently require that the number of BBs is |
1480 | exactly 2 (the header and latch). Vectorizable inner-most loops |
1481 | look like this: |
1482 | |
1483 | (pre-header) |
1484 | | |
1485 | header <--------+ |
1486 | | | | |
1487 | | +--> latch --+ |
1488 | | |
1489 | (exit-bb) */ |
1490 | |
1491 | if (loop->num_nodes != 2) |
1492 | return opt_result::failure_at (vect_location, |
1493 | "not vectorized:" |
1494 | " control flow in loop.\n"); |
1495 | |
1496 | if (empty_block_p (loop->header)) |
1497 | return opt_result::failure_at (vect_location, |
1498 | "not vectorized: empty loop.\n"); |
1499 | } |
1500 | else |
1501 | { |
1502 | class loop *innerloop = loop->inner; |
1503 | edge entryedge; |
1504 | |
1505 | /* Nested loop. We currently require that the loop is doubly-nested, |
1506 | contains a single inner loop, and the number of BBs is exactly 5. |
1507 | Vectorizable outer-loops look like this: |
1508 | |
1509 | (pre-header) |
1510 | | |
1511 | header <---+ |
1512 | | | |
1513 | inner-loop | |
1514 | | | |
1515 | tail ------+ |
1516 | | |
1517 | (exit-bb) |
1518 | |
1519 | The inner-loop has the properties expected of inner-most loops |
1520 | as described above. */ |
1521 | |
1522 | if ((loop->inner)->inner || (loop->inner)->next) |
1523 | return opt_result::failure_at (vect_location, |
1524 | "not vectorized:" |
1525 | " multiple nested loops.\n"); |
1526 | |
1527 | if (loop->num_nodes != 5) |
1528 | return opt_result::failure_at (vect_location, |
1529 | "not vectorized:" |
1530 | " control flow in loop.\n"); |
1531 | |
1532 | entryedge = loop_preheader_edge (innerloop); |
1533 | if (entryedge->src != loop->header |
1534 | || !single_exit (innerloop) |
1535 | || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)(*(loop->latch)->preds)[(0)]->src) |
1536 | return opt_result::failure_at (vect_location, |
1537 | "not vectorized:" |
1538 | " unsupported outerloop form.\n"); |
1539 | |
1540 | /* Analyze the inner-loop. */ |
1541 | vect_loop_form_info inner; |
1542 | opt_result res = vect_analyze_loop_form (loop->inner, &inner); |
1543 | if (!res) |
1544 | { |
1545 | if (dump_enabled_p ()) |
1546 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1547 | "not vectorized: Bad inner loop.\n"); |
1548 | return res; |
1549 | } |
1550 | |
1551 | /* Don't support analyzing niter under assumptions for inner |
1552 | loop. */ |
1553 | if (!integer_onep (inner.assumptions)) |
1554 | return opt_result::failure_at (vect_location, |
1555 | "not vectorized: Bad inner loop.\n"); |
1556 | |
1557 | if (!expr_invariant_in_loop_p (loop, inner.number_of_iterations)) |
1558 | return opt_result::failure_at (vect_location, |
1559 | "not vectorized: inner-loop count not" |
1560 | " invariant.\n"); |
1561 | |
1562 | if (dump_enabled_p ()) |
1563 | dump_printf_loc (MSG_NOTE, vect_location, |
1564 | "Considering outer-loop vectorization.\n"); |
1565 | info->inner_loop_cond = inner.loop_cond; |
1566 | } |
1567 | |
1568 | if (!single_exit (loop)) |
1569 | return opt_result::failure_at (vect_location, |
1570 | "not vectorized: multiple exits.\n"); |
1571 | if (EDGE_COUNT (loop->header->preds)vec_safe_length (loop->header->preds) != 2) |
1572 | return opt_result::failure_at (vect_location, |
1573 | "not vectorized:" |
1574 | " too many incoming edges.\n"); |
1575 | |
1576 | /* We assume that the loop exit condition is at the end of the loop. i.e, |
1577 | that the loop is represented as a do-while (with a proper if-guard |
1578 | before the loop if needed), where the loop header contains all the |
1579 | executable statements, and the latch is empty. */ |
1580 | if (!empty_block_p (loop->latch) |
1581 | || !gimple_seq_empty_p (phi_nodes (loop->latch))) |
1582 | return opt_result::failure_at (vect_location, |
1583 | "not vectorized: latch block not empty.\n"); |
1584 | |
1585 | /* Make sure the exit is not abnormal. */ |
1586 | edge e = single_exit (loop); |
1587 | if (e->flags & EDGE_ABNORMAL) |
1588 | return opt_result::failure_at (vect_location, |
1589 | "not vectorized:" |
1590 | " abnormal loop exit edge.\n"); |
1591 | |
1592 | info->loop_cond |
1593 | = vect_get_loop_niters (loop, &info->assumptions, |
1594 | &info->number_of_iterations, |
1595 | &info->number_of_iterationsm1); |
1596 | if (!info->loop_cond) |
1597 | return opt_result::failure_at |
1598 | (vect_location, |
1599 | "not vectorized: complicated exit condition.\n"); |
1600 | |
1601 | if (integer_zerop (info->assumptions) |
1602 | || !info->number_of_iterations |
1603 | || chrec_contains_undetermined (info->number_of_iterations)) |
1604 | return opt_result::failure_at |
1605 | (info->loop_cond, |
1606 | "not vectorized: number of iterations cannot be computed.\n"); |
1607 | |
1608 | if (integer_zerop (info->number_of_iterations)) |
1609 | return opt_result::failure_at |
1610 | (info->loop_cond, |
1611 | "not vectorized: number of iterations = 0.\n"); |
1612 | |
1613 | if (!(tree_fits_shwi_p (info->number_of_iterations) |
1614 | && tree_to_shwi (info->number_of_iterations) > 0)) |
1615 | { |
1616 | if (dump_enabled_p ()) |
1617 | { |
1618 | dump_printf_loc (MSG_NOTE, vect_location, |
1619 | "Symbolic number of iterations is "); |
1620 | dump_generic_expr (MSG_NOTE, TDF_DETAILS, info->number_of_iterations); |
1621 | dump_printf (MSG_NOTE, "\n"); |
1622 | } |
1623 | } |
1624 | |
1625 | return opt_result::success (); |
1626 | } |
1627 | |
1628 | /* Create a loop_vec_info for LOOP with SHARED and the |
1629 | vect_analyze_loop_form result. */ |
1630 | |
1631 | loop_vec_info |
1632 | vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared, |
1633 | const vect_loop_form_info *info, |
1634 | loop_vec_info main_loop_info) |
1635 | { |
1636 | loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared); |
1637 | LOOP_VINFO_NITERSM1 (loop_vinfo)(loop_vinfo)->num_itersm1 = info->number_of_iterationsm1; |
1638 | LOOP_VINFO_NITERS (loop_vinfo)(loop_vinfo)->num_iters = info->number_of_iterations; |
1639 | LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo)(loop_vinfo)->num_iters_unchanged = info->number_of_iterations; |
1640 | LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info = main_loop_info; |
1641 | /* Also record the assumptions for versioning. */ |
1642 | if (!integer_onep (info->assumptions) && !main_loop_info) |
1643 | LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo)(loop_vinfo)->num_iters_assumptions = info->assumptions; |
1644 | |
1645 | stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (info->loop_cond); |
1646 | STMT_VINFO_TYPE (loop_cond_info)(loop_cond_info)->type = loop_exit_ctrl_vec_info_type; |
1647 | if (info->inner_loop_cond) |
1648 | { |
1649 | stmt_vec_info inner_loop_cond_info |
1650 | = loop_vinfo->lookup_stmt (info->inner_loop_cond); |
1651 | STMT_VINFO_TYPE (inner_loop_cond_info)(inner_loop_cond_info)->type = loop_exit_ctrl_vec_info_type; |
1652 | /* If we have an estimate on the number of iterations of the inner |
1653 | loop use that to limit the scale for costing, otherwise use |
1654 | --param vect-inner-loop-cost-factor literally. */ |
1655 | widest_int nit; |
1656 | if (estimated_stmt_executions (loop->inner, &nit)) |
1657 | LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo)(loop_vinfo)->inner_loop_cost_factor |
1658 | = wi::smin (nit, param_vect_inner_loop_cost_factorglobal_options.x_param_vect_inner_loop_cost_factor).to_uhwi (); |
1659 | } |
1660 | |
1661 | return loop_vinfo; |
1662 | } |
1663 | |
1664 | |
1665 | |
1666 | /* Scan the loop stmts and dependent on whether there are any (non-)SLP |
1667 | statements update the vectorization factor. */ |
1668 | |
1669 | static void |
1670 | vect_update_vf_for_slp (loop_vec_info loop_vinfo) |
1671 | { |
1672 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; |
1673 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs; |
1674 | int nbbs = loop->num_nodes; |
1675 | poly_uint64 vectorization_factor; |
1676 | int i; |
1677 | |
1678 | DUMP_VECT_SCOPE ("vect_update_vf_for_slp")auto_dump_scope scope ("vect_update_vf_for_slp", vect_location ); |
1679 | |
1680 | vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor; |
1681 | gcc_assert (known_ne (vectorization_factor, 0U))((void)(!((!maybe_eq (vectorization_factor, 0U))) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1681, __FUNCTION__), 0 : 0)); |
1682 | |
1683 | /* If all the stmts in the loop can be SLPed, we perform only SLP, and |
1684 | vectorization factor of the loop is the unrolling factor required by |
1685 | the SLP instances. If that unrolling factor is 1, we say, that we |
1686 | perform pure SLP on loop - cross iteration parallelism is not |
1687 | exploited. */ |
1688 | bool only_slp_in_loop = true; |
1689 | for (i = 0; i < nbbs; i++) |
1690 | { |
1691 | basic_block bb = bbs[i]; |
1692 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); |
1693 | gsi_next (&si)) |
1694 | { |
1695 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ()); |
1696 | if (!stmt_info) |
1697 | continue; |
1698 | if ((STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope) |
1699 | || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))((((stmt_info)->def_type) == vect_reduction_def) || (((stmt_info )->def_type) == vect_double_reduction_def) || (((stmt_info )->def_type) == vect_nested_cycle))) |
1700 | && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp)) |
1701 | /* STMT needs both SLP and loop-based vectorization. */ |
1702 | only_slp_in_loop = false; |
1703 | } |
1704 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); |
1705 | gsi_next (&si)) |
1706 | { |
1707 | if (is_gimple_debug (gsi_stmt (si))) |
1708 | continue; |
1709 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
1710 | stmt_info = vect_stmt_to_vectorize (stmt_info); |
1711 | if ((STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope) |
1712 | || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))((((stmt_info)->def_type) == vect_reduction_def) || (((stmt_info )->def_type) == vect_double_reduction_def) || (((stmt_info )->def_type) == vect_nested_cycle))) |
1713 | && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp)) |
1714 | /* STMT needs both SLP and loop-based vectorization. */ |
1715 | only_slp_in_loop = false; |
1716 | } |
1717 | } |
1718 | |
1719 | if (only_slp_in_loop) |
1720 | { |
1721 | if (dump_enabled_p ()) |
1722 | dump_printf_loc (MSG_NOTE, vect_location, |
1723 | "Loop contains only SLP stmts\n"); |
1724 | vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)(loop_vinfo)->slp_unrolling_factor; |
1725 | } |
1726 | else |
1727 | { |
1728 | if (dump_enabled_p ()) |
1729 | dump_printf_loc (MSG_NOTE, vect_location, |
1730 | "Loop contains SLP and non-SLP stmts\n"); |
1731 | /* Both the vectorization factor and unroll factor have the form |
1732 | GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X, |
1733 | so they must have a common multiple. */ |
1734 | vectorization_factor |
1735 | = force_common_multiple (vectorization_factor, |
1736 | LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)(loop_vinfo)->slp_unrolling_factor); |
1737 | } |
1738 | |
1739 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor = vectorization_factor; |
1740 | if (dump_enabled_p ()) |
1741 | { |
1742 | dump_printf_loc (MSG_NOTE, vect_location, |
1743 | "Updating vectorization factor to "); |
1744 | dump_dec (MSG_NOTE, vectorization_factor); |
1745 | dump_printf (MSG_NOTE, ".\n"); |
1746 | } |
1747 | } |
1748 | |
1749 | /* Return true if STMT_INFO describes a double reduction phi and if |
1750 | the other phi in the reduction is also relevant for vectorization. |
1751 | This rejects cases such as: |
1752 | |
1753 | outer1: |
1754 | x_1 = PHI <x_3(outer2), ...>; |
1755 | ... |
1756 | |
1757 | inner: |
1758 | x_2 = ...; |
1759 | ... |
1760 | |
1761 | outer2: |
1762 | x_3 = PHI <x_2(inner)>; |
1763 | |
1764 | if nothing in x_2 or elsewhere makes x_1 relevant. */ |
1765 | |
1766 | static bool |
1767 | vect_active_double_reduction_p (stmt_vec_info stmt_info) |
1768 | { |
1769 | if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type != vect_double_reduction_def) |
1770 | return false; |
1771 | |
1772 | return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info))(((stmt_info)->reduc_def)->relevant != vect_unused_in_scope ); |
1773 | } |
1774 | |
1775 | /* Function vect_analyze_loop_operations. |
1776 | |
1777 | Scan the loop stmts and make sure they are all vectorizable. */ |
1778 | |
1779 | static opt_result |
1780 | vect_analyze_loop_operations (loop_vec_info loop_vinfo) |
1781 | { |
1782 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; |
1783 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs; |
1784 | int nbbs = loop->num_nodes; |
1785 | int i; |
1786 | stmt_vec_info stmt_info; |
1787 | bool need_to_vectorize = false; |
1788 | bool ok; |
1789 | |
1790 | DUMP_VECT_SCOPE ("vect_analyze_loop_operations")auto_dump_scope scope ("vect_analyze_loop_operations", vect_location ); |
1791 | |
1792 | auto_vec<stmt_info_for_cost> cost_vec; |
1793 | |
1794 | for (i = 0; i < nbbs; i++) |
1795 | { |
1796 | basic_block bb = bbs[i]; |
1797 | |
1798 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); |
1799 | gsi_next (&si)) |
1800 | { |
1801 | gphi *phi = si.phi (); |
1802 | ok = true; |
1803 | |
1804 | stmt_info = loop_vinfo->lookup_stmt (phi); |
1805 | if (dump_enabled_p ()) |
1806 | dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G", |
1807 | (gimple *) phi); |
1808 | if (virtual_operand_p (gimple_phi_result (phi))) |
1809 | continue; |
1810 | |
1811 | /* Inner-loop loop-closed exit phi in outer-loop vectorization |
1812 | (i.e., a phi in the tail of the outer-loop). */ |
1813 | if (! is_loop_header_bb_p (bb)) |
1814 | { |
1815 | /* FORNOW: we currently don't support the case that these phis |
1816 | are not used in the outerloop (unless it is double reduction, |
1817 | i.e., this phi is vect_reduction_def), cause this case |
1818 | requires to actually do something here. */ |
1819 | if (STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live |
1820 | && !vect_active_double_reduction_p (stmt_info)) |
1821 | return opt_result::failure_at (phi, |
1822 | "Unsupported loop-closed phi" |
1823 | " in outer-loop.\n"); |
1824 | |
1825 | /* If PHI is used in the outer loop, we check that its operand |
1826 | is defined in the inner loop. */ |
1827 | if (STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope)) |
1828 | { |
1829 | tree phi_op; |
1830 | |
1831 | if (gimple_phi_num_args (phi) != 1) |
1832 | return opt_result::failure_at (phi, "unsupported phi"); |
1833 | |
1834 | phi_op = PHI_ARG_DEF (phi, 0)gimple_phi_arg_def ((phi), (0)); |
1835 | stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op); |
1836 | if (!op_def_info) |
1837 | return opt_result::failure_at (phi, "unsupported phi\n"); |
1838 | |
1839 | if (STMT_VINFO_RELEVANT (op_def_info)(op_def_info)->relevant != vect_used_in_outer |
1840 | && (STMT_VINFO_RELEVANT (op_def_info)(op_def_info)->relevant |
1841 | != vect_used_in_outer_by_reduction)) |
1842 | return opt_result::failure_at (phi, "unsupported phi\n"); |
1843 | |
1844 | if ((STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_internal_def |
1845 | || (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type |
1846 | == vect_double_reduction_def)) |
1847 | && !vectorizable_lc_phi (loop_vinfo, |
1848 | stmt_info, NULLnullptr, NULLnullptr)) |
1849 | return opt_result::failure_at (phi, "unsupported phi\n"); |
1850 | } |
1851 | |
1852 | continue; |
1853 | } |
1854 | |
1855 | gcc_assert (stmt_info)((void)(!(stmt_info) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1855, __FUNCTION__), 0 : 0)); |
1856 | |
1857 | if ((STMT_VINFO_RELEVANT (stmt_info)(stmt_info)->relevant == vect_used_in_scope |
1858 | || STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live) |
1859 | && STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type != vect_induction_def |
1860 | && STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type != vect_first_order_recurrence) |
1861 | /* A scalar-dependence cycle that we don't support. */ |
1862 | return opt_result::failure_at (phi, |
1863 | "not vectorized:" |
1864 | " scalar dependence cycle.\n"); |
1865 | |
1866 | if (STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope)) |
1867 | { |
1868 | need_to_vectorize = true; |
1869 | if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_induction_def |
1870 | && ! PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp)) |
1871 | ok = vectorizable_induction (loop_vinfo, |
1872 | stmt_info, NULLnullptr, NULLnullptr, |
1873 | &cost_vec); |
1874 | else if ((STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_reduction_def |
1875 | || (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type |
1876 | == vect_double_reduction_def) |
1877 | || STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_nested_cycle) |
1878 | && ! PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp)) |
1879 | ok = vectorizable_reduction (loop_vinfo, |
1880 | stmt_info, NULLnullptr, NULLnullptr, &cost_vec); |
1881 | else if ((STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type |
1882 | == vect_first_order_recurrence) |
1883 | && ! PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp)) |
1884 | ok = vectorizable_recurr (loop_vinfo, stmt_info, NULLnullptr, NULLnullptr, |
1885 | &cost_vec); |
1886 | } |
1887 | |
1888 | /* SLP PHIs are tested by vect_slp_analyze_node_operations. */ |
1889 | if (ok |
1890 | && STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live |
1891 | && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp)) |
1892 | ok = vectorizable_live_operation (loop_vinfo, |
1893 | stmt_info, NULLnullptr, NULLnullptr, NULLnullptr, |
1894 | -1, false, &cost_vec); |
1895 | |
1896 | if (!ok) |
1897 | return opt_result::failure_at (phi, |
1898 | "not vectorized: relevant phi not " |
1899 | "supported: %G", |
1900 | static_cast <gimple *> (phi)); |
1901 | } |
1902 | |
1903 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); |
1904 | gsi_next (&si)) |
1905 | { |
1906 | gimple *stmt = gsi_stmt (si); |
1907 | if (!gimple_clobber_p (stmt) |
1908 | && !is_gimple_debug (stmt)) |
1909 | { |
1910 | opt_result res |
1911 | = vect_analyze_stmt (loop_vinfo, |
1912 | loop_vinfo->lookup_stmt (stmt), |
1913 | &need_to_vectorize, |
1914 | NULLnullptr, NULLnullptr, &cost_vec); |
1915 | if (!res) |
1916 | return res; |
1917 | } |
1918 | } |
1919 | } /* bbs */ |
1920 | |
1921 | add_stmt_costs (loop_vinfo->vector_costs, &cost_vec); |
1922 | |
1923 | /* All operations in the loop are either irrelevant (deal with loop |
1924 | control, or dead), or only used outside the loop and can be moved |
1925 | out of the loop (e.g. invariants, inductions). The loop can be |
1926 | optimized away by scalar optimizations. We're better off not |
1927 | touching this loop. */ |
1928 | if (!need_to_vectorize) |
1929 | { |
1930 | if (dump_enabled_p ()) |
1931 | dump_printf_loc (MSG_NOTE, vect_location, |
1932 | "All the computation can be taken out of the loop.\n"); |
1933 | return opt_result::failure_at |
1934 | (vect_location, |
1935 | "not vectorized: redundant loop. no profit to vectorize.\n"); |
1936 | } |
1937 | |
1938 | return opt_result::success (); |
1939 | } |
1940 | |
1941 | /* Return true if we know that the iteration count is smaller than the |
1942 | vectorization factor. Return false if it isn't, or if we can't be sure |
1943 | either way. */ |
1944 | |
1945 | static bool |
1946 | vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo) |
1947 | { |
1948 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); |
1949 | |
1950 | HOST_WIDE_INTlong max_niter; |
1951 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0)) |
1952 | max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)-> num_iters), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 1952, __FUNCTION__)))); |
1953 | else |
1954 | max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop); |
1955 | |
1956 | if (max_niter != -1 && (unsigned HOST_WIDE_INTlong) max_niter < assumed_vf) |
1957 | return true; |
1958 | |
1959 | return false; |
1960 | } |
1961 | |
1962 | /* Analyze the cost of the loop described by LOOP_VINFO. Decide if it |
1963 | is worthwhile to vectorize. Return 1 if definitely yes, 0 if |
1964 | definitely no, or -1 if it's worth retrying. */ |
1965 | |
1966 | static int |
1967 | vect_analyze_loop_costing (loop_vec_info loop_vinfo, |
1968 | unsigned *suggested_unroll_factor) |
1969 | { |
1970 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; |
1971 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); |
1972 | |
1973 | /* Only loops that can handle partially-populated vectors can have iteration |
1974 | counts less than the vectorization factor. */ |
1975 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) |
1976 | { |
1977 | if (vect_known_niters_smaller_than_vf (loop_vinfo)) |
1978 | { |
1979 | if (dump_enabled_p ()) |
1980 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1981 | "not vectorized: iteration count smaller than " |
1982 | "vectorization factor.\n"); |
1983 | return 0; |
1984 | } |
1985 | } |
1986 | |
1987 | /* If using the "very cheap" model. reject cases in which we'd keep |
1988 | a copy of the scalar code (even if we might be able to vectorize it). */ |
1989 | if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP |
1990 | && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment |
1991 | || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps |
1992 | || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter)) |
1993 | { |
1994 | if (dump_enabled_p ()) |
1995 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1996 | "some scalar iterations would need to be peeled\n"); |
1997 | return 0; |
1998 | } |
1999 | |
2000 | int min_profitable_iters, min_profitable_estimate; |
2001 | vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, |
2002 | &min_profitable_estimate, |
2003 | suggested_unroll_factor); |
2004 | |
2005 | if (min_profitable_iters < 0) |
2006 | { |
2007 | if (dump_enabled_p ()) |
2008 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2009 | "not vectorized: vectorization not profitable.\n"); |
2010 | if (dump_enabled_p ()) |
2011 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2012 | "not vectorized: vector version will never be " |
2013 | "profitable.\n"); |
2014 | return -1; |
2015 | } |
2016 | |
2017 | int min_scalar_loop_bound = (param_min_vect_loop_boundglobal_options.x_param_min_vect_loop_bound |
2018 | * assumed_vf); |
2019 | |
2020 | /* Use the cost model only if it is more conservative than user specified |
2021 | threshold. */ |
2022 | unsigned int th = (unsigned) MAX (min_scalar_loop_bound,((min_scalar_loop_bound) > (min_profitable_iters) ? (min_scalar_loop_bound ) : (min_profitable_iters)) |
2023 | min_profitable_iters)((min_scalar_loop_bound) > (min_profitable_iters) ? (min_scalar_loop_bound ) : (min_profitable_iters)); |
2024 | |
2025 | LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th = th; |
2026 | |
2027 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) |
2028 | && LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)-> num_iters), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2028, __FUNCTION__)))) < th) |
2029 | { |
2030 | if (dump_enabled_p ()) |
2031 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2032 | "not vectorized: vectorization not profitable.\n"); |
2033 | if (dump_enabled_p ()) |
2034 | dump_printf_loc (MSG_NOTE, vect_location, |
2035 | "not vectorized: iteration count smaller than user " |
2036 | "specified loop bound parameter or minimum profitable " |
2037 | "iterations (whichever is more conservative).\n"); |
2038 | return 0; |
2039 | } |
2040 | |
2041 | /* The static profitablity threshold min_profitable_estimate includes |
2042 | the cost of having to check at runtime whether the scalar loop |
2043 | should be used instead. If it turns out that we don't need or want |
2044 | such a check, the threshold we should use for the static estimate |
2045 | is simply the point at which the vector loop becomes more profitable |
2046 | than the scalar loop. */ |
2047 | if (min_profitable_estimate > min_profitable_iters |
2048 | && !LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || (( loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond)) |
2049 | && !LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter |
2050 | && !LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment |
2051 | && !vect_apply_runtime_profitability_check_p (loop_vinfo)) |
2052 | { |
2053 | if (dump_enabled_p ()) |
2054 | dump_printf_loc (MSG_NOTE, vect_location, "no need for a runtime" |
2055 | " choice between the scalar and vector loops\n"); |
2056 | min_profitable_estimate = min_profitable_iters; |
2057 | } |
2058 | |
2059 | /* If the vector loop needs multiple iterations to be beneficial then |
2060 | things are probably too close to call, and the conservative thing |
2061 | would be to stick with the scalar code. */ |
2062 | if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP |
2063 | && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo)) |
2064 | { |
2065 | if (dump_enabled_p ()) |
2066 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2067 | "one iteration of the vector loop would be" |
2068 | " more expensive than the equivalent number of" |
2069 | " iterations of the scalar loop\n"); |
2070 | return 0; |
2071 | } |
2072 | |
2073 | HOST_WIDE_INTlong estimated_niter; |
2074 | |
2075 | /* If we are vectorizing an epilogue then we know the maximum number of |
2076 | scalar iterations it will cover is at least one lower than the |
2077 | vectorization factor of the main loop. */ |
2078 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)) |
2079 | estimated_niter |
2080 | = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info) - 1; |
2081 | else |
2082 | { |
2083 | estimated_niter = estimated_stmt_executions_int (loop); |
2084 | if (estimated_niter == -1) |
2085 | estimated_niter = likely_max_stmt_executions_int (loop); |
2086 | } |
2087 | if (estimated_niter != -1 |
2088 | && ((unsigned HOST_WIDE_INTlong) estimated_niter |
2089 | < MAX (th, (unsigned) min_profitable_estimate)((th) > ((unsigned) min_profitable_estimate) ? (th) : ((unsigned ) min_profitable_estimate)))) |
2090 | { |
2091 | if (dump_enabled_p ()) |
2092 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2093 | "not vectorized: estimated iteration count too " |
2094 | "small.\n"); |
2095 | if (dump_enabled_p ()) |
2096 | dump_printf_loc (MSG_NOTE, vect_location, |
2097 | "not vectorized: estimated iteration count smaller " |
2098 | "than specified loop bound parameter or minimum " |
2099 | "profitable iterations (whichever is more " |
2100 | "conservative).\n"); |
2101 | return -1; |
2102 | } |
2103 | |
2104 | return 1; |
2105 | } |
2106 | |
2107 | static opt_result |
2108 | vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, |
2109 | vec<data_reference_p> *datarefs, |
2110 | unsigned int *n_stmts) |
2111 | { |
2112 | *n_stmts = 0; |
2113 | for (unsigned i = 0; i < loop->num_nodes; i++) |
2114 | for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); |
2115 | !gsi_end_p (gsi); gsi_next (&gsi)) |
2116 | { |
2117 | gimple *stmt = gsi_stmt (gsi); |
2118 | if (is_gimple_debug (stmt)) |
2119 | continue; |
2120 | ++(*n_stmts); |
2121 | opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs, |
2122 | NULLnullptr, 0); |
2123 | if (!res) |
2124 | { |
2125 | if (is_gimple_call (stmt) && loop->safelen) |
2126 | { |
2127 | tree fndecl = gimple_call_fndecl (stmt), op; |
2128 | if (fndecl == NULL_TREE(tree) nullptr |
2129 | && gimple_call_internal_p (stmt, IFN_MASK_CALL)) |
2130 | { |
2131 | fndecl = gimple_call_arg (stmt, 0); |
2132 | gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR)((void)(!(((enum tree_code) (fndecl)->base.code) == ADDR_EXPR ) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2132, __FUNCTION__), 0 : 0)); |
2133 | fndecl = TREE_OPERAND (fndecl, 0)(*((const_cast<tree*> (tree_operand_check ((fndecl), (0 ), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2133, __FUNCTION__))))); |
2134 | gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL)((void)(!(((enum tree_code) (fndecl)->base.code) == FUNCTION_DECL ) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2134, __FUNCTION__), 0 : 0)); |
2135 | } |
2136 | if (fndecl != NULL_TREE(tree) nullptr) |
2137 | { |
2138 | cgraph_node *node = cgraph_node::get (fndecl); |
2139 | if (node != NULLnullptr && node->simd_clones != NULLnullptr) |
2140 | { |
2141 | unsigned int j, n = gimple_call_num_args (stmt); |
2142 | for (j = 0; j < n; j++) |
2143 | { |
2144 | op = gimple_call_arg (stmt, j); |
2145 | if (DECL_P (op)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code ) (op)->base.code))] == tcc_declaration) |
2146 | || (REFERENCE_CLASS_P (op)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code ) (op)->base.code))] == tcc_reference) |
2147 | && get_base_address (op))) |
2148 | break; |
2149 | } |
2150 | op = gimple_call_lhs (stmt); |
2151 | /* Ignore #pragma omp declare simd functions |
2152 | if they don't have data references in the |
2153 | call stmt itself. */ |
2154 | if (j == n |
2155 | && !(op |
2156 | && (DECL_P (op)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code ) (op)->base.code))] == tcc_declaration) |
2157 | || (REFERENCE_CLASS_P (op)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code ) (op)->base.code))] == tcc_reference) |
2158 | && get_base_address (op))))) |
2159 | continue; |
2160 | } |
2161 | } |
2162 | } |
2163 | return res; |
2164 | } |
2165 | /* If dependence analysis will give up due to the limit on the |
2166 | number of datarefs stop here and fail fatally. */ |
2167 | if (datarefs->length () |
2168 | > (unsigned)param_loop_max_datarefs_for_datadepsglobal_options.x_param_loop_max_datarefs_for_datadeps) |
2169 | return opt_result::failure_at (stmt, "exceeded param " |
2170 | "loop-max-datarefs-for-datadeps\n"); |
2171 | } |
2172 | return opt_result::success (); |
2173 | } |
2174 | |
2175 | /* Look for SLP-only access groups and turn each individual access into its own |
2176 | group. */ |
2177 | static void |
2178 | vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo) |
2179 | { |
2180 | unsigned int i; |
2181 | struct data_reference *dr; |
2182 | |
2183 | DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups")auto_dump_scope scope ("vect_dissolve_slp_only_groups", vect_location ); |
2184 | |
2185 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs; |
2186 | FOR_EACH_VEC_ELT (datarefs, i, dr)for (i = 0; (datarefs).iterate ((i), &(dr)); ++(i)) |
2187 | { |
2188 | gcc_assert (DR_REF (dr))((void)(!((dr)->ref) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2188, __FUNCTION__), 0 : 0)); |
2189 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr)(dr)->stmt); |
2190 | |
2191 | /* Check if the load is a part of an interleaving chain. */ |
2192 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)((stmt_info)->dr_aux.dr && (((void)(!((stmt_info)-> dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2192, __FUNCTION__), 0 : 0)), (stmt_info)->first_element ))) |
2193 | { |
2194 | stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info)(((void)(!((stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2194, __FUNCTION__), 0 : 0)), (stmt_info)->first_element ); |
2195 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_element)(((void)(!((first_element)->dr_aux.stmt == (first_element) ) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2195, __FUNCTION__), 0 : 0)), &(first_element)->dr_aux ); |
2196 | unsigned int group_size = DR_GROUP_SIZE (first_element)(((void)(!((first_element)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2196, __FUNCTION__), 0 : 0)), (first_element)->size); |
2197 | |
2198 | /* Check if SLP-only groups. */ |
2199 | if (!STMT_SLP_TYPE (stmt_info)(stmt_info)->slp_type |
2200 | && STMT_VINFO_SLP_VECT_ONLY (first_element)(first_element)->slp_vect_only_p) |
2201 | { |
2202 | /* Dissolve the group. */ |
2203 | STMT_VINFO_SLP_VECT_ONLY (first_element)(first_element)->slp_vect_only_p = false; |
2204 | |
2205 | stmt_vec_info vinfo = first_element; |
2206 | while (vinfo) |
2207 | { |
2208 | stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2208, __FUNCTION__), 0 : 0)), (vinfo)->next_element); |
2209 | DR_GROUP_FIRST_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2209, __FUNCTION__), 0 : 0)), (vinfo)->first_element) = vinfo; |
2210 | DR_GROUP_NEXT_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2210, __FUNCTION__), 0 : 0)), (vinfo)->next_element) = NULLnullptr; |
2211 | DR_GROUP_SIZE (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2211, __FUNCTION__), 0 : 0)), (vinfo)->size) = 1; |
2212 | if (STMT_VINFO_STRIDED_P (first_element)(first_element)->strided_p) |
2213 | DR_GROUP_GAP (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2213, __FUNCTION__), 0 : 0)), (vinfo)->gap) = 0; |
2214 | else |
2215 | DR_GROUP_GAP (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2215, __FUNCTION__), 0 : 0)), (vinfo)->gap) = group_size - 1; |
2216 | /* Duplicate and adjust alignment info, it needs to |
2217 | be present on each group leader, see dr_misalignment. */ |
2218 | if (vinfo != first_element) |
2219 | { |
2220 | dr_vec_info *dr_info2 = STMT_VINFO_DR_INFO (vinfo)(((void)(!((vinfo)->dr_aux.stmt == (vinfo)) ? fancy_abort ( "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2220, __FUNCTION__), 0 : 0)), &(vinfo)->dr_aux); |
2221 | dr_info2->target_alignment = dr_info->target_alignment; |
2222 | int misalignment = dr_info->misalignment; |
2223 | if (misalignment != DR_MISALIGNMENT_UNKNOWN(-1)) |
2224 | { |
2225 | HOST_WIDE_INTlong diff |
2226 | = (TREE_INT_CST_LOW (DR_INIT (dr_info2->dr))((unsigned long) (*tree_int_cst_elt_check (((dr_info2->dr) ->innermost.init), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2226, __FUNCTION__))) |
2227 | - TREE_INT_CST_LOW (DR_INIT (dr_info->dr))((unsigned long) (*tree_int_cst_elt_check (((dr_info->dr)-> innermost.init), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2227, __FUNCTION__)))); |
2228 | unsigned HOST_WIDE_INTlong align_c |
2229 | = dr_info->target_alignment.to_constant (); |
2230 | misalignment = (misalignment + diff) % align_c; |
2231 | } |
2232 | dr_info2->misalignment = misalignment; |
2233 | } |
2234 | vinfo = next; |
2235 | } |
2236 | } |
2237 | } |
2238 | } |
2239 | } |
2240 | |
2241 | /* Determine if operating on full vectors for LOOP_VINFO might leave |
2242 | some scalar iterations still to do. If so, decide how we should |
2243 | handle those scalar iterations. The possibilities are: |
2244 | |
2245 | (1) Make LOOP_VINFO operate on partial vectors instead of full vectors. |
2246 | In this case: |
2247 | |
2248 | LOOP_VINFO_USING_PARTIAL_VECTORS_P == true |
2249 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false |
2250 | LOOP_VINFO_PEELING_FOR_NITER == false |
2251 | |
2252 | (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop |
2253 | to handle the remaining scalar iterations. In this case: |
2254 | |
2255 | LOOP_VINFO_USING_PARTIAL_VECTORS_P == false |
2256 | LOOP_VINFO_PEELING_FOR_NITER == true |
2257 | |
2258 | There are two choices: |
2259 | |
2260 | (2a) Consider vectorizing the epilogue loop at the same VF as the |
2261 | main loop, but using partial vectors instead of full vectors. |
2262 | In this case: |
2263 | |
2264 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true |
2265 | |
2266 | (2b) Consider vectorizing the epilogue loop at lower VFs only. |
2267 | In this case: |
2268 | |
2269 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false |
2270 | |
2271 | When FOR_EPILOGUE_P is true, make this determination based on the |
2272 | assumption that LOOP_VINFO is an epilogue loop, otherwise make it |
2273 | based on the assumption that LOOP_VINFO is the main loop. The caller |
2274 | has made sure that the number of iterations is set appropriately for |
2275 | this value of FOR_EPILOGUE_P. */ |
2276 | |
2277 | opt_result |
2278 | vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo, |
2279 | bool for_epilogue_p) |
2280 | { |
2281 | /* Determine whether there would be any scalar iterations left over. */ |
2282 | bool need_peeling_or_partial_vectors_p |
2283 | = vect_need_peeling_or_partial_vectors_p (loop_vinfo); |
2284 | |
2285 | /* Decide whether to vectorize the loop with partial vectors. */ |
2286 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p = false; |
2287 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->epil_using_partial_vectors_p = false; |
2288 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p |
2289 | && need_peeling_or_partial_vectors_p) |
2290 | { |
2291 | /* For partial-vector-usage=1, try to push the handling of partial |
2292 | vectors to the epilogue, with the main loop continuing to operate |
2293 | on full vectors. |
2294 | |
2295 | If we are unrolling we also do not want to use partial vectors. This |
2296 | is to avoid the overhead of generating multiple masks and also to |
2297 | avoid having to execute entire iterations of FALSE masked instructions |
2298 | when dealing with one or less full iterations. |
2299 | |
2300 | ??? We could then end up failing to use partial vectors if we |
2301 | decide to peel iterations into a prologue, and if the main loop |
2302 | then ends up processing fewer than VF iterations. */ |
2303 | if ((param_vect_partial_vector_usageglobal_options.x_param_vect_partial_vector_usage == 1 |
2304 | || loop_vinfo->suggested_unroll_factor > 1) |
2305 | && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr) |
2306 | && !vect_known_niters_smaller_than_vf (loop_vinfo)) |
2307 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->epil_using_partial_vectors_p = true; |
2308 | else |
2309 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p = true; |
2310 | } |
2311 | |
2312 | if (dump_enabled_p ()) |
2313 | { |
2314 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) |
2315 | dump_printf_loc (MSG_NOTE, vect_location, |
2316 | "operating on partial vectors%s.\n", |
2317 | for_epilogue_p ? " for epilogue loop" : ""); |
2318 | else |
2319 | dump_printf_loc (MSG_NOTE, vect_location, |
2320 | "operating only on full vectors%s.\n", |
2321 | for_epilogue_p ? " for epilogue loop" : ""); |
2322 | } |
2323 | |
2324 | if (for_epilogue_p) |
2325 | { |
2326 | loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info; |
2327 | gcc_assert (orig_loop_vinfo)((void)(!(orig_loop_vinfo) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2327, __FUNCTION__), 0 : 0)); |
2328 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) |
2329 | gcc_assert (known_lt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),((void)(!((!maybe_le ((orig_loop_vinfo)->vectorization_factor , (loop_vinfo)->vectorization_factor))) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2330, __FUNCTION__), 0 : 0)) |
2330 | LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))((void)(!((!maybe_le ((orig_loop_vinfo)->vectorization_factor , (loop_vinfo)->vectorization_factor))) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2330, __FUNCTION__), 0 : 0)); |
2331 | } |
2332 | |
2333 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) |
2334 | && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) |
2335 | { |
2336 | /* Check that the loop processes at least one full vector. */ |
2337 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor; |
2338 | tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo)(loop_vinfo)->num_iters; |
2339 | if (known_lt (wi::to_widest (scalar_niters), vf)(!maybe_le (vf, wi::to_widest (scalar_niters)))) |
2340 | return opt_result::failure_at (vect_location, |
2341 | "loop does not have enough iterations" |
2342 | " to support vectorization.\n"); |
2343 | |
2344 | /* If we need to peel an extra epilogue iteration to handle data |
2345 | accesses with gaps, check that there are enough scalar iterations |
2346 | available. |
2347 | |
2348 | The check above is redundant with this one when peeling for gaps, |
2349 | but the distinction is useful for diagnostics. */ |
2350 | tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo)(loop_vinfo)->num_itersm1; |
2351 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps |
2352 | && known_lt (wi::to_widest (scalar_nitersm1), vf)(!maybe_le (vf, wi::to_widest (scalar_nitersm1)))) |
2353 | return opt_result::failure_at (vect_location, |
2354 | "loop does not have enough iterations" |
2355 | " to support peeling for gaps.\n"); |
2356 | } |
2357 | |
2358 | LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter |
2359 | = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p |
2360 | && need_peeling_or_partial_vectors_p); |
2361 | |
2362 | return opt_result::success (); |
2363 | } |
2364 | |
2365 | /* Function vect_analyze_loop_2. |
2366 | |
2367 | Apply a set of analyses on LOOP specified by LOOP_VINFO, the different |
2368 | analyses will record information in some members of LOOP_VINFO. FATAL |
2369 | indicates if some analysis meets fatal error. If one non-NULL pointer |
2370 | SUGGESTED_UNROLL_FACTOR is provided, it's intent to be filled with one |
2371 | worked out suggested unroll factor, while one NULL pointer shows it's |
2372 | going to apply the suggested unroll factor. SLP_DONE_FOR_SUGGESTED_UF |
2373 | is to hold the slp decision when the suggested unroll factor is worked |
2374 | out. */ |
2375 | static opt_result |
2376 | vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, |
2377 | unsigned *suggested_unroll_factor, |
2378 | bool& slp_done_for_suggested_uf) |
2379 | { |
2380 | opt_result ok = opt_result::success (); |
2381 | int res; |
2382 | unsigned int max_vf = MAX_VECTORIZATION_FACTOR2147483647; |
2383 | poly_uint64 min_vf = 2; |
2384 | loop_vec_info orig_loop_vinfo = NULLnullptr; |
2385 | |
2386 | /* If we are dealing with an epilogue then orig_loop_vinfo points to the |
2387 | loop_vec_info of the first vectorized loop. */ |
2388 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)) |
2389 | orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info; |
2390 | else |
2391 | orig_loop_vinfo = loop_vinfo; |
2392 | gcc_assert (orig_loop_vinfo)((void)(!(orig_loop_vinfo) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2392, __FUNCTION__), 0 : 0)); |
2393 | |
2394 | /* The first group of checks is independent of the vector size. */ |
2395 | fatal = true; |
2396 | |
2397 | if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)(loop_vinfo)->simd_if_cond |
2398 | && integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)(loop_vinfo)->simd_if_cond)) |
2399 | return opt_result::failure_at (vect_location, |
2400 | "not vectorized: simd if(0)\n"); |
2401 | |
2402 | /* Find all data references in the loop (which correspond to vdefs/vuses) |
2403 | and analyze their evolution in the loop. */ |
2404 | |
2405 | loop_p loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; |
2406 | |
2407 | /* Gather the data references and count stmts in the loop. */ |
2408 | if (!LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs.exists ()) |
2409 | { |
2410 | opt_result res |
2411 | = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs, |
2412 | &LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs, |
2413 | &LOOP_VINFO_N_STMTS (loop_vinfo)(loop_vinfo)->shared->n_stmts); |
2414 | if (!res) |
2415 | { |
2416 | if (dump_enabled_p ()) |
2417 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2418 | "not vectorized: loop contains function " |
2419 | "calls or data references that cannot " |
2420 | "be analyzed\n"); |
2421 | return res; |
2422 | } |
2423 | loop_vinfo->shared->save_datarefs (); |
2424 | } |
2425 | else |
2426 | loop_vinfo->shared->check_datarefs (); |
2427 | |
2428 | /* Analyze the data references and also adjust the minimal |
2429 | vectorization factor according to the loads and stores. */ |
2430 | |
2431 | ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal); |
2432 | if (!ok) |
2433 | { |
2434 | if (dump_enabled_p ()) |
2435 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2436 | "bad data references.\n"); |
2437 | return ok; |
2438 | } |
2439 | |
2440 | /* Check if we are applying unroll factor now. */ |
2441 | bool applying_suggested_uf = loop_vinfo->suggested_unroll_factor > 1; |
2442 | gcc_assert (!applying_suggested_uf || !suggested_unroll_factor)((void)(!(!applying_suggested_uf || !suggested_unroll_factor) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2442, __FUNCTION__), 0 : 0)); |
2443 | |
2444 | /* If the slp decision is false when suggested unroll factor is worked |
2445 | out, and we are applying suggested unroll factor, we can simply skip |
2446 | all slp related analyses this time. */ |
2447 | bool slp = !applying_suggested_uf || slp_done_for_suggested_uf; |
2448 | |
2449 | /* Classify all cross-iteration scalar data-flow cycles. |
2450 | Cross-iteration cycles caused by virtual phis are analyzed separately. */ |
2451 | vect_analyze_scalar_cycles (loop_vinfo, slp); |
2452 | |
2453 | vect_pattern_recog (loop_vinfo); |
2454 | |
2455 | vect_fixup_scalar_cycles_with_patterns (loop_vinfo); |
2456 | |
2457 | /* Analyze the access patterns of the data-refs in the loop (consecutive, |
2458 | complex, etc.). FORNOW: Only handle consecutive access pattern. */ |
2459 | |
2460 | ok = vect_analyze_data_ref_accesses (loop_vinfo, NULLnullptr); |
2461 | if (!ok) |
2462 | { |
2463 | if (dump_enabled_p ()) |
2464 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2465 | "bad data access.\n"); |
2466 | return ok; |
2467 | } |
2468 | |
2469 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ |
2470 | |
2471 | ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal); |
2472 | if (!ok) |
2473 | { |
2474 | if (dump_enabled_p ()) |
2475 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2476 | "unexpected pattern.\n"); |
2477 | return ok; |
2478 | } |
2479 | |
2480 | /* While the rest of the analysis below depends on it in some way. */ |
2481 | fatal = false; |
2482 | |
2483 | /* Analyze data dependences between the data-refs in the loop |
2484 | and adjust the maximum vectorization factor according to |
2485 | the dependences. |
2486 | FORNOW: fail at the first data dependence that we encounter. */ |
2487 | |
2488 | ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf); |
2489 | if (!ok) |
2490 | { |
2491 | if (dump_enabled_p ()) |
2492 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2493 | "bad data dependence.\n"); |
2494 | return ok; |
2495 | } |
2496 | if (max_vf != MAX_VECTORIZATION_FACTOR2147483647 |
2497 | && maybe_lt (max_vf, min_vf)) |
2498 | return opt_result::failure_at (vect_location, "bad data dependence.\n"); |
2499 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)(loop_vinfo)->max_vectorization_factor = max_vf; |
2500 | |
2501 | ok = vect_determine_vectorization_factor (loop_vinfo); |
2502 | if (!ok) |
2503 | { |
2504 | if (dump_enabled_p ()) |
2505 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2506 | "can't determine vectorization factor.\n"); |
2507 | return ok; |
2508 | } |
2509 | if (max_vf != MAX_VECTORIZATION_FACTOR2147483647 |
2510 | && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor)) |
2511 | return opt_result::failure_at (vect_location, "bad data dependence.\n"); |
2512 | |
2513 | /* Compute the scalar iteration cost. */ |
2514 | vect_compute_single_scalar_iteration_cost (loop_vinfo); |
2515 | |
2516 | poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor; |
2517 | |
2518 | if (slp) |
2519 | { |
2520 | /* Check the SLP opportunities in the loop, analyze and build |
2521 | SLP trees. */ |
2522 | ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo)(loop_vinfo)->shared->n_stmts); |
2523 | if (!ok) |
2524 | return ok; |
2525 | |
2526 | /* If there are any SLP instances mark them as pure_slp. */ |
2527 | slp = vect_make_slp_decision (loop_vinfo); |
2528 | if (slp) |
2529 | { |
2530 | /* Find stmts that need to be both vectorized and SLPed. */ |
2531 | vect_detect_hybrid_slp (loop_vinfo); |
2532 | |
2533 | /* Update the vectorization factor based on the SLP decision. */ |
2534 | vect_update_vf_for_slp (loop_vinfo); |
2535 | |
2536 | /* Optimize the SLP graph with the vectorization factor fixed. */ |
2537 | vect_optimize_slp (loop_vinfo); |
2538 | |
2539 | /* Gather the loads reachable from the SLP graph entries. */ |
2540 | vect_gather_slp_loads (loop_vinfo); |
2541 | } |
2542 | } |
2543 | |
2544 | bool saved_can_use_partial_vectors_p |
2545 | = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p; |
2546 | |
2547 | /* We don't expect to have to roll back to anything other than an empty |
2548 | set of rgroups. */ |
2549 | gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())((void)(!((loop_vinfo)->masks.is_empty ()) ? fancy_abort ( "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2549, __FUNCTION__), 0 : 0)); |
2550 | |
2551 | /* This is the point where we can re-start analysis with SLP forced off. */ |
2552 | start_over: |
2553 | |
2554 | /* Apply the suggested unrolling factor, this was determined by the backend |
2555 | during finish_cost the first time we ran the analyzis for this |
2556 | vector mode. */ |
2557 | if (applying_suggested_uf) |
2558 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor *= loop_vinfo->suggested_unroll_factor; |
2559 | |
2560 | /* Now the vectorization factor is final. */ |
2561 | poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor; |
2562 | gcc_assert (known_ne (vectorization_factor, 0U))((void)(!((!maybe_eq (vectorization_factor, 0U))) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2562, __FUNCTION__), 0 : 0)); |
2563 | |
2564 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) && dump_enabled_p ()) |
2565 | { |
2566 | dump_printf_loc (MSG_NOTE, vect_location, |
2567 | "vectorization_factor = "); |
2568 | dump_dec (MSG_NOTE, vectorization_factor); |
2569 | dump_printf (MSG_NOTE, ", niters = %wd\n", |
2570 | LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)-> num_iters), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2570, __FUNCTION__))))); |
2571 | } |
2572 | |
2573 | loop_vinfo->vector_costs = init_cost (loop_vinfo, false); |
2574 | |
2575 | /* Analyze the alignment of the data-refs in the loop. |
2576 | Fail if a data reference is found that cannot be vectorized. */ |
2577 | |
2578 | ok = vect_analyze_data_refs_alignment (loop_vinfo); |
2579 | if (!ok) |
2580 | { |
2581 | if (dump_enabled_p ()) |
2582 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2583 | "bad data alignment.\n"); |
2584 | return ok; |
2585 | } |
2586 | |
2587 | /* Prune the list of ddrs to be tested at run-time by versioning for alias. |
2588 | It is important to call pruning after vect_analyze_data_ref_accesses, |
2589 | since we use grouping information gathered by interleaving analysis. */ |
2590 | ok = vect_prune_runtime_alias_test_list (loop_vinfo); |
2591 | if (!ok) |
2592 | return ok; |
2593 | |
2594 | /* Do not invoke vect_enhance_data_refs_alignment for epilogue |
2595 | vectorization, since we do not want to add extra peeling or |
2596 | add versioning for alignment. */ |
2597 | if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)) |
2598 | /* This pass will decide on using loop versioning and/or loop peeling in |
2599 | order to enhance the alignment of data references in the loop. */ |
2600 | ok = vect_enhance_data_refs_alignment (loop_vinfo); |
2601 | if (!ok) |
2602 | return ok; |
2603 | |
2604 | if (slp) |
2605 | { |
2606 | /* Analyze operations in the SLP instances. Note this may |
2607 | remove unsupported SLP instances which makes the above |
2608 | SLP kind detection invalid. */ |
2609 | unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo)(loop_vinfo)->slp_instances.length (); |
2610 | vect_slp_analyze_operations (loop_vinfo); |
2611 | if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo)(loop_vinfo)->slp_instances.length () != old_size) |
2612 | { |
2613 | ok = opt_result::failure_at (vect_location, |
2614 | "unsupported SLP instances\n"); |
2615 | goto again; |
2616 | } |
2617 | |
2618 | /* Check whether any load in ALL SLP instances is possibly permuted. */ |
2619 | slp_tree load_node, slp_root; |
2620 | unsigned i, x; |
2621 | slp_instance instance; |
2622 | bool can_use_lanes = true; |
2623 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance)for (x = 0; ((loop_vinfo)->slp_instances).iterate ((x), & (instance)); ++(x)) |
2624 | { |
2625 | slp_root = SLP_INSTANCE_TREE (instance)(instance)->root; |
2626 | int group_size = SLP_TREE_LANES (slp_root)(slp_root)->lanes; |
2627 | tree vectype = SLP_TREE_VECTYPE (slp_root)(slp_root)->vectype; |
2628 | bool loads_permuted = false; |
2629 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)for (i = 0; ((instance)->loads).iterate ((i), &(load_node )); ++(i)) |
2630 | { |
2631 | if (!SLP_TREE_LOAD_PERMUTATION (load_node)(load_node)->load_permutation.exists ()) |
2632 | continue; |
2633 | unsigned j; |
2634 | stmt_vec_info load_info; |
2635 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)for (j = 0; ((load_node)->stmts).iterate ((j), &(load_info )); ++(j)) |
2636 | if (SLP_TREE_LOAD_PERMUTATION (load_node)(load_node)->load_permutation[j] != j) |
2637 | { |
2638 | loads_permuted = true; |
2639 | break; |
2640 | } |
2641 | } |
2642 | |
2643 | /* If the loads and stores can be handled with load/store-lane |
2644 | instructions record it and move on to the next instance. */ |
2645 | if (loads_permuted |
2646 | && SLP_INSTANCE_KIND (instance)(instance)->kind == slp_inst_kind_store |
2647 | && vect_store_lanes_supported (vectype, group_size, false)) |
2648 | { |
2649 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)for (i = 0; ((instance)->loads).iterate ((i), &(load_node )); ++(i)) |
2650 | { |
2651 | stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT(((void)(!(((load_node)->stmts[0])->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2652, __FUNCTION__), 0 : 0)), ((load_node)->stmts[0])-> first_element) |
2652 | (SLP_TREE_SCALAR_STMTS (load_node)[0])(((void)(!(((load_node)->stmts[0])->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2652, __FUNCTION__), 0 : 0)), ((load_node)->stmts[0])-> first_element); |
2653 | /* Use SLP for strided accesses (or if we can't |
2654 | load-lanes). */ |
2655 | if (STMT_VINFO_STRIDED_P (stmt_vinfo)(stmt_vinfo)->strided_p |
2656 | || ! vect_load_lanes_supported |
2657 | (STMT_VINFO_VECTYPE (stmt_vinfo)(stmt_vinfo)->vectype, |
2658 | DR_GROUP_SIZE (stmt_vinfo)(((void)(!((stmt_vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2658, __FUNCTION__), 0 : 0)), (stmt_vinfo)->size), false)) |
2659 | break; |
2660 | } |
2661 | |
2662 | can_use_lanes |
2663 | = can_use_lanes && i == SLP_INSTANCE_LOADS (instance)(instance)->loads.length (); |
2664 | |
2665 | if (can_use_lanes && dump_enabled_p ()) |
2666 | dump_printf_loc (MSG_NOTE, vect_location, |
2667 | "SLP instance %p can use load/store-lanes\n", |
2668 | (void *) instance); |
2669 | } |
2670 | else |
2671 | { |
2672 | can_use_lanes = false; |
2673 | break; |
2674 | } |
2675 | } |
2676 | |
2677 | /* If all SLP instances can use load/store-lanes abort SLP and try again |
2678 | with SLP disabled. */ |
2679 | if (can_use_lanes) |
2680 | { |
2681 | ok = opt_result::failure_at (vect_location, |
2682 | "Built SLP cancelled: can use " |
2683 | "load/store-lanes\n"); |
2684 | if (dump_enabled_p ()) |
2685 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2686 | "Built SLP cancelled: all SLP instances support " |
2687 | "load/store-lanes\n"); |
2688 | goto again; |
2689 | } |
2690 | } |
2691 | |
2692 | /* Dissolve SLP-only groups. */ |
2693 | vect_dissolve_slp_only_groups (loop_vinfo); |
2694 | |
2695 | /* Scan all the remaining operations in the loop that are not subject |
2696 | to SLP and make sure they are vectorizable. */ |
2697 | ok = vect_analyze_loop_operations (loop_vinfo); |
2698 | if (!ok) |
2699 | { |
2700 | if (dump_enabled_p ()) |
2701 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2702 | "bad operation or unsupported loop bound.\n"); |
2703 | return ok; |
2704 | } |
2705 | |
2706 | /* For now, we don't expect to mix both masking and length approaches for one |
2707 | loop, disable it if both are recorded. */ |
2708 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p |
2709 | && !LOOP_VINFO_MASKS (loop_vinfo)(loop_vinfo)->masks.is_empty () |
2710 | && !LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens.is_empty ()) |
2711 | { |
2712 | if (dump_enabled_p ()) |
2713 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2714 | "can't vectorize a loop with partial vectors" |
2715 | " because we don't expect to mix different" |
2716 | " approaches with partial vectors for the" |
2717 | " same loop.\n"); |
2718 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p = false; |
2719 | } |
2720 | |
2721 | /* If we still have the option of using partial vectors, |
2722 | check whether we can generate the necessary loop controls. */ |
2723 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p |
2724 | && !vect_verify_full_masking (loop_vinfo) |
2725 | && !vect_verify_loop_lens (loop_vinfo)) |
2726 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p = false; |
2727 | |
2728 | /* If we're vectorizing an epilogue loop, the vectorized loop either needs |
2729 | to be able to handle fewer than VF scalars, or needs to have a lower VF |
2730 | than the main loop. */ |
2731 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr) |
2732 | && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p |
2733 | && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),maybe_le ((orig_loop_vinfo)->vectorization_factor, (loop_vinfo )->vectorization_factor) |
2734 | LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo))maybe_le ((orig_loop_vinfo)->vectorization_factor, (loop_vinfo )->vectorization_factor)) |
2735 | return opt_result::failure_at (vect_location, |
2736 | "Vectorization factor too high for" |
2737 | " epilogue loop.\n"); |
2738 | |
2739 | /* Decide whether this loop_vinfo should use partial vectors or peeling, |
2740 | assuming that the loop will be used as a main loop. We will redo |
2741 | this analysis later if we instead decide to use the loop as an |
2742 | epilogue loop. */ |
2743 | ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false); |
2744 | if (!ok) |
2745 | return ok; |
2746 | |
2747 | /* Check the costings of the loop make vectorizing worthwhile. */ |
2748 | res = vect_analyze_loop_costing (loop_vinfo, suggested_unroll_factor); |
2749 | if (res < 0) |
2750 | { |
2751 | ok = opt_result::failure_at (vect_location, |
2752 | "Loop costings may not be worthwhile.\n"); |
2753 | goto again; |
2754 | } |
2755 | if (!res) |
2756 | return opt_result::failure_at (vect_location, |
2757 | "Loop costings not worthwhile.\n"); |
2758 | |
2759 | /* If an epilogue loop is required make sure we can create one. */ |
2760 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps |
2761 | || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter) |
2762 | { |
2763 | if (dump_enabled_p ()) |
2764 | dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n"); |
2765 | if (!vect_can_advance_ivs_p (loop_vinfo) |
2766 | || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop, |
2767 | single_exit (LOOP_VINFO_LOOP(loop_vinfo)->loop |
2768 | (loop_vinfo)(loop_vinfo)->loop))) |
2769 | { |
2770 | ok = opt_result::failure_at (vect_location, |
2771 | "not vectorized: can't create required " |
2772 | "epilog loop\n"); |
2773 | goto again; |
2774 | } |
2775 | } |
2776 | |
2777 | /* During peeling, we need to check if number of loop iterations is |
2778 | enough for both peeled prolog loop and vector loop. This check |
2779 | can be merged along with threshold check of loop versioning, so |
2780 | increase threshold for this case if necessary. |
2781 | |
2782 | If we are analyzing an epilogue we still want to check what its |
2783 | versioning threshold would be. If we decide to vectorize the epilogues we |
2784 | will want to use the lowest versioning threshold of all epilogues and main |
2785 | loop. This will enable us to enter a vectorized epilogue even when |
2786 | versioning the loop. We can't simply check whether the epilogue requires |
2787 | versioning though since we may have skipped some versioning checks when |
2788 | analyzing the epilogue. For instance, checks for alias versioning will be |
2789 | skipped when dealing with epilogues as we assume we already checked them |
2790 | for the main loop. So instead we always check the 'orig_loop_vinfo'. */ |
2791 | if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo)(((orig_loop_vinfo)->may_misalign_stmts.length () > 0) || ((orig_loop_vinfo)->comp_alias_ddrs.length () > 0 || ( orig_loop_vinfo)->check_unequal_addrs.length () > 0 || ( orig_loop_vinfo)->lower_bounds.length () > 0) || ((orig_loop_vinfo )->num_iters_assumptions) || ((orig_loop_vinfo)->simd_if_cond ))) |
2792 | { |
2793 | poly_uint64 niters_th = 0; |
2794 | unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th; |
2795 | |
2796 | if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
2797 | { |
2798 | /* Niters for peeled prolog loop. */ |
2799 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment < 0) |
2800 | { |
2801 | dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo)(loop_vinfo)->unaligned_dr; |
2802 | tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt)(dr_info->stmt)->vectype; |
2803 | niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1; |
2804 | } |
2805 | else |
2806 | niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment; |
2807 | } |
2808 | |
2809 | /* Niters for at least one iteration of vectorized loop. */ |
2810 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) |
2811 | niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor; |
2812 | /* One additional iteration because of peeling for gap. */ |
2813 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps) |
2814 | niters_th += 1; |
2815 | |
2816 | /* Use the same condition as vect_transform_loop to decide when to use |
2817 | the cost to determine a versioning threshold. */ |
2818 | if (vect_apply_runtime_profitability_check_p (loop_vinfo) |
2819 | && ordered_p (th, niters_th)) |
2820 | niters_th = ordered_max (poly_uint64 (th), niters_th); |
2821 | |
2822 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo)(loop_vinfo)->versioning_threshold = niters_th; |
2823 | } |
2824 | |
2825 | gcc_assert (known_eq (vectorization_factor,((void)(!((!maybe_ne (vectorization_factor, (loop_vinfo)-> vectorization_factor))) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2826, __FUNCTION__), 0 : 0)) |
2826 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)))((void)(!((!maybe_ne (vectorization_factor, (loop_vinfo)-> vectorization_factor))) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2826, __FUNCTION__), 0 : 0)); |
2827 | |
2828 | slp_done_for_suggested_uf = slp; |
2829 | |
2830 | /* Ok to vectorize! */ |
2831 | LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)(loop_vinfo)->vectorizable = 1; |
2832 | return opt_result::success (); |
2833 | |
2834 | again: |
2835 | /* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */ |
2836 | gcc_assert (!ok)((void)(!(!ok) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2836, __FUNCTION__), 0 : 0)); |
2837 | |
2838 | /* Try again with SLP forced off but if we didn't do any SLP there is |
2839 | no point in re-trying. */ |
2840 | if (!slp) |
2841 | return ok; |
2842 | |
2843 | /* If the slp decision is true when suggested unroll factor is worked |
2844 | out, and we are applying suggested unroll factor, we don't need to |
2845 | re-try any more. */ |
2846 | if (applying_suggested_uf && slp_done_for_suggested_uf) |
2847 | return ok; |
2848 | |
2849 | /* If there are reduction chains re-trying will fail anyway. */ |
2850 | if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)(loop_vinfo)->reduction_chains.is_empty ()) |
2851 | return ok; |
2852 | |
2853 | /* Likewise if the grouped loads or stores in the SLP cannot be handled |
2854 | via interleaving or lane instructions. */ |
2855 | slp_instance instance; |
2856 | slp_tree node; |
2857 | unsigned i, j; |
2858 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)for (i = 0; ((loop_vinfo)->slp_instances).iterate ((i), & (instance)); ++(i)) |
2859 | { |
2860 | stmt_vec_info vinfo; |
2861 | vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))((instance)->root)->stmts[0]; |
2862 | if (! STMT_VINFO_GROUPED_ACCESS (vinfo)((vinfo)->dr_aux.dr && (((void)(!((vinfo)->dr_aux .dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2862, __FUNCTION__), 0 : 0)), (vinfo)->first_element))) |
2863 | continue; |
2864 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2864, __FUNCTION__), 0 : 0)), (vinfo)->first_element); |
2865 | unsigned int size = DR_GROUP_SIZE (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2865, __FUNCTION__), 0 : 0)), (vinfo)->size); |
2866 | tree vectype = STMT_VINFO_VECTYPE (vinfo)(vinfo)->vectype; |
2867 | if (! vect_store_lanes_supported (vectype, size, false) |
2868 | && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)(!maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), 1U)) |
2869 | && ! vect_grouped_store_supported (vectype, size)) |
2870 | return opt_result::failure_at (vinfo->stmt, |
2871 | "unsupported grouped store\n"); |
2872 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)for (j = 0; ((instance)->loads).iterate ((j), &(node)) ; ++(j)) |
2873 | { |
2874 | vinfo = SLP_TREE_SCALAR_STMTS (node)(node)->stmts[0]; |
2875 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2875, __FUNCTION__), 0 : 0)), (vinfo)->first_element); |
2876 | bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2876, __FUNCTION__), 0 : 0)), (vinfo)->next_element); |
2877 | size = DR_GROUP_SIZE (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2877, __FUNCTION__), 0 : 0)), (vinfo)->size); |
2878 | vectype = STMT_VINFO_VECTYPE (vinfo)(vinfo)->vectype; |
2879 | if (! vect_load_lanes_supported (vectype, size, false) |
2880 | && ! vect_grouped_load_supported (vectype, single_element_p, |
2881 | size)) |
2882 | return opt_result::failure_at (vinfo->stmt, |
2883 | "unsupported grouped load\n"); |
2884 | } |
2885 | } |
2886 | |
2887 | if (dump_enabled_p ()) |
2888 | dump_printf_loc (MSG_NOTE, vect_location, |
2889 | "re-trying with SLP disabled\n"); |
2890 | |
2891 | /* Roll back state appropriately. No SLP this time. */ |
2892 | slp = false; |
2893 | /* Restore vectorization factor as it were without SLP. */ |
2894 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor = saved_vectorization_factor; |
2895 | /* Free the SLP instances. */ |
2896 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)for (j = 0; ((loop_vinfo)->slp_instances).iterate ((j), & (instance)); ++(j)) |
2897 | vect_free_slp_instance (instance); |
2898 | LOOP_VINFO_SLP_INSTANCES (loop_vinfo)(loop_vinfo)->slp_instances.release (); |
2899 | /* Reset SLP type to loop_vect on all stmts. */ |
2900 | for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop->num_nodes; ++i) |
2901 | { |
2902 | basic_block bb = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs[i]; |
2903 | for (gimple_stmt_iterator si = gsi_start_phis (bb); |
2904 | !gsi_end_p (si); gsi_next (&si)) |
2905 | { |
2906 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
2907 | STMT_SLP_TYPE (stmt_info)(stmt_info)->slp_type = loop_vect; |
2908 | if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_reduction_def |
2909 | || STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_double_reduction_def) |
2910 | { |
2911 | /* vectorizable_reduction adjusts reduction stmt def-types, |
2912 | restore them to that of the PHI. */ |
2913 | STMT_VINFO_DEF_TYPE (STMT_VINFO_REDUC_DEF (stmt_info))((stmt_info)->reduc_def)->def_type |
2914 | = STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type; |
2915 | STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize(vect_stmt_to_vectorize ((stmt_info)->reduc_def))->def_type |
2916 | (STMT_VINFO_REDUC_DEF (stmt_info)))(vect_stmt_to_vectorize ((stmt_info)->reduc_def))->def_type |
2917 | = STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type; |
2918 | } |
2919 | } |
2920 | for (gimple_stmt_iterator si = gsi_start_bb (bb); |
2921 | !gsi_end_p (si); gsi_next (&si)) |
2922 | { |
2923 | if (is_gimple_debug (gsi_stmt (si))) |
2924 | continue; |
2925 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
2926 | STMT_SLP_TYPE (stmt_info)(stmt_info)->slp_type = loop_vect; |
2927 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)(stmt_info)->in_pattern_p) |
2928 | { |
2929 | stmt_vec_info pattern_stmt_info |
2930 | = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt; |
2931 | if (STMT_VINFO_SLP_VECT_ONLY_PATTERN (pattern_stmt_info)(pattern_stmt_info)->slp_vect_pattern_only_p) |
2932 | STMT_VINFO_IN_PATTERN_P (stmt_info)(stmt_info)->in_pattern_p = false; |
2933 | |
2934 | gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)(stmt_info)->pattern_def_seq; |
2935 | STMT_SLP_TYPE (pattern_stmt_info)(pattern_stmt_info)->slp_type = loop_vect; |
2936 | for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq); |
2937 | !gsi_end_p (pi); gsi_next (&pi)) |
2938 | STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi)))(loop_vinfo->lookup_stmt (gsi_stmt (pi)))->slp_type |
2939 | = loop_vect; |
2940 | } |
2941 | } |
2942 | } |
2943 | /* Free optimized alias test DDRS. */ |
2944 | LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)(loop_vinfo)->lower_bounds.truncate (0); |
2945 | LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo)(loop_vinfo)->comp_alias_ddrs.release (); |
2946 | LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo)(loop_vinfo)->check_unequal_addrs.release (); |
2947 | /* Reset target cost data. */ |
2948 | delete loop_vinfo->vector_costs; |
2949 | loop_vinfo->vector_costs = nullptr; |
2950 | /* Reset accumulated rgroup information. */ |
2951 | release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo)(loop_vinfo)->masks); |
2952 | release_vec_loop_controls (&LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens); |
2953 | /* Reset assorted flags. */ |
2954 | LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter = false; |
2955 | LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps = false; |
2956 | LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th = 0; |
2957 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo)(loop_vinfo)->versioning_threshold = 0; |
2958 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p |
2959 | = saved_can_use_partial_vectors_p; |
2960 | |
2961 | goto start_over; |
2962 | } |
2963 | |
2964 | /* Return true if vectorizing a loop using NEW_LOOP_VINFO appears |
2965 | to be better than vectorizing it using OLD_LOOP_VINFO. Assume that |
2966 | OLD_LOOP_VINFO is better unless something specifically indicates |
2967 | otherwise. |
2968 | |
2969 | Note that this deliberately isn't a partial order. */ |
2970 | |
2971 | static bool |
2972 | vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo, |
2973 | loop_vec_info old_loop_vinfo) |
2974 | { |
2975 | struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo)(new_loop_vinfo)->loop; |
2976 | gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop)((void)(!((old_loop_vinfo)->loop == loop) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 2976, __FUNCTION__), 0 : 0)); |
2977 | |
2978 | poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo)(new_loop_vinfo)->vectorization_factor; |
2979 | poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo)(old_loop_vinfo)->vectorization_factor; |
2980 | |
2981 | /* Always prefer a VF of loop->simdlen over any other VF. */ |
2982 | if (loop->simdlen) |
2983 | { |
2984 | bool new_simdlen_p = known_eq (new_vf, loop->simdlen)(!maybe_ne (new_vf, loop->simdlen)); |
2985 | bool old_simdlen_p = known_eq (old_vf, loop->simdlen)(!maybe_ne (old_vf, loop->simdlen)); |
2986 | if (new_simdlen_p != old_simdlen_p) |
2987 | return new_simdlen_p; |
2988 | } |
2989 | |
2990 | const auto *old_costs = old_loop_vinfo->vector_costs; |
2991 | const auto *new_costs = new_loop_vinfo->vector_costs; |
2992 | if (loop_vec_info main_loop = LOOP_VINFO_ORIG_LOOP_INFO (old_loop_vinfo)(old_loop_vinfo)->orig_loop_info) |
2993 | return new_costs->better_epilogue_loop_than_p (old_costs, main_loop); |
2994 | |
2995 | return new_costs->better_main_loop_than_p (old_costs); |
2996 | } |
2997 | |
2998 | /* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO. Return |
2999 | true if we should. */ |
3000 | |
3001 | static bool |
3002 | vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, |
3003 | loop_vec_info old_loop_vinfo) |
3004 | { |
3005 | if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo)) |
3006 | return false; |
3007 | |
3008 | if (dump_enabled_p ()) |
3009 | dump_printf_loc (MSG_NOTE, vect_location, |
3010 | "***** Preferring vector mode %s to vector mode %s\n", |
3011 | GET_MODE_NAME (new_loop_vinfo->vector_mode)mode_name[new_loop_vinfo->vector_mode], |
3012 | GET_MODE_NAME (old_loop_vinfo->vector_mode)mode_name[old_loop_vinfo->vector_mode]); |
3013 | return true; |
3014 | } |
3015 | |
3016 | /* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO is |
3017 | not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance |
3018 | MODE_I to the next mode useful to analyze. |
3019 | Return the loop_vinfo on success and wrapped null on failure. */ |
3020 | |
3021 | static opt_loop_vec_info |
3022 | vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, |
3023 | const vect_loop_form_info *loop_form_info, |
3024 | loop_vec_info main_loop_vinfo, |
3025 | const vector_modes &vector_modes, unsigned &mode_i, |
3026 | machine_mode &autodetected_vector_mode, |
3027 | bool &fatal) |
3028 | { |
3029 | loop_vec_info loop_vinfo |
3030 | = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo); |
3031 | |
3032 | machine_mode vector_mode = vector_modes[mode_i]; |
3033 | loop_vinfo->vector_mode = vector_mode; |
3034 | unsigned int suggested_unroll_factor = 1; |
3035 | bool slp_done_for_suggested_uf; |
3036 | |
3037 | /* Run the main analysis. */ |
3038 | opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, |
3039 | &suggested_unroll_factor, |
3040 | slp_done_for_suggested_uf); |
3041 | if (dump_enabled_p ()) |
3042 | dump_printf_loc (MSG_NOTE, vect_location, |
3043 | "***** Analysis %s with vector mode %s\n", |
3044 | res ? "succeeded" : " failed", |
3045 | GET_MODE_NAME (loop_vinfo->vector_mode)mode_name[loop_vinfo->vector_mode]); |
3046 | |
3047 | if (!main_loop_vinfo && suggested_unroll_factor > 1) |
3048 | { |
3049 | if (dump_enabled_p ()) |
3050 | dump_printf_loc (MSG_NOTE, vect_location, |
3051 | "***** Re-trying analysis for unrolling" |
3052 | " with unroll factor %d and slp %s.\n", |
3053 | suggested_unroll_factor, |
3054 | slp_done_for_suggested_uf ? "on" : "off"); |
3055 | loop_vec_info unroll_vinfo |
3056 | = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo); |
3057 | unroll_vinfo->vector_mode = vector_mode; |
3058 | unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; |
3059 | opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULLnullptr, |
3060 | slp_done_for_suggested_uf); |
3061 | if (new_res) |
3062 | { |
3063 | delete loop_vinfo; |
3064 | loop_vinfo = unroll_vinfo; |
3065 | } |
3066 | else |
3067 | delete unroll_vinfo; |
3068 | } |
3069 | |
3070 | /* Remember the autodetected vector mode. */ |
3071 | if (vector_mode == VOIDmode((void) 0, E_VOIDmode)) |
3072 | autodetected_vector_mode = loop_vinfo->vector_mode; |
3073 | |
3074 | /* Advance mode_i, first skipping modes that would result in the |
3075 | same analysis result. */ |
3076 | while (mode_i + 1 < vector_modes.length () |
3077 | && vect_chooses_same_modes_p (loop_vinfo, |
3078 | vector_modes[mode_i + 1])) |
3079 | { |
3080 | if (dump_enabled_p ()) |
3081 | dump_printf_loc (MSG_NOTE, vect_location, |
3082 | "***** The result for vector mode %s would" |
3083 | " be the same\n", |
3084 | GET_MODE_NAME (vector_modes[mode_i + 1])mode_name[vector_modes[mode_i + 1]]); |
3085 | mode_i += 1; |
3086 | } |
3087 | if (mode_i + 1 < vector_modes.length () |
3088 | && VECTOR_MODE_P (autodetected_vector_mode)(((enum mode_class) mode_class[autodetected_vector_mode]) == MODE_VECTOR_BOOL || ((enum mode_class) mode_class[autodetected_vector_mode]) == MODE_VECTOR_INT || ((enum mode_class) mode_class[autodetected_vector_mode ]) == MODE_VECTOR_FLOAT || ((enum mode_class) mode_class[autodetected_vector_mode ]) == MODE_VECTOR_FRACT || ((enum mode_class) mode_class[autodetected_vector_mode ]) == MODE_VECTOR_UFRACT || ((enum mode_class) mode_class[autodetected_vector_mode ]) == MODE_VECTOR_ACCUM || ((enum mode_class) mode_class[autodetected_vector_mode ]) == MODE_VECTOR_UACCUM) |
3089 | && (related_vector_mode (vector_modes[mode_i + 1], |
3090 | GET_MODE_INNER (autodetected_vector_mode)(mode_to_inner (autodetected_vector_mode))) |
3091 | == autodetected_vector_mode) |
3092 | && (related_vector_mode (autodetected_vector_mode, |
3093 | GET_MODE_INNER (vector_modes[mode_i + 1])(mode_to_inner (vector_modes[mode_i + 1]))) |
3094 | == vector_modes[mode_i + 1])) |
3095 | { |
3096 | if (dump_enabled_p ()) |
3097 | dump_printf_loc (MSG_NOTE, vect_location, |
3098 | "***** Skipping vector mode %s, which would" |
3099 | " repeat the analysis for %s\n", |
3100 | GET_MODE_NAME (vector_modes[mode_i + 1])mode_name[vector_modes[mode_i + 1]], |
3101 | GET_MODE_NAME (autodetected_vector_mode)mode_name[autodetected_vector_mode]); |
3102 | mode_i += 1; |
3103 | } |
3104 | mode_i++; |
3105 | |
3106 | if (!res) |
3107 | { |
3108 | delete loop_vinfo; |
3109 | if (fatal) |
3110 | gcc_checking_assert (main_loop_vinfo == NULL)((void)(!(main_loop_vinfo == nullptr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3110, __FUNCTION__), 0 : 0)); |
3111 | return opt_loop_vec_info::propagate_failure (res); |
3112 | } |
3113 | |
3114 | return opt_loop_vec_info::success (loop_vinfo); |
3115 | } |
3116 | |
3117 | /* Function vect_analyze_loop. |
3118 | |
3119 | Apply a set of analyses on LOOP, and create a loop_vec_info struct |
3120 | for it. The different analyses will record information in the |
3121 | loop_vec_info struct. */ |
3122 | opt_loop_vec_info |
3123 | vect_analyze_loop (class loop *loop, vec_info_shared *shared) |
3124 | { |
3125 | DUMP_VECT_SCOPE ("analyze_loop_nest")auto_dump_scope scope ("analyze_loop_nest", vect_location); |
3126 | |
3127 | if (loop_outer (loop) |
3128 | && loop_vec_info_for_loop (loop_outer (loop)) |
3129 | && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop)))(loop_vec_info_for_loop (loop_outer (loop)))->vectorizable) |
3130 | return opt_loop_vec_info::failure_at (vect_location, |
3131 | "outer-loop already vectorized.\n"); |
3132 | |
3133 | if (!find_loop_nest (loop, &shared->loop_nest)) |
3134 | return opt_loop_vec_info::failure_at |
3135 | (vect_location, |
3136 | "not vectorized: loop nest containing two or more consecutive inner" |
3137 | " loops cannot be vectorized\n"); |
3138 | |
3139 | /* Analyze the loop form. */ |
3140 | vect_loop_form_info loop_form_info; |
3141 | opt_result res = vect_analyze_loop_form (loop, &loop_form_info); |
3142 | if (!res) |
3143 | { |
3144 | if (dump_enabled_p ()) |
3145 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3146 | "bad loop form.\n"); |
3147 | return opt_loop_vec_info::propagate_failure (res); |
3148 | } |
3149 | if (!integer_onep (loop_form_info.assumptions)) |
3150 | { |
3151 | /* We consider to vectorize this loop by versioning it under |
3152 | some assumptions. In order to do this, we need to clear |
3153 | existing information computed by scev and niter analyzer. */ |
3154 | scev_reset_htab (); |
3155 | free_numbers_of_iterations_estimates (loop); |
3156 | /* Also set flag for this loop so that following scev and niter |
3157 | analysis are done under the assumptions. */ |
3158 | loop_constraint_set (loop, LOOP_C_FINITE(1 << 1)); |
3159 | } |
3160 | |
3161 | auto_vector_modes vector_modes; |
3162 | /* Autodetect first vector size we try. */ |
3163 | vector_modes.safe_push (VOIDmode((void) 0, E_VOIDmode)); |
3164 | unsigned int autovec_flags |
3165 | = targetm.vectorize.autovectorize_vector_modes (&vector_modes, |
3166 | loop->simdlen != 0); |
3167 | bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS) |
3168 | && !unlimited_cost_model (loop)); |
3169 | machine_mode autodetected_vector_mode = VOIDmode((void) 0, E_VOIDmode); |
3170 | opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULLnullptr); |
3171 | unsigned int mode_i = 0; |
3172 | unsigned HOST_WIDE_INTlong simdlen = loop->simdlen; |
3173 | |
3174 | /* Keep track of the VF for each mode. Initialize all to 0 which indicates |
3175 | a mode has not been analyzed. */ |
3176 | auto_vec<poly_uint64, 8> cached_vf_per_mode; |
3177 | for (unsigned i = 0; i < vector_modes.length (); ++i) |
3178 | cached_vf_per_mode.safe_push (0); |
3179 | |
3180 | /* First determine the main loop vectorization mode, either the first |
3181 | one that works, starting with auto-detecting the vector mode and then |
3182 | following the targets order of preference, or the one with the |
3183 | lowest cost if pick_lowest_cost_p. */ |
3184 | while (1) |
3185 | { |
3186 | bool fatal; |
3187 | unsigned int last_mode_i = mode_i; |
3188 | /* Set cached VF to -1 prior to analysis, which indicates a mode has |
3189 | failed. */ |
3190 | cached_vf_per_mode[last_mode_i] = -1; |
3191 | opt_loop_vec_info loop_vinfo |
3192 | = vect_analyze_loop_1 (loop, shared, &loop_form_info, |
3193 | NULLnullptr, vector_modes, mode_i, |
3194 | autodetected_vector_mode, fatal); |
3195 | if (fatal) |
3196 | break; |
3197 | |
3198 | if (loop_vinfo) |
3199 | { |
3200 | /* Analyzis has been successful so update the VF value. The |
3201 | VF should always be a multiple of unroll_factor and we want to |
3202 | capture the original VF here. */ |
3203 | cached_vf_per_mode[last_mode_i] |
3204 | = exact_div (LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor, |
3205 | loop_vinfo->suggested_unroll_factor); |
3206 | /* Once we hit the desired simdlen for the first time, |
3207 | discard any previous attempts. */ |
3208 | if (simdlen |
3209 | && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen)(!maybe_ne ((loop_vinfo)->vectorization_factor, simdlen))) |
3210 | { |
3211 | delete first_loop_vinfo; |
3212 | first_loop_vinfo = opt_loop_vec_info::success (NULLnullptr); |
3213 | simdlen = 0; |
3214 | } |
3215 | else if (pick_lowest_cost_p |
3216 | && first_loop_vinfo |
3217 | && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo)) |
3218 | { |
3219 | /* Pick loop_vinfo over first_loop_vinfo. */ |
3220 | delete first_loop_vinfo; |
3221 | first_loop_vinfo = opt_loop_vec_info::success (NULLnullptr); |
3222 | } |
3223 | if (first_loop_vinfo == NULLnullptr) |
3224 | first_loop_vinfo = loop_vinfo; |
3225 | else |
3226 | { |
3227 | delete loop_vinfo; |
3228 | loop_vinfo = opt_loop_vec_info::success (NULLnullptr); |
3229 | } |
3230 | |
3231 | /* Commit to first_loop_vinfo if we have no reason to try |
3232 | alternatives. */ |
3233 | if (!simdlen && !pick_lowest_cost_p) |
3234 | break; |
3235 | } |
3236 | if (mode_i == vector_modes.length () |
3237 | || autodetected_vector_mode == VOIDmode((void) 0, E_VOIDmode)) |
3238 | break; |
3239 | |
3240 | /* Try the next biggest vector size. */ |
3241 | if (dump_enabled_p ()) |
3242 | dump_printf_loc (MSG_NOTE, vect_location, |
3243 | "***** Re-trying analysis with vector mode %s\n", |
3244 | GET_MODE_NAME (vector_modes[mode_i])mode_name[vector_modes[mode_i]]); |
3245 | } |
3246 | if (!first_loop_vinfo) |
3247 | return opt_loop_vec_info::propagate_failure (res); |
3248 | |
3249 | if (dump_enabled_p ()) |
3250 | dump_printf_loc (MSG_NOTE, vect_location, |
3251 | "***** Choosing vector mode %s\n", |
3252 | GET_MODE_NAME (first_loop_vinfo->vector_mode)mode_name[first_loop_vinfo->vector_mode]); |
3253 | |
3254 | /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is |
3255 | enabled, SIMDUID is not set, it is the innermost loop and we have |
3256 | either already found the loop's SIMDLEN or there was no SIMDLEN to |
3257 | begin with. |
3258 | TODO: Enable epilogue vectorization for loops with SIMDUID set. */ |
3259 | bool vect_epilogues = (!simdlen |
3260 | && loop->inner == NULLnullptr |
3261 | && param_vect_epilogues_nomaskglobal_options.x_param_vect_epilogues_nomask |
3262 | && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)(first_loop_vinfo)->peeling_for_niter |
3263 | && !loop->simduid); |
3264 | if (!vect_epilogues) |
3265 | return first_loop_vinfo; |
3266 | |
3267 | /* Now analyze first_loop_vinfo for epilogue vectorization. */ |
3268 | poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo)(first_loop_vinfo)->versioning_threshold; |
3269 | |
3270 | /* For epilogues start the analysis from the first mode. The motivation |
3271 | behind starting from the beginning comes from cases where the VECTOR_MODES |
3272 | array may contain length-agnostic and length-specific modes. Their |
3273 | ordering is not guaranteed, so we could end up picking a mode for the main |
3274 | loop that is after the epilogue's optimal mode. */ |
3275 | vector_modes[0] = autodetected_vector_mode; |
3276 | mode_i = 0; |
3277 | |
3278 | bool supports_partial_vectors = |
3279 | partial_vectors_supported_p () && param_vect_partial_vector_usageglobal_options.x_param_vect_partial_vector_usage != 0; |
3280 | poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo)(first_loop_vinfo)->vectorization_factor; |
3281 | |
3282 | while (1) |
3283 | { |
3284 | /* If the target does not support partial vectors we can shorten the |
3285 | number of modes to analyze for the epilogue as we know we can't pick a |
3286 | mode that would lead to a VF at least as big as the |
3287 | FIRST_VINFO_VF. */ |
3288 | if (!supports_partial_vectors |
3289 | && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf)maybe_le (first_vinfo_vf, cached_vf_per_mode[mode_i])) |
3290 | { |
3291 | mode_i++; |
3292 | if (mode_i == vector_modes.length ()) |
3293 | break; |
3294 | continue; |
3295 | } |
3296 | |
3297 | if (dump_enabled_p ()) |
3298 | dump_printf_loc (MSG_NOTE, vect_location, |
3299 | "***** Re-trying epilogue analysis with vector " |
3300 | "mode %s\n", GET_MODE_NAME (vector_modes[mode_i])mode_name[vector_modes[mode_i]]); |
3301 | |
3302 | bool fatal; |
3303 | opt_loop_vec_info loop_vinfo |
3304 | = vect_analyze_loop_1 (loop, shared, &loop_form_info, |
3305 | first_loop_vinfo, |
3306 | vector_modes, mode_i, |
3307 | autodetected_vector_mode, fatal); |
3308 | if (fatal) |
3309 | break; |
3310 | |
3311 | if (loop_vinfo) |
3312 | { |
3313 | if (pick_lowest_cost_p) |
3314 | { |
3315 | /* Keep trying to roll back vectorization attempts while the |
3316 | loop_vec_infos they produced were worse than this one. */ |
3317 | vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos; |
3318 | while (!vinfos.is_empty () |
3319 | && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ())) |
3320 | { |
3321 | gcc_assert (vect_epilogues)((void)(!(vect_epilogues) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3321, __FUNCTION__), 0 : 0)); |
3322 | delete vinfos.pop (); |
3323 | } |
3324 | } |
3325 | /* For now only allow one epilogue loop. */ |
3326 | if (first_loop_vinfo->epilogue_vinfos.is_empty ()) |
3327 | { |
3328 | first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo); |
3329 | poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo)(loop_vinfo)->versioning_threshold; |
3330 | gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo)((void)(!(!(((loop_vinfo)->may_misalign_stmts.length () > 0) || ((loop_vinfo)->comp_alias_ddrs.length () > 0 || ( loop_vinfo)->check_unequal_addrs.length () > 0 || (loop_vinfo )->lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond)) || maybe_ne (lowest_th, 0U)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3331, __FUNCTION__), 0 : 0)) |
3331 | || maybe_ne (lowest_th, 0U))((void)(!(!(((loop_vinfo)->may_misalign_stmts.length () > 0) || ((loop_vinfo)->comp_alias_ddrs.length () > 0 || ( loop_vinfo)->check_unequal_addrs.length () > 0 || (loop_vinfo )->lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond)) || maybe_ne (lowest_th, 0U)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3331, __FUNCTION__), 0 : 0)); |
3332 | /* Keep track of the known smallest versioning |
3333 | threshold. */ |
3334 | if (ordered_p (lowest_th, th)) |
3335 | lowest_th = ordered_min (lowest_th, th); |
3336 | } |
3337 | else |
3338 | { |
3339 | delete loop_vinfo; |
3340 | loop_vinfo = opt_loop_vec_info::success (NULLnullptr); |
3341 | } |
3342 | |
3343 | /* For now only allow one epilogue loop, but allow |
3344 | pick_lowest_cost_p to replace it, so commit to the |
3345 | first epilogue if we have no reason to try alternatives. */ |
3346 | if (!pick_lowest_cost_p) |
3347 | break; |
3348 | } |
3349 | |
3350 | if (mode_i == vector_modes.length ()) |
3351 | break; |
3352 | |
3353 | } |
3354 | |
3355 | if (!first_loop_vinfo->epilogue_vinfos.is_empty ()) |
3356 | { |
3357 | LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo)(first_loop_vinfo)->versioning_threshold = lowest_th; |
3358 | if (dump_enabled_p ()) |
3359 | dump_printf_loc (MSG_NOTE, vect_location, |
3360 | "***** Choosing epilogue vector mode %s\n", |
3361 | GET_MODE_NAMEmode_name[first_loop_vinfo->epilogue_vinfos[0]->vector_mode ] |
3362 | (first_loop_vinfo->epilogue_vinfos[0]->vector_mode)mode_name[first_loop_vinfo->epilogue_vinfos[0]->vector_mode ]); |
3363 | } |
3364 | |
3365 | return first_loop_vinfo; |
3366 | } |
3367 | |
3368 | /* Return true if there is an in-order reduction function for CODE, storing |
3369 | it in *REDUC_FN if so. */ |
3370 | |
3371 | static bool |
3372 | fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn) |
3373 | { |
3374 | if (code == PLUS_EXPR) |
3375 | { |
3376 | *reduc_fn = IFN_FOLD_LEFT_PLUS; |
3377 | return true; |
3378 | } |
3379 | return false; |
3380 | } |
3381 | |
3382 | /* Function reduction_fn_for_scalar_code |
3383 | |
3384 | Input: |
3385 | CODE - tree_code of a reduction operations. |
3386 | |
3387 | Output: |
3388 | REDUC_FN - the corresponding internal function to be used to reduce the |
3389 | vector of partial results into a single scalar result, or IFN_LAST |
3390 | if the operation is a supported reduction operation, but does not have |
3391 | such an internal function. |
3392 | |
3393 | Return FALSE if CODE currently cannot be vectorized as reduction. */ |
3394 | |
3395 | bool |
3396 | reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn) |
3397 | { |
3398 | if (code.is_tree_code ()) |
3399 | switch (tree_code (code)) |
3400 | { |
3401 | case MAX_EXPR: |
3402 | *reduc_fn = IFN_REDUC_MAX; |
3403 | return true; |
3404 | |
3405 | case MIN_EXPR: |
3406 | *reduc_fn = IFN_REDUC_MIN; |
3407 | return true; |
3408 | |
3409 | case PLUS_EXPR: |
3410 | *reduc_fn = IFN_REDUC_PLUS; |
3411 | return true; |
3412 | |
3413 | case BIT_AND_EXPR: |
3414 | *reduc_fn = IFN_REDUC_AND; |
3415 | return true; |
3416 | |
3417 | case BIT_IOR_EXPR: |
3418 | *reduc_fn = IFN_REDUC_IOR; |
3419 | return true; |
3420 | |
3421 | case BIT_XOR_EXPR: |
3422 | *reduc_fn = IFN_REDUC_XOR; |
3423 | return true; |
3424 | |
3425 | case MULT_EXPR: |
3426 | case MINUS_EXPR: |
3427 | *reduc_fn = IFN_LAST; |
3428 | return true; |
3429 | |
3430 | default: |
3431 | return false; |
3432 | } |
3433 | else |
3434 | switch (combined_fn (code)) |
3435 | { |
3436 | CASE_CFN_FMAXcase CFN_FMAX: case CFN_BUILT_IN_FMAXF: case CFN_BUILT_IN_FMAX : case CFN_BUILT_IN_FMAXL: |
3437 | *reduc_fn = IFN_REDUC_FMAX; |
3438 | return true; |
3439 | |
3440 | CASE_CFN_FMINcase CFN_FMIN: case CFN_BUILT_IN_FMINF: case CFN_BUILT_IN_FMIN : case CFN_BUILT_IN_FMINL: |
3441 | *reduc_fn = IFN_REDUC_FMIN; |
3442 | return true; |
3443 | |
3444 | default: |
3445 | return false; |
3446 | } |
3447 | } |
3448 | |
3449 | /* If there is a neutral value X such that a reduction would not be affected |
3450 | by the introduction of additional X elements, return that X, otherwise |
3451 | return null. CODE is the code of the reduction and SCALAR_TYPE is type |
3452 | of the scalar elements. If the reduction has just a single initial value |
3453 | then INITIAL_VALUE is that value, otherwise it is null. */ |
3454 | |
3455 | tree |
3456 | neutral_op_for_reduction (tree scalar_type, code_helper code, |
3457 | tree initial_value) |
3458 | { |
3459 | if (code.is_tree_code ()) |
3460 | switch (tree_code (code)) |
3461 | { |
3462 | case WIDEN_SUM_EXPR: |
3463 | case DOT_PROD_EXPR: |
3464 | case SAD_EXPR: |
3465 | case PLUS_EXPR: |
3466 | case MINUS_EXPR: |
3467 | case BIT_IOR_EXPR: |
3468 | case BIT_XOR_EXPR: |
3469 | return build_zero_cst (scalar_type); |
3470 | |
3471 | case MULT_EXPR: |
3472 | return build_one_cst (scalar_type); |
3473 | |
3474 | case BIT_AND_EXPR: |
3475 | return build_all_ones_cst (scalar_type); |
3476 | |
3477 | case MAX_EXPR: |
3478 | case MIN_EXPR: |
3479 | return initial_value; |
3480 | |
3481 | default: |
3482 | return NULL_TREE(tree) nullptr; |
3483 | } |
3484 | else |
3485 | switch (combined_fn (code)) |
3486 | { |
3487 | CASE_CFN_FMINcase CFN_FMIN: case CFN_BUILT_IN_FMINF: case CFN_BUILT_IN_FMIN : case CFN_BUILT_IN_FMINL: |
3488 | CASE_CFN_FMAXcase CFN_FMAX: case CFN_BUILT_IN_FMAXF: case CFN_BUILT_IN_FMAX : case CFN_BUILT_IN_FMAXL: |
3489 | return initial_value; |
3490 | |
3491 | default: |
3492 | return NULL_TREE(tree) nullptr; |
3493 | } |
3494 | } |
3495 | |
3496 | /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement |
3497 | STMT is printed with a message MSG. */ |
3498 | |
3499 | static void |
3500 | report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg) |
3501 | { |
3502 | dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt); |
3503 | } |
3504 | |
3505 | /* Return true if we need an in-order reduction for operation CODE |
3506 | on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer |
3507 | overflow must wrap. */ |
3508 | |
3509 | bool |
3510 | needs_fold_left_reduction_p (tree type, code_helper code) |
3511 | { |
3512 | /* CHECKME: check for !flag_finite_math_only too? */ |
3513 | if (SCALAR_FLOAT_TYPE_P (type)(((enum tree_code) (type)->base.code) == REAL_TYPE)) |
3514 | { |
3515 | if (code.is_tree_code ()) |
3516 | switch (tree_code (code)) |
3517 | { |
3518 | case MIN_EXPR: |
3519 | case MAX_EXPR: |
3520 | return false; |
3521 | |
3522 | default: |
3523 | return !flag_associative_mathglobal_options.x_flag_associative_math; |
3524 | } |
3525 | else |
3526 | switch (combined_fn (code)) |
3527 | { |
3528 | CASE_CFN_FMINcase CFN_FMIN: case CFN_BUILT_IN_FMINF: case CFN_BUILT_IN_FMIN : case CFN_BUILT_IN_FMINL: |
3529 | CASE_CFN_FMAXcase CFN_FMAX: case CFN_BUILT_IN_FMAXF: case CFN_BUILT_IN_FMAX : case CFN_BUILT_IN_FMAXL: |
3530 | return false; |
3531 | |
3532 | default: |
3533 | return !flag_associative_mathglobal_options.x_flag_associative_math; |
3534 | } |
3535 | } |
3536 | |
3537 | if (INTEGRAL_TYPE_P (type)(((enum tree_code) (type)->base.code) == ENUMERAL_TYPE || ( (enum tree_code) (type)->base.code) == BOOLEAN_TYPE || ((enum tree_code) (type)->base.code) == INTEGER_TYPE)) |
3538 | return (!code.is_tree_code () |
3539 | || !operation_no_trapping_overflow (type, tree_code (code))); |
3540 | |
3541 | if (SAT_FIXED_POINT_TYPE_P (type)(((enum tree_code) (type)->base.code) == FIXED_POINT_TYPE && ((tree_not_check4 ((type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3541, __FUNCTION__, (RECORD_TYPE), (UNION_TYPE), (QUAL_UNION_TYPE ), (ARRAY_TYPE)))->base.u.bits.saturating_flag))) |
3542 | return true; |
3543 | |
3544 | return false; |
3545 | } |
3546 | |
3547 | /* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and |
3548 | has a handled computation expression. Store the main reduction |
3549 | operation in *CODE. */ |
3550 | |
3551 | static bool |
3552 | check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, |
3553 | tree loop_arg, code_helper *code, |
3554 | vec<std::pair<ssa_op_iter, use_operand_p> > &path) |
3555 | { |
3556 | auto_bitmap visited; |
3557 | tree lookfor = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi)); |
3558 | ssa_op_iter curri; |
3559 | use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE0x01); |
3560 | while (USE_FROM_PTR (curr)get_use_from_ptr (curr) != loop_arg) |
3561 | curr = op_iter_next_use (&curri); |
3562 | curri.i = curri.numops; |
3563 | do |
3564 | { |
3565 | path.safe_push (std::make_pair (curri, curr)); |
3566 | tree use = USE_FROM_PTR (curr)get_use_from_ptr (curr); |
3567 | if (use == lookfor) |
3568 | break; |
3569 | gimple *def = SSA_NAME_DEF_STMT (use)(tree_check ((use), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3569, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt; |
3570 | if (gimple_nop_p (def) |
3571 | || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) |
3572 | { |
3573 | pop: |
3574 | do |
3575 | { |
3576 | std::pair<ssa_op_iter, use_operand_p> x = path.pop (); |
3577 | curri = x.first; |
3578 | curr = x.second; |
3579 | do |
3580 | curr = op_iter_next_use (&curri); |
3581 | /* Skip already visited or non-SSA operands (from iterating |
3582 | over PHI args). */ |
3583 | while (curr != NULL_USE_OPERAND_P((use_operand_p)nullptr) |
3584 | && (TREE_CODE (USE_FROM_PTR (curr))((enum tree_code) (get_use_from_ptr (curr))->base.code) != SSA_NAME |
3585 | || ! bitmap_set_bit (visited, |
3586 | SSA_NAME_VERSION(tree_check ((get_use_from_ptr (curr)), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3587, __FUNCTION__, (SSA_NAME)))->base.u.version |
3587 | (USE_FROM_PTR (curr))(tree_check ((get_use_from_ptr (curr)), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3587, __FUNCTION__, (SSA_NAME)))->base.u.version))); |
3588 | } |
3589 | while (curr == NULL_USE_OPERAND_P((use_operand_p)nullptr) && ! path.is_empty ()); |
3590 | if (curr == NULL_USE_OPERAND_P((use_operand_p)nullptr)) |
3591 | break; |
3592 | } |
3593 | else |
3594 | { |
3595 | if (gimple_code (def) == GIMPLE_PHI) |
3596 | curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE0x01); |
3597 | else |
3598 | curr = op_iter_init_use (&curri, def, SSA_OP_USE0x01); |
3599 | while (curr != NULL_USE_OPERAND_P((use_operand_p)nullptr) |
3600 | && (TREE_CODE (USE_FROM_PTR (curr))((enum tree_code) (get_use_from_ptr (curr))->base.code) != SSA_NAME |
3601 | || ! bitmap_set_bit (visited, |
3602 | SSA_NAME_VERSION(tree_check ((get_use_from_ptr (curr)), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3603, __FUNCTION__, (SSA_NAME)))->base.u.version |
3603 | (USE_FROM_PTR (curr))(tree_check ((get_use_from_ptr (curr)), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3603, __FUNCTION__, (SSA_NAME)))->base.u.version))) |
3604 | curr = op_iter_next_use (&curri); |
3605 | if (curr == NULL_USE_OPERAND_P((use_operand_p)nullptr)) |
3606 | goto pop; |
3607 | } |
3608 | } |
3609 | while (1); |
3610 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3611 | { |
3612 | dump_printf_loc (MSG_NOTE, loc, "reduction path: "); |
3613 | unsigned i; |
3614 | std::pair<ssa_op_iter, use_operand_p> *x; |
3615 | FOR_EACH_VEC_ELT (path, i, x)for (i = 0; (path).iterate ((i), &(x)); ++(i)) |
3616 | dump_printf (MSG_NOTE, "%T ", USE_FROM_PTR (x->second)get_use_from_ptr (x->second)); |
3617 | dump_printf (MSG_NOTE, "\n"); |
3618 | } |
3619 | |
3620 | /* Check whether the reduction path detected is valid. */ |
3621 | bool fail = path.length () == 0; |
3622 | bool neg = false; |
3623 | int sign = -1; |
3624 | *code = ERROR_MARK; |
3625 | for (unsigned i = 1; i < path.length (); ++i) |
3626 | { |
3627 | gimple *use_stmt = USE_STMT (path[i].second)(path[i].second)->loc.stmt; |
3628 | gimple_match_op op; |
3629 | if (!gimple_extract_op (use_stmt, &op)) |
3630 | { |
3631 | fail = true; |
3632 | break; |
3633 | } |
3634 | unsigned int opi = op.num_ops; |
3635 | if (gassign *assign = dyn_cast<gassign *> (use_stmt)) |
3636 | { |
3637 | /* The following make sure we can compute the operand index |
3638 | easily plus it mostly disallows chaining via COND_EXPR condition |
3639 | operands. */ |
3640 | for (opi = 0; opi < op.num_ops; ++opi) |
3641 | if (gimple_assign_rhs1_ptr (assign) + opi == path[i].second->use) |
3642 | break; |
3643 | } |
3644 | else if (gcall *call = dyn_cast<gcall *> (use_stmt)) |
3645 | { |
3646 | for (opi = 0; opi < op.num_ops; ++opi) |
3647 | if (gimple_call_arg_ptr (call, opi) == path[i].second->use) |
3648 | break; |
3649 | } |
3650 | if (opi == op.num_ops) |
3651 | { |
3652 | fail = true; |
3653 | break; |
3654 | } |
3655 | op.code = canonicalize_code (op.code, op.type); |
3656 | if (op.code == MINUS_EXPR) |
3657 | { |
3658 | op.code = PLUS_EXPR; |
3659 | /* Track whether we negate the reduction value each iteration. */ |
3660 | if (op.ops[1] == op.ops[opi]) |
3661 | neg = ! neg; |
3662 | } |
3663 | if (CONVERT_EXPR_CODE_P (op.code)((op.code) == NOP_EXPR || (op.code) == CONVERT_EXPR) |
3664 | && tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0])((contains_struct_check ((op.ops[0]), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3664, __FUNCTION__))->typed.type))) |
3665 | ; |
3666 | else if (*code == ERROR_MARK) |
3667 | { |
3668 | *code = op.code; |
3669 | sign = TYPE_SIGN (op.type)((signop) ((tree_class_check ((op.type), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3669, __FUNCTION__))->base.u.bits.unsigned_flag)); |
3670 | } |
3671 | else if (op.code != *code) |
3672 | { |
3673 | fail = true; |
3674 | break; |
3675 | } |
3676 | else if ((op.code == MIN_EXPR |
3677 | || op.code == MAX_EXPR) |
3678 | && sign != TYPE_SIGN (op.type)((signop) ((tree_class_check ((op.type), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3678, __FUNCTION__))->base.u.bits.unsigned_flag))) |
3679 | { |
3680 | fail = true; |
3681 | break; |
3682 | } |
3683 | /* Check there's only a single stmt the op is used on. For the |
3684 | not value-changing tail and the last stmt allow out-of-loop uses. |
3685 | ??? We could relax this and handle arbitrary live stmts by |
3686 | forcing a scalar epilogue for example. */ |
3687 | imm_use_iterator imm_iter; |
3688 | gimple *op_use_stmt; |
3689 | unsigned cnt = 0; |
3690 | FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op.ops[opi])for (struct auto_end_imm_use_stmt_traverse auto_end_imm_use_stmt_traverse ((((op_use_stmt) = first_imm_use_stmt (&(imm_iter), (op. ops[opi]))), &(imm_iter))); !end_imm_use_stmt_p (&(imm_iter )); (void) ((op_use_stmt) = next_imm_use_stmt (&(imm_iter )))) |
3691 | if (!is_gimple_debug (op_use_stmt) |
3692 | && (*code != ERROR_MARK |
3693 | || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))) |
3694 | { |
3695 | /* We want to allow x + x but not x < 1 ? x : 2. */ |
3696 | if (is_gimple_assign (op_use_stmt) |
3697 | && gimple_assign_rhs_code (op_use_stmt) == COND_EXPR) |
3698 | { |
3699 | use_operand_p use_p; |
3700 | FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)for ((use_p) = first_imm_use_on_stmt (&(imm_iter)); !end_imm_use_on_stmt_p (&(imm_iter)); (void) ((use_p) = next_imm_use_on_stmt (& (imm_iter)))) |
3701 | cnt++; |
3702 | } |
3703 | else |
3704 | cnt++; |
3705 | } |
3706 | if (cnt != 1) |
3707 | { |
3708 | fail = true; |
3709 | break; |
3710 | } |
3711 | } |
3712 | return ! fail && ! neg && *code != ERROR_MARK; |
3713 | } |
3714 | |
3715 | bool |
3716 | check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, |
3717 | tree loop_arg, enum tree_code code) |
3718 | { |
3719 | auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; |
3720 | code_helper code_; |
3721 | return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path) |
3722 | && code_ == code); |
3723 | } |
3724 | |
3725 | |
3726 | |
3727 | /* Function vect_is_simple_reduction |
3728 | |
3729 | (1) Detect a cross-iteration def-use cycle that represents a simple |
3730 | reduction computation. We look for the following pattern: |
3731 | |
3732 | loop_header: |
3733 | a1 = phi < a0, a2 > |
3734 | a3 = ... |
3735 | a2 = operation (a3, a1) |
3736 | |
3737 | or |
3738 | |
3739 | a3 = ... |
3740 | loop_header: |
3741 | a1 = phi < a0, a2 > |
3742 | a2 = operation (a3, a1) |
3743 | |
3744 | such that: |
3745 | 1. operation is commutative and associative and it is safe to |
3746 | change the order of the computation |
3747 | 2. no uses for a2 in the loop (a2 is used out of the loop) |
3748 | 3. no uses of a1 in the loop besides the reduction operation |
3749 | 4. no uses of a1 outside the loop. |
3750 | |
3751 | Conditions 1,4 are tested here. |
3752 | Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. |
3753 | |
3754 | (2) Detect a cross-iteration def-use cycle in nested loops, i.e., |
3755 | nested cycles. |
3756 | |
3757 | (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double |
3758 | reductions: |
3759 | |
3760 | a1 = phi < a0, a2 > |
3761 | inner loop (def of a3) |
3762 | a2 = phi < a3 > |
3763 | |
3764 | (4) Detect condition expressions, ie: |
3765 | for (int i = 0; i < N; i++) |
3766 | if (a[i] < val) |
3767 | ret_val = a[i]; |
3768 | |
3769 | */ |
3770 | |
3771 | static stmt_vec_info |
3772 | vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, |
3773 | bool *double_reduc, bool *reduc_chain_p, bool slp) |
3774 | { |
3775 | gphi *phi = as_a <gphi *> (phi_info->stmt); |
3776 | gimple *phi_use_stmt = NULLnullptr; |
3777 | imm_use_iterator imm_iter; |
3778 | use_operand_p use_p; |
3779 | |
3780 | *double_reduc = false; |
3781 | *reduc_chain_p = false; |
3782 | STMT_VINFO_REDUC_TYPE (phi_info)(phi_info)->reduc_type = TREE_CODE_REDUCTION; |
3783 | |
3784 | tree phi_name = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi)); |
3785 | /* ??? If there are no uses of the PHI result the inner loop reduction |
3786 | won't be detected as possibly double-reduction by vectorizable_reduction |
3787 | because that tries to walk the PHI arg from the preheader edge which |
3788 | can be constant. See PR60382. */ |
3789 | if (has_zero_uses (phi_name)) |
3790 | return NULLnullptr; |
3791 | class loop *loop = (gimple_bb (phi))->loop_father; |
3792 | unsigned nphi_def_loop_uses = 0; |
3793 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)for ((use_p) = first_readonly_imm_use (&(imm_iter), (phi_name )); !end_readonly_imm_use_p (&(imm_iter)); (void) ((use_p ) = next_readonly_imm_use (&(imm_iter)))) |
3794 | { |
3795 | gimple *use_stmt = USE_STMT (use_p)(use_p)->loc.stmt; |
3796 | if (is_gimple_debug (use_stmt)) |
3797 | continue; |
3798 | |
3799 | if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) |
3800 | { |
3801 | if (dump_enabled_p ()) |
3802 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3803 | "intermediate value used outside loop.\n"); |
3804 | |
3805 | return NULLnullptr; |
3806 | } |
3807 | |
3808 | nphi_def_loop_uses++; |
3809 | phi_use_stmt = use_stmt; |
3810 | } |
3811 | |
3812 | tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop))gimple_phi_arg_def (((phi)), ((loop_latch_edge (loop))->dest_idx )); |
3813 | if (TREE_CODE (latch_def)((enum tree_code) (latch_def)->base.code) != SSA_NAME) |
3814 | { |
3815 | if (dump_enabled_p ()) |
3816 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3817 | "reduction: not ssa_name: %T\n", latch_def); |
3818 | return NULLnullptr; |
3819 | } |
3820 | |
3821 | stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def); |
3822 | if (!def_stmt_info |
3823 | || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))) |
3824 | return NULLnullptr; |
3825 | |
3826 | bool nested_in_vect_loop |
3827 | = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info)(loop_info)->loop, loop); |
3828 | unsigned nlatch_def_loop_uses = 0; |
3829 | auto_vec<gphi *, 3> lcphis; |
3830 | bool inner_loop_of_double_reduc = false; |
3831 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def)for ((use_p) = first_readonly_imm_use (&(imm_iter), (latch_def )); !end_readonly_imm_use_p (&(imm_iter)); (void) ((use_p ) = next_readonly_imm_use (&(imm_iter)))) |
3832 | { |
3833 | gimple *use_stmt = USE_STMT (use_p)(use_p)->loc.stmt; |
3834 | if (is_gimple_debug (use_stmt)) |
3835 | continue; |
3836 | if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) |
3837 | nlatch_def_loop_uses++; |
3838 | else |
3839 | { |
3840 | /* We can have more than one loop-closed PHI. */ |
3841 | lcphis.safe_push (as_a <gphi *> (use_stmt)); |
3842 | if (nested_in_vect_loop |
3843 | && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))(loop_info->lookup_stmt (use_stmt))->def_type |
3844 | == vect_double_reduction_def)) |
3845 | inner_loop_of_double_reduc = true; |
3846 | } |
3847 | } |
3848 | |
3849 | /* If we are vectorizing an inner reduction we are executing that |
3850 | in the original order only in case we are not dealing with a |
3851 | double reduction. */ |
3852 | if (nested_in_vect_loop && !inner_loop_of_double_reduc) |
3853 | { |
3854 | if (dump_enabled_p ()) |
3855 | report_vect_op (MSG_NOTE, def_stmt_info->stmt, |
3856 | "detected nested cycle: "); |
3857 | return def_stmt_info; |
3858 | } |
3859 | |
3860 | /* When the inner loop of a double reduction ends up with more than |
3861 | one loop-closed PHI we have failed to classify alternate such |
3862 | PHIs as double reduction, leading to wrong code. See PR103237. */ |
3863 | if (inner_loop_of_double_reduc && lcphis.length () != 1) |
3864 | { |
3865 | if (dump_enabled_p ()) |
3866 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3867 | "unhandle double reduction\n"); |
3868 | return NULLnullptr; |
3869 | } |
3870 | |
3871 | /* If this isn't a nested cycle or if the nested cycle reduction value |
3872 | is used ouside of the inner loop we cannot handle uses of the reduction |
3873 | value. */ |
3874 | if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1) |
3875 | { |
3876 | if (dump_enabled_p ()) |
3877 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3878 | "reduction used in loop.\n"); |
3879 | return NULLnullptr; |
3880 | } |
3881 | |
3882 | /* If DEF_STMT is a phi node itself, we expect it to have a single argument |
3883 | defined in the inner loop. */ |
3884 | if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt)) |
3885 | { |
3886 | tree op1 = PHI_ARG_DEF (def_stmt, 0)gimple_phi_arg_def ((def_stmt), (0)); |
3887 | if (gimple_phi_num_args (def_stmt) != 1 |
3888 | || TREE_CODE (op1)((enum tree_code) (op1)->base.code) != SSA_NAME) |
3889 | { |
3890 | if (dump_enabled_p ()) |
3891 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3892 | "unsupported phi node definition.\n"); |
3893 | |
3894 | return NULLnullptr; |
3895 | } |
3896 | |
3897 | /* Verify there is an inner cycle composed of the PHI phi_use_stmt |
3898 | and the latch definition op1. */ |
3899 | gimple *def1 = SSA_NAME_DEF_STMT (op1)(tree_check ((op1), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3899, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt; |
3900 | if (gimple_bb (def1) |
3901 | && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) |
3902 | && loop->inner |
3903 | && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) |
3904 | && (is_gimple_assign (def1) || is_gimple_call (def1)) |
3905 | && is_a <gphi *> (phi_use_stmt) |
3906 | && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)) |
3907 | && (op1 == PHI_ARG_DEF_FROM_EDGE (phi_use_stmt,gimple_phi_arg_def (((phi_use_stmt)), ((loop_latch_edge (loop ->inner))->dest_idx)) |
3908 | loop_latch_edge (loop->inner))gimple_phi_arg_def (((phi_use_stmt)), ((loop_latch_edge (loop ->inner))->dest_idx)))) |
3909 | { |
3910 | if (dump_enabled_p ()) |
3911 | report_vect_op (MSG_NOTE, def_stmt, |
3912 | "detected double reduction: "); |
3913 | |
3914 | *double_reduc = true; |
3915 | return def_stmt_info; |
3916 | } |
3917 | |
3918 | return NULLnullptr; |
3919 | } |
3920 | |
3921 | /* Look for the expression computing latch_def from then loop PHI result. */ |
3922 | auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; |
3923 | code_helper code; |
3924 | if (check_reduction_path (vect_location, loop, phi, latch_def, &code, |
3925 | path)) |
3926 | { |
3927 | STMT_VINFO_REDUC_CODE (phi_info)(phi_info)->reduc_code = code; |
3928 | if (code == COND_EXPR && !nested_in_vect_loop) |
3929 | STMT_VINFO_REDUC_TYPE (phi_info)(phi_info)->reduc_type = COND_REDUCTION; |
3930 | |
3931 | /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP |
3932 | reduction chain for which the additional restriction is that |
3933 | all operations in the chain are the same. */ |
3934 | auto_vec<stmt_vec_info, 8> reduc_chain; |
3935 | unsigned i; |
3936 | bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR; |
3937 | for (i = path.length () - 1; i >= 1; --i) |
3938 | { |
3939 | gimple *stmt = USE_STMT (path[i].second)(path[i].second)->loc.stmt; |
3940 | stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt); |
3941 | gimple_match_op op; |
3942 | if (!gimple_extract_op (stmt, &op)) |
3943 | gcc_unreachable ()(fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3943, __FUNCTION__)); |
3944 | if (gassign *assign = dyn_cast<gassign *> (stmt)) |
3945 | STMT_VINFO_REDUC_IDX (stmt_info)(stmt_info)->reduc_idx |
3946 | = path[i].second->use - gimple_assign_rhs1_ptr (assign); |
3947 | else |
3948 | { |
3949 | gcall *call = as_a<gcall *> (stmt); |
3950 | STMT_VINFO_REDUC_IDX (stmt_info)(stmt_info)->reduc_idx |
3951 | = path[i].second->use - gimple_call_arg_ptr (call, 0); |
3952 | } |
3953 | bool leading_conversion = (CONVERT_EXPR_CODE_P (op.code)((op.code) == NOP_EXPR || (op.code) == CONVERT_EXPR) |
3954 | && (i == 1 || i == path.length () - 1)); |
3955 | if ((op.code != code && !leading_conversion) |
3956 | /* We can only handle the final value in epilogue |
3957 | generation for reduction chains. */ |
3958 | || (i != 1 && !has_single_use (gimple_get_lhs (stmt)))) |
3959 | is_slp_reduc = false; |
3960 | /* For reduction chains we support a trailing/leading |
3961 | conversions. We do not store those in the actual chain. */ |
3962 | if (leading_conversion) |
3963 | continue; |
3964 | reduc_chain.safe_push (stmt_info); |
3965 | } |
3966 | if (slp && is_slp_reduc && reduc_chain.length () > 1) |
3967 | { |
3968 | for (unsigned i = 0; i < reduc_chain.length () - 1; ++i) |
3969 | { |
3970 | REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i])(((void)(!(!(reduc_chain[i])->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3970, __FUNCTION__), 0 : 0)), (reduc_chain[i])->first_element ) = reduc_chain[0]; |
3971 | REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i])(((void)(!(!(reduc_chain[i])->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3971, __FUNCTION__), 0 : 0)), (reduc_chain[i])->next_element ) = reduc_chain[i+1]; |
3972 | } |
3973 | REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ())(((void)(!(!(reduc_chain.last ())->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3973, __FUNCTION__), 0 : 0)), (reduc_chain.last ())->first_element ) = reduc_chain[0]; |
3974 | REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ())(((void)(!(!(reduc_chain.last ())->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3974, __FUNCTION__), 0 : 0)), (reduc_chain.last ())->next_element ) = NULLnullptr; |
3975 | |
3976 | /* Save the chain for further analysis in SLP detection. */ |
3977 | LOOP_VINFO_REDUCTION_CHAINS (loop_info)(loop_info)->reduction_chains.safe_push (reduc_chain[0]); |
3978 | REDUC_GROUP_SIZE (reduc_chain[0])(((void)(!(!(reduc_chain[0])->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 3978, __FUNCTION__), 0 : 0)), (reduc_chain[0])->size) = reduc_chain.length (); |
3979 | |
3980 | *reduc_chain_p = true; |
3981 | if (dump_enabled_p ()) |
3982 | dump_printf_loc (MSG_NOTE, vect_location, |
3983 | "reduction: detected reduction chain\n"); |
3984 | } |
3985 | else if (dump_enabled_p ()) |
3986 | dump_printf_loc (MSG_NOTE, vect_location, |
3987 | "reduction: detected reduction\n"); |
3988 | |
3989 | return def_stmt_info; |
3990 | } |
3991 | |
3992 | if (dump_enabled_p ()) |
3993 | dump_printf_loc (MSG_NOTE, vect_location, |
3994 | "reduction: unknown pattern\n"); |
3995 | |
3996 | return NULLnullptr; |
3997 | } |
3998 | |
3999 | /* Estimate the number of peeled epilogue iterations for LOOP_VINFO. |
4000 | PEEL_ITERS_PROLOGUE is the number of peeled prologue iterations, |
4001 | or -1 if not known. */ |
4002 | |
4003 | static int |
4004 | vect_get_peel_iters_epilogue (loop_vec_info loop_vinfo, int peel_iters_prologue) |
4005 | { |
4006 | int assumed_vf = vect_vf_for_cost (loop_vinfo); |
4007 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) || peel_iters_prologue == -1) |
4008 | { |
4009 | if (dump_enabled_p ()) |
4010 | dump_printf_loc (MSG_NOTE, vect_location, |
4011 | "cost model: epilogue peel iters set to vf/2 " |
4012 | "because loop iterations are unknown .\n"); |
4013 | return assumed_vf / 2; |
4014 | } |
4015 | else |
4016 | { |
4017 | int niters = LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)-> num_iters), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4017, __FUNCTION__)))); |
4018 | peel_iters_prologue = MIN (niters, peel_iters_prologue)((niters) < (peel_iters_prologue) ? (niters) : (peel_iters_prologue )); |
4019 | int peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf; |
4020 | /* If we need to peel for gaps, but no peeling is required, we have to |
4021 | peel VF iterations. */ |
4022 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps && !peel_iters_epilogue) |
4023 | peel_iters_epilogue = assumed_vf; |
4024 | return peel_iters_epilogue; |
4025 | } |
4026 | } |
4027 | |
4028 | /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ |
4029 | int |
4030 | vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, |
4031 | int *peel_iters_epilogue, |
4032 | stmt_vector_for_cost *scalar_cost_vec, |
4033 | stmt_vector_for_cost *prologue_cost_vec, |
4034 | stmt_vector_for_cost *epilogue_cost_vec) |
4035 | { |
4036 | int retval = 0; |
4037 | |
4038 | *peel_iters_epilogue |
4039 | = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue); |
4040 | |
4041 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0)) |
4042 | { |
4043 | /* If peeled iterations are known but number of scalar loop |
4044 | iterations are unknown, count a taken branch per peeled loop. */ |
4045 | if (peel_iters_prologue > 0) |
4046 | retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, |
4047 | vect_prologue); |
4048 | if (*peel_iters_epilogue > 0) |
4049 | retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken, |
4050 | vect_epilogue); |
4051 | } |
4052 | |
4053 | stmt_info_for_cost *si; |
4054 | int j; |
4055 | if (peel_iters_prologue) |
4056 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)for (j = 0; (*scalar_cost_vec).iterate ((j), &(si)); ++(j )) |
4057 | retval += record_stmt_cost (prologue_cost_vec, |
4058 | si->count * peel_iters_prologue, |
4059 | si->kind, si->stmt_info, si->misalign, |
4060 | vect_prologue); |
4061 | if (*peel_iters_epilogue) |
4062 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)for (j = 0; (*scalar_cost_vec).iterate ((j), &(si)); ++(j )) |
4063 | retval += record_stmt_cost (epilogue_cost_vec, |
4064 | si->count * *peel_iters_epilogue, |
4065 | si->kind, si->stmt_info, si->misalign, |
4066 | vect_epilogue); |
4067 | |
4068 | return retval; |
4069 | } |
4070 | |
4071 | /* Function vect_estimate_min_profitable_iters |
4072 | |
4073 | Return the number of iterations required for the vector version of the |
4074 | loop to be profitable relative to the cost of the scalar version of the |
4075 | loop. |
4076 | |
4077 | *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold |
4078 | of iterations for vectorization. -1 value means loop vectorization |
4079 | is not profitable. This returned value may be used for dynamic |
4080 | profitability check. |
4081 | |
4082 | *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used |
4083 | for static check against estimated number of iterations. */ |
4084 | |
4085 | static void |
4086 | vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, |
4087 | int *ret_min_profitable_niters, |
4088 | int *ret_min_profitable_estimate, |
4089 | unsigned *suggested_unroll_factor) |
4090 | { |
4091 | int min_profitable_iters; |
4092 | int min_profitable_estimate; |
4093 | int peel_iters_prologue; |
4094 | int peel_iters_epilogue; |
4095 | unsigned vec_inside_cost = 0; |
4096 | int vec_outside_cost = 0; |
4097 | unsigned vec_prologue_cost = 0; |
4098 | unsigned vec_epilogue_cost = 0; |
4099 | int scalar_single_iter_cost = 0; |
4100 | int scalar_outside_cost = 0; |
4101 | int assumed_vf = vect_vf_for_cost (loop_vinfo); |
4102 | int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment; |
4103 | vector_costs *target_cost_data = loop_vinfo->vector_costs; |
4104 | |
4105 | /* Cost model disabled. */ |
4106 | if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop)) |
4107 | { |
4108 | if (dump_enabled_p ()) |
4109 | dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n"); |
4110 | *ret_min_profitable_niters = 0; |
4111 | *ret_min_profitable_estimate = 0; |
4112 | return; |
4113 | } |
4114 | |
4115 | /* Requires loop versioning tests to handle misalignment. */ |
4116 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)((loop_vinfo)->may_misalign_stmts.length () > 0)) |
4117 | { |
4118 | /* FIXME: Make cost depend on complexity of individual check. */ |
4119 | unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)(loop_vinfo)->may_misalign_stmts.length (); |
4120 | (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue); |
4121 | if (dump_enabled_p ()) |
4122 | dump_printf (MSG_NOTE, |
4123 | "cost model: Adding cost of checks for loop " |
4124 | "versioning to treat misalignment.\n"); |
4125 | } |
4126 | |
4127 | /* Requires loop versioning with alias checks. */ |
4128 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)((loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0)) |
4129 | { |
4130 | /* FIXME: Make cost depend on complexity of individual check. */ |
4131 | unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo)(loop_vinfo)->comp_alias_ddrs.length (); |
4132 | (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue); |
4133 | len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo)(loop_vinfo)->check_unequal_addrs.length (); |
4134 | if (len) |
4135 | /* Count LEN - 1 ANDs and LEN comparisons. */ |
4136 | (void) add_stmt_cost (target_cost_data, len * 2 - 1, |
4137 | scalar_stmt, vect_prologue); |
4138 | len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)(loop_vinfo)->lower_bounds.length (); |
4139 | if (len) |
4140 | { |
4141 | /* Count LEN - 1 ANDs and LEN comparisons. */ |
4142 | unsigned int nstmts = len * 2 - 1; |
4143 | /* +1 for each bias that needs adding. */ |
4144 | for (unsigned int i = 0; i < len; ++i) |
4145 | if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)(loop_vinfo)->lower_bounds[i].unsigned_p) |
4146 | nstmts += 1; |
4147 | (void) add_stmt_cost (target_cost_data, nstmts, |
4148 | scalar_stmt, vect_prologue); |
4149 | } |
4150 | if (dump_enabled_p ()) |
4151 | dump_printf (MSG_NOTE, |
4152 | "cost model: Adding cost of checks for loop " |
4153 | "versioning aliasing.\n"); |
4154 | } |
4155 | |
4156 | /* Requires loop versioning with niter checks. */ |
4157 | if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo)((loop_vinfo)->num_iters_assumptions)) |
4158 | { |
4159 | /* FIXME: Make cost depend on complexity of individual check. */ |
4160 | (void) add_stmt_cost (target_cost_data, 1, vector_stmt, |
4161 | NULLnullptr, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue); |
4162 | if (dump_enabled_p ()) |
4163 | dump_printf (MSG_NOTE, |
4164 | "cost model: Adding cost of checks for loop " |
4165 | "versioning niters.\n"); |
4166 | } |
4167 | |
4168 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || (( loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond))) |
4169 | (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, |
4170 | vect_prologue); |
4171 | |
4172 | /* Count statements in scalar loop. Using this as scalar cost for a single |
4173 | iteration for now. |
4174 | |
4175 | TODO: Add outer loop support. |
4176 | |
4177 | TODO: Consider assigning different costs to different scalar |
4178 | statements. */ |
4179 | |
4180 | scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost (); |
4181 | |
4182 | /* Add additional cost for the peeled instructions in prologue and epilogue |
4183 | loop. (For fully-masked loops there will be no peeling.) |
4184 | |
4185 | FORNOW: If we don't know the value of peel_iters for prologue or epilogue |
4186 | at compile-time - we assume it's vf/2 (the worst would be vf-1). |
4187 | |
4188 | TODO: Build an expression that represents peel_iters for prologue and |
4189 | epilogue to be used in a run-time test. */ |
4190 | |
4191 | bool prologue_need_br_taken_cost = false; |
4192 | bool prologue_need_br_not_taken_cost = false; |
4193 | |
4194 | /* Calculate peel_iters_prologue. */ |
4195 | if (vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
4196 | peel_iters_prologue = 0; |
4197 | else if (npeel < 0) |
4198 | { |
4199 | peel_iters_prologue = assumed_vf / 2; |
4200 | if (dump_enabled_p ()) |
4201 | dump_printf (MSG_NOTE, "cost model: " |
4202 | "prologue peel iters set to vf/2.\n"); |
4203 | |
4204 | /* If peeled iterations are unknown, count a taken branch and a not taken |
4205 | branch per peeled loop. Even if scalar loop iterations are known, |
4206 | vector iterations are not known since peeled prologue iterations are |
4207 | not known. Hence guards remain the same. */ |
4208 | prologue_need_br_taken_cost = true; |
4209 | prologue_need_br_not_taken_cost = true; |
4210 | } |
4211 | else |
4212 | { |
4213 | peel_iters_prologue = npeel; |
4214 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) && peel_iters_prologue > 0) |
4215 | /* If peeled iterations are known but number of scalar loop |
4216 | iterations are unknown, count a taken branch per peeled loop. */ |
4217 | prologue_need_br_taken_cost = true; |
4218 | } |
4219 | |
4220 | bool epilogue_need_br_taken_cost = false; |
4221 | bool epilogue_need_br_not_taken_cost = false; |
4222 | |
4223 | /* Calculate peel_iters_epilogue. */ |
4224 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) |
4225 | /* We need to peel exactly one iteration for gaps. */ |
4226 | peel_iters_epilogue = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps ? 1 : 0; |
4227 | else if (npeel < 0) |
4228 | { |
4229 | /* If peeling for alignment is unknown, loop bound of main loop |
4230 | becomes unknown. */ |
4231 | peel_iters_epilogue = assumed_vf / 2; |
4232 | if (dump_enabled_p ()) |
4233 | dump_printf (MSG_NOTE, "cost model: " |
4234 | "epilogue peel iters set to vf/2 because " |
4235 | "peeling for alignment is unknown.\n"); |
4236 | |
4237 | /* See the same reason above in peel_iters_prologue calculation. */ |
4238 | epilogue_need_br_taken_cost = true; |
4239 | epilogue_need_br_not_taken_cost = true; |
4240 | } |
4241 | else |
4242 | { |
4243 | peel_iters_epilogue = vect_get_peel_iters_epilogue (loop_vinfo, npeel); |
4244 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) && peel_iters_epilogue > 0) |
4245 | /* If peeled iterations are known but number of scalar loop |
4246 | iterations are unknown, count a taken branch per peeled loop. */ |
4247 | epilogue_need_br_taken_cost = true; |
4248 | } |
4249 | |
4250 | stmt_info_for_cost *si; |
4251 | int j; |
4252 | /* Add costs associated with peel_iters_prologue. */ |
4253 | if (peel_iters_prologue) |
4254 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)for (j = 0; ((loop_vinfo)->scalar_cost_vec).iterate ((j), & (si)); ++(j)) |
4255 | { |
4256 | (void) add_stmt_cost (target_cost_data, |
4257 | si->count * peel_iters_prologue, si->kind, |
4258 | si->stmt_info, si->node, si->vectype, |
4259 | si->misalign, vect_prologue); |
4260 | } |
4261 | |
4262 | /* Add costs associated with peel_iters_epilogue. */ |
4263 | if (peel_iters_epilogue) |
4264 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)for (j = 0; ((loop_vinfo)->scalar_cost_vec).iterate ((j), & (si)); ++(j)) |
4265 | { |
4266 | (void) add_stmt_cost (target_cost_data, |
4267 | si->count * peel_iters_epilogue, si->kind, |
4268 | si->stmt_info, si->node, si->vectype, |
4269 | si->misalign, vect_epilogue); |
4270 | } |
4271 | |
4272 | /* Add possible cond_branch_taken/cond_branch_not_taken cost. */ |
4273 | |
4274 | if (prologue_need_br_taken_cost) |
4275 | (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, |
4276 | vect_prologue); |
4277 | |
4278 | if (prologue_need_br_not_taken_cost) |
4279 | (void) add_stmt_cost (target_cost_data, 1, |
4280 | cond_branch_not_taken, vect_prologue); |
4281 | |
4282 | if (epilogue_need_br_taken_cost) |
4283 | (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, |
4284 | vect_epilogue); |
4285 | |
4286 | if (epilogue_need_br_not_taken_cost) |
4287 | (void) add_stmt_cost (target_cost_data, 1, |
4288 | cond_branch_not_taken, vect_epilogue); |
4289 | |
4290 | /* Take care of special costs for rgroup controls of partial vectors. */ |
4291 | if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)((loop_vinfo)->using_partial_vectors_p && !(loop_vinfo )->masks.is_empty ())) |
4292 | { |
4293 | /* Calculate how many masks we need to generate. */ |
4294 | unsigned int num_masks = 0; |
4295 | rgroup_controls *rgm; |
4296 | unsigned int num_vectors_m1; |
4297 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)for (num_vectors_m1 = 0; ((loop_vinfo)->masks).iterate ((num_vectors_m1 ), &(rgm)); ++(num_vectors_m1)) |
4298 | if (rgm->type) |
4299 | num_masks += num_vectors_m1 + 1; |
4300 | gcc_assert (num_masks > 0)((void)(!(num_masks > 0) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4300, __FUNCTION__), 0 : 0)); |
4301 | |
4302 | /* In the worst case, we need to generate each mask in the prologue |
4303 | and in the loop body. One of the loop body mask instructions |
4304 | replaces the comparison in the scalar loop, and since we don't |
4305 | count the scalar comparison against the scalar body, we shouldn't |
4306 | count that vector instruction against the vector body either. |
4307 | |
4308 | Sometimes we can use unpacks instead of generating prologue |
4309 | masks and sometimes the prologue mask will fold to a constant, |
4310 | so the actual prologue cost might be smaller. However, it's |
4311 | simpler and safer to use the worst-case cost; if this ends up |
4312 | being the tie-breaker between vectorizing or not, then it's |
4313 | probably better not to vectorize. */ |
4314 | (void) add_stmt_cost (target_cost_data, num_masks, |
4315 | vector_stmt, NULLnullptr, NULLnullptr, NULL_TREE(tree) nullptr, 0, |
4316 | vect_prologue); |
4317 | (void) add_stmt_cost (target_cost_data, num_masks - 1, |
4318 | vector_stmt, NULLnullptr, NULLnullptr, NULL_TREE(tree) nullptr, 0, |
4319 | vect_body); |
4320 | } |
4321 | else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)((loop_vinfo)->using_partial_vectors_p && !(loop_vinfo )->lens.is_empty ())) |
4322 | { |
4323 | /* Referring to the functions vect_set_loop_condition_partial_vectors |
4324 | and vect_set_loop_controls_directly, we need to generate each |
4325 | length in the prologue and in the loop body if required. Although |
4326 | there are some possible optimizations, we consider the worst case |
4327 | here. */ |
4328 | |
4329 | bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0); |
4330 | signed char partial_load_store_bias |
4331 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo)(loop_vinfo)->partial_load_store_bias; |
4332 | bool need_iterate_p |
4333 | = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr) |
4334 | && !vect_known_niters_smaller_than_vf (loop_vinfo)); |
4335 | |
4336 | /* Calculate how many statements to be added. */ |
4337 | unsigned int prologue_stmts = 0; |
4338 | unsigned int body_stmts = 0; |
4339 | |
4340 | rgroup_controls *rgc; |
4341 | unsigned int num_vectors_m1; |
4342 | FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)for (num_vectors_m1 = 0; ((loop_vinfo)->lens).iterate ((num_vectors_m1 ), &(rgc)); ++(num_vectors_m1)) |
4343 | if (rgc->type) |
4344 | { |
4345 | /* May need one SHIFT for nitems_total computation. */ |
4346 | unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor; |
4347 | if (nitems != 1 && !niters_known_p) |
4348 | prologue_stmts += 1; |
4349 | |
4350 | /* May need one MAX and one MINUS for wrap around. */ |
4351 | if (vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc)) |
4352 | prologue_stmts += 2; |
4353 | |
4354 | /* Need one MAX and one MINUS for each batch limit excepting for |
4355 | the 1st one. */ |
4356 | prologue_stmts += num_vectors_m1 * 2; |
4357 | |
4358 | unsigned int num_vectors = num_vectors_m1 + 1; |
4359 | |
4360 | /* Need to set up lengths in prologue, only one MIN required |
4361 | for each since start index is zero. */ |
4362 | prologue_stmts += num_vectors; |
4363 | |
4364 | /* If we have a non-zero partial load bias, we need one PLUS |
4365 | to adjust the load length. */ |
4366 | if (partial_load_store_bias != 0) |
4367 | body_stmts += 1; |
4368 | |
4369 | /* Each may need two MINs and one MINUS to update lengths in body |
4370 | for next iteration. */ |
4371 | if (need_iterate_p) |
4372 | body_stmts += 3 * num_vectors; |
4373 | } |
4374 | |
4375 | (void) add_stmt_cost (target_cost_data, prologue_stmts, |
4376 | scalar_stmt, vect_prologue); |
4377 | (void) add_stmt_cost (target_cost_data, body_stmts, |
4378 | scalar_stmt, vect_body); |
4379 | } |
4380 | |
4381 | /* FORNOW: The scalar outside cost is incremented in one of the |
4382 | following ways: |
4383 | |
4384 | 1. The vectorizer checks for alignment and aliasing and generates |
4385 | a condition that allows dynamic vectorization. A cost model |
4386 | check is ANDED with the versioning condition. Hence scalar code |
4387 | path now has the added cost of the versioning check. |
4388 | |
4389 | if (cost > th & versioning_check) |
4390 | jmp to vector code |
4391 | |
4392 | Hence run-time scalar is incremented by not-taken branch cost. |
4393 | |
4394 | 2. The vectorizer then checks if a prologue is required. If the |
4395 | cost model check was not done before during versioning, it has to |
4396 | be done before the prologue check. |
4397 | |
4398 | if (cost <= th) |
4399 | prologue = scalar_iters |
4400 | if (prologue == 0) |
4401 | jmp to vector code |
4402 | else |
4403 | execute prologue |
4404 | if (prologue == num_iters) |
4405 | go to exit |
4406 | |
4407 | Hence the run-time scalar cost is incremented by a taken branch, |
4408 | plus a not-taken branch, plus a taken branch cost. |
4409 | |
4410 | 3. The vectorizer then checks if an epilogue is required. If the |
4411 | cost model check was not done before during prologue check, it |
4412 | has to be done with the epilogue check. |
4413 | |
4414 | if (prologue == 0) |
4415 | jmp to vector code |
4416 | else |
4417 | execute prologue |
4418 | if (prologue == num_iters) |
4419 | go to exit |
4420 | vector code: |
4421 | if ((cost <= th) | (scalar_iters-prologue-epilogue == 0)) |
4422 | jmp to epilogue |
4423 | |
4424 | Hence the run-time scalar cost should be incremented by 2 taken |
4425 | branches. |
4426 | |
4427 | TODO: The back end may reorder the BBS's differently and reverse |
4428 | conditions/branch directions. Change the estimates below to |
4429 | something more reasonable. */ |
4430 | |
4431 | /* If the number of iterations is known and we do not do versioning, we can |
4432 | decide whether to vectorize at compile time. Hence the scalar version |
4433 | do not carry cost model guard costs. */ |
4434 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) |
4435 | || LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || (( loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond))) |
4436 | { |
4437 | /* Cost model check occurs at versioning. */ |
4438 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || (( loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond))) |
4439 | scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken); |
4440 | else |
4441 | { |
4442 | /* Cost model check occurs at prologue generation. */ |
4443 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment < 0) |
4444 | scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken) |
4445 | + vect_get_stmt_cost (cond_branch_not_taken); |
4446 | /* Cost model check occurs at epilogue generation. */ |
4447 | else |
4448 | scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken); |
4449 | } |
4450 | } |
4451 | |
4452 | /* Complete the target-specific cost calculations. */ |
4453 | finish_cost (loop_vinfo->vector_costs, loop_vinfo->scalar_costs, |
4454 | &vec_prologue_cost, &vec_inside_cost, &vec_epilogue_cost, |
4455 | suggested_unroll_factor); |
4456 | |
4457 | if (suggested_unroll_factor && *suggested_unroll_factor > 1 |
4458 | && LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)(loop_vinfo)->max_vectorization_factor != MAX_VECTORIZATION_FACTOR2147483647 |
4459 | && !known_le (LOOP_VINFO_VECT_FACTOR (loop_vinfo) *(!maybe_lt ((loop_vinfo)->max_vectorization_factor, (loop_vinfo )->vectorization_factor * *suggested_unroll_factor)) |
4460 | *suggested_unroll_factor,(!maybe_lt ((loop_vinfo)->max_vectorization_factor, (loop_vinfo )->vectorization_factor * *suggested_unroll_factor)) |
4461 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo))(!maybe_lt ((loop_vinfo)->max_vectorization_factor, (loop_vinfo )->vectorization_factor * *suggested_unroll_factor))) |
4462 | { |
4463 | if (dump_enabled_p ()) |
4464 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4465 | "can't unroll as unrolled vectorization factor larger" |
4466 | " than maximum vectorization factor: " |
4467 | HOST_WIDE_INT_PRINT_UNSIGNED"%" "l" "u" "\n", |
4468 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)(loop_vinfo)->max_vectorization_factor); |
4469 | *suggested_unroll_factor = 1; |
4470 | } |
4471 | |
4472 | vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost); |
4473 | |
4474 | if (dump_enabled_p ()) |
4475 | { |
4476 | dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n"); |
4477 | dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n", |
4478 | vec_inside_cost); |
4479 | dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", |
4480 | vec_prologue_cost); |
4481 | dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", |
4482 | vec_epilogue_cost); |
4483 | dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n", |
4484 | scalar_single_iter_cost); |
4485 | dump_printf (MSG_NOTE, " Scalar outside cost: %d\n", |
4486 | scalar_outside_cost); |
4487 | dump_printf (MSG_NOTE, " Vector outside cost: %d\n", |
4488 | vec_outside_cost); |
4489 | dump_printf (MSG_NOTE, " prologue iterations: %d\n", |
4490 | peel_iters_prologue); |
4491 | dump_printf (MSG_NOTE, " epilogue iterations: %d\n", |
4492 | peel_iters_epilogue); |
4493 | } |
4494 | |
4495 | /* Calculate number of iterations required to make the vector version |
4496 | profitable, relative to the loop bodies only. The following condition |
4497 | must hold true: |
4498 | SIC * niters + SOC > VIC * ((niters - NPEEL) / VF) + VOC |
4499 | where |
4500 | SIC = scalar iteration cost, VIC = vector iteration cost, |
4501 | VOC = vector outside cost, VF = vectorization factor, |
4502 | NPEEL = prologue iterations + epilogue iterations, |
4503 | SOC = scalar outside cost for run time cost model check. */ |
4504 | |
4505 | int saving_per_viter = (scalar_single_iter_cost * assumed_vf |
4506 | - vec_inside_cost); |
4507 | if (saving_per_viter <= 0) |
4508 | { |
4509 | if (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop->force_vectorize) |
4510 | warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd, |
4511 | "vectorization did not happen for a simd loop"); |
4512 | |
4513 | if (dump_enabled_p ()) |
4514 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4515 | "cost model: the vector iteration cost = %d " |
4516 | "divided by the scalar iteration cost = %d " |
4517 | "is greater or equal to the vectorization factor = %d" |
4518 | ".\n", |
4519 | vec_inside_cost, scalar_single_iter_cost, assumed_vf); |
4520 | *ret_min_profitable_niters = -1; |
4521 | *ret_min_profitable_estimate = -1; |
4522 | return; |
4523 | } |
4524 | |
4525 | /* ??? The "if" arm is written to handle all cases; see below for what |
4526 | we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ |
4527 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) |
4528 | { |
4529 | /* Rewriting the condition above in terms of the number of |
4530 | vector iterations (vniters) rather than the number of |
4531 | scalar iterations (niters) gives: |
4532 | |
4533 | SIC * (vniters * VF + NPEEL) + SOC > VIC * vniters + VOC |
4534 | |
4535 | <==> vniters * (SIC * VF - VIC) > VOC - SIC * NPEEL - SOC |
4536 | |
4537 | For integer N, X and Y when X > 0: |
4538 | |
4539 | N * X > Y <==> N >= (Y /[floor] X) + 1. */ |
4540 | int outside_overhead = (vec_outside_cost |
4541 | - scalar_single_iter_cost * peel_iters_prologue |
4542 | - scalar_single_iter_cost * peel_iters_epilogue |
4543 | - scalar_outside_cost); |
4544 | /* We're only interested in cases that require at least one |
4545 | vector iteration. */ |
4546 | int min_vec_niters = 1; |
4547 | if (outside_overhead > 0) |
4548 | min_vec_niters = outside_overhead / saving_per_viter + 1; |
4549 | |
4550 | if (dump_enabled_p ()) |
4551 | dump_printf (MSG_NOTE, " Minimum number of vector iterations: %d\n", |
4552 | min_vec_niters); |
4553 | |
4554 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) |
4555 | { |
4556 | /* Now that we know the minimum number of vector iterations, |
4557 | find the minimum niters for which the scalar cost is larger: |
4558 | |
4559 | SIC * niters > VIC * vniters + VOC - SOC |
4560 | |
4561 | We know that the minimum niters is no more than |
4562 | vniters * VF + NPEEL, but it might be (and often is) less |
4563 | than that if a partial vector iteration is cheaper than the |
4564 | equivalent scalar code. */ |
4565 | int threshold = (vec_inside_cost * min_vec_niters |
4566 | + vec_outside_cost |
4567 | - scalar_outside_cost); |
4568 | if (threshold <= 0) |
4569 | min_profitable_iters = 1; |
4570 | else |
4571 | min_profitable_iters = threshold / scalar_single_iter_cost + 1; |
4572 | } |
4573 | else |
4574 | /* Convert the number of vector iterations into a number of |
4575 | scalar iterations. */ |
4576 | min_profitable_iters = (min_vec_niters * assumed_vf |
4577 | + peel_iters_prologue |
4578 | + peel_iters_epilogue); |
4579 | } |
4580 | else |
4581 | { |
4582 | min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) |
4583 | * assumed_vf |
4584 | - vec_inside_cost * peel_iters_prologue |
4585 | - vec_inside_cost * peel_iters_epilogue); |
4586 | if (min_profitable_iters <= 0) |
4587 | min_profitable_iters = 0; |
4588 | else |
4589 | { |
4590 | min_profitable_iters /= saving_per_viter; |
4591 | |
4592 | if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters) |
4593 | <= (((int) vec_inside_cost * min_profitable_iters) |
4594 | + (((int) vec_outside_cost - scalar_outside_cost) |
4595 | * assumed_vf))) |
4596 | min_profitable_iters++; |
4597 | } |
4598 | } |
4599 | |
4600 | if (dump_enabled_p ()) |
4601 | dump_printf (MSG_NOTE, |
4602 | " Calculated minimum iters for profitability: %d\n", |
4603 | min_profitable_iters); |
4604 | |
4605 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p |
4606 | && min_profitable_iters < (assumed_vf + peel_iters_prologue)) |
4607 | /* We want the vectorized loop to execute at least once. */ |
4608 | min_profitable_iters = assumed_vf + peel_iters_prologue; |
4609 | else if (min_profitable_iters < peel_iters_prologue) |
4610 | /* For LOOP_VINFO_USING_PARTIAL_VECTORS_P, we need to ensure the |
4611 | vectorized loop executes at least once. */ |
4612 | min_profitable_iters = peel_iters_prologue; |
4613 | |
4614 | if (dump_enabled_p ()) |
4615 | dump_printf_loc (MSG_NOTE, vect_location, |
4616 | " Runtime profitability threshold = %d\n", |
4617 | min_profitable_iters); |
4618 | |
4619 | *ret_min_profitable_niters = min_profitable_iters; |
4620 | |
4621 | /* Calculate number of iterations required to make the vector version |
4622 | profitable, relative to the loop bodies only. |
4623 | |
4624 | Non-vectorized variant is SIC * niters and it must win over vector |
4625 | variant on the expected loop trip count. The following condition must hold true: |
4626 | SIC * niters > VIC * ((niters - NPEEL) / VF) + VOC + SOC */ |
4627 | |
4628 | if (vec_outside_cost <= 0) |
4629 | min_profitable_estimate = 0; |
4630 | /* ??? This "else if" arm is written to handle all cases; see below for |
4631 | what we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ |
4632 | else if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) |
4633 | { |
4634 | /* This is a repeat of the code above, but with + SOC rather |
4635 | than - SOC. */ |
4636 | int outside_overhead = (vec_outside_cost |
4637 | - scalar_single_iter_cost * peel_iters_prologue |
4638 | - scalar_single_iter_cost * peel_iters_epilogue |
4639 | + scalar_outside_cost); |
4640 | int min_vec_niters = 1; |
4641 | if (outside_overhead > 0) |
4642 | min_vec_niters = outside_overhead / saving_per_viter + 1; |
4643 | |
4644 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) |
4645 | { |
4646 | int threshold = (vec_inside_cost * min_vec_niters |
4647 | + vec_outside_cost |
4648 | + scalar_outside_cost); |
4649 | min_profitable_estimate = threshold / scalar_single_iter_cost + 1; |
4650 | } |
4651 | else |
4652 | min_profitable_estimate = (min_vec_niters * assumed_vf |
4653 | + peel_iters_prologue |
4654 | + peel_iters_epilogue); |
4655 | } |
4656 | else |
4657 | { |
4658 | min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) |
4659 | * assumed_vf |
4660 | - vec_inside_cost * peel_iters_prologue |
4661 | - vec_inside_cost * peel_iters_epilogue) |
4662 | / ((scalar_single_iter_cost * assumed_vf) |
4663 | - vec_inside_cost); |
4664 | } |
4665 | min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters)((min_profitable_estimate) > (min_profitable_iters) ? (min_profitable_estimate ) : (min_profitable_iters)); |
4666 | if (dump_enabled_p ()) |
4667 | dump_printf_loc (MSG_NOTE, vect_location, |
4668 | " Static estimate profitability threshold = %d\n", |
4669 | min_profitable_estimate); |
4670 | |
4671 | *ret_min_profitable_estimate = min_profitable_estimate; |
4672 | } |
4673 | |
4674 | /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET |
4675 | vector elements (not bits) for a vector with NELT elements. */ |
4676 | static void |
4677 | calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt, |
4678 | vec_perm_builder *sel) |
4679 | { |
4680 | /* The encoding is a single stepped pattern. Any wrap-around is handled |
4681 | by vec_perm_indices. */ |
4682 | sel->new_vector (nelt, 1, 3); |
4683 | for (unsigned int i = 0; i < 3; i++) |
4684 | sel->quick_push (i + offset); |
4685 | } |
4686 | |
4687 | /* Checks whether the target supports whole-vector shifts for vectors of mode |
4688 | MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_ |
4689 | it supports vec_perm_const with masks for all necessary shift amounts. */ |
4690 | static bool |
4691 | have_whole_vector_shift (machine_mode mode) |
4692 | { |
4693 | if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing) |
4694 | return true; |
4695 | |
4696 | /* Variable-length vectors should be handled via the optab. */ |
4697 | unsigned int nelt; |
4698 | if (!GET_MODE_NUNITS (mode).is_constant (&nelt)) |
4699 | return false; |
4700 | |
4701 | vec_perm_builder sel; |
4702 | vec_perm_indices indices; |
4703 | for (unsigned int i = nelt / 2; i >= 1; i /= 2) |
4704 | { |
4705 | calc_vec_perm_mask_for_shift (i, nelt, &sel); |
4706 | indices.new_vector (sel, 2, nelt); |
4707 | if (!can_vec_perm_const_p (mode, mode, indices, false)) |
4708 | return false; |
4709 | } |
4710 | return true; |
4711 | } |
4712 | |
4713 | /* Return true if (a) STMT_INFO is a DOT_PROD_EXPR reduction whose |
4714 | multiplication operands have differing signs and (b) we intend |
4715 | to emulate the operation using a series of signed DOT_PROD_EXPRs. |
4716 | See vect_emulate_mixed_dot_prod for the actual sequence used. */ |
4717 | |
4718 | static bool |
4719 | vect_is_emulated_mixed_dot_prod (loop_vec_info loop_vinfo, |
4720 | stmt_vec_info stmt_info) |
4721 | { |
4722 | gassign *assign = dyn_cast<gassign *> (stmt_info->stmt); |
4723 | if (!assign || gimple_assign_rhs_code (assign) != DOT_PROD_EXPR) |
4724 | return false; |
4725 | |
4726 | tree rhs1 = gimple_assign_rhs1 (assign); |
4727 | tree rhs2 = gimple_assign_rhs2 (assign); |
4728 | if (TYPE_SIGN (TREE_TYPE (rhs1))((signop) ((tree_class_check ((((contains_struct_check ((rhs1 ), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4728, __FUNCTION__))->typed.type)), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4728, __FUNCTION__))->base.u.bits.unsigned_flag)) == TYPE_SIGN (TREE_TYPE (rhs2))((signop) ((tree_class_check ((((contains_struct_check ((rhs2 ), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4728, __FUNCTION__))->typed.type)), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4728, __FUNCTION__))->base.u.bits.unsigned_flag))) |
4729 | return false; |
4730 | |
4731 | stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); |
4732 | gcc_assert (reduc_info->is_reduc_info)((void)(!(reduc_info->is_reduc_info) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4732, __FUNCTION__), 0 : 0)); |
4733 | return !directly_supported_p (DOT_PROD_EXPR, |
4734 | STMT_VINFO_REDUC_VECTYPE_IN (reduc_info)(reduc_info)->reduc_vectype_in, |
4735 | optab_vector_mixed_sign); |
4736 | } |
4737 | |
4738 | /* TODO: Close dependency between vect_model_*_cost and vectorizable_* |
4739 | functions. Design better to avoid maintenance issues. */ |
4740 | |
4741 | /* Function vect_model_reduction_cost. |
4742 | |
4743 | Models cost for a reduction operation, including the vector ops |
4744 | generated within the strip-mine loop in some cases, the initial |
4745 | definition before the loop, and the epilogue code that must be generated. */ |
4746 | |
4747 | static void |
4748 | vect_model_reduction_cost (loop_vec_info loop_vinfo, |
4749 | stmt_vec_info stmt_info, internal_fn reduc_fn, |
4750 | vect_reduction_type reduction_type, |
4751 | int ncopies, stmt_vector_for_cost *cost_vec) |
4752 | { |
4753 | int prologue_cost = 0, epilogue_cost = 0, inside_cost = 0; |
4754 | tree vectype; |
4755 | machine_mode mode; |
4756 | class loop *loop = NULLnullptr; |
4757 | |
4758 | if (loop_vinfo) |
4759 | loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; |
4760 | |
4761 | /* Condition reductions generate two reductions in the loop. */ |
4762 | if (reduction_type == COND_REDUCTION) |
4763 | ncopies *= 2; |
4764 | |
4765 | vectype = STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype; |
4766 | mode = TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type) , "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4766, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (vectype) : (vectype)->type_common.mode); |
4767 | stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); |
4768 | |
4769 | gimple_match_op op; |
4770 | if (!gimple_extract_op (orig_stmt_info->stmt, &op)) |
4771 | gcc_unreachable ()(fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4771, __FUNCTION__)); |
4772 | |
4773 | bool emulated_mixed_dot_prod |
4774 | = vect_is_emulated_mixed_dot_prod (loop_vinfo, stmt_info); |
4775 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
4776 | /* No extra instructions are needed in the prologue. The loop body |
4777 | operations are costed in vectorizable_condition. */ |
4778 | inside_cost = 0; |
4779 | else if (reduction_type == FOLD_LEFT_REDUCTION) |
4780 | { |
4781 | /* No extra instructions needed in the prologue. */ |
4782 | prologue_cost = 0; |
4783 | |
4784 | if (reduc_fn != IFN_LAST) |
4785 | /* Count one reduction-like operation per vector. */ |
4786 | inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar, |
4787 | stmt_info, 0, vect_body); |
4788 | else |
4789 | { |
4790 | /* Use NELEMENTS extracts and NELEMENTS scalar ops. */ |
4791 | unsigned int nelements = ncopies * vect_nunits_for_cost (vectype); |
4792 | inside_cost = record_stmt_cost (cost_vec, nelements, |
4793 | vec_to_scalar, stmt_info, 0, |
4794 | vect_body); |
4795 | inside_cost += record_stmt_cost (cost_vec, nelements, |
4796 | scalar_stmt, stmt_info, 0, |
4797 | vect_body); |
4798 | } |
4799 | } |
4800 | else |
4801 | { |
4802 | /* Add in the cost of the initial definitions. */ |
4803 | int prologue_stmts; |
4804 | if (reduction_type == COND_REDUCTION) |
4805 | /* For cond reductions we have four vectors: initial index, step, |
4806 | initial result of the data reduction, initial value of the index |
4807 | reduction. */ |
4808 | prologue_stmts = 4; |
4809 | else if (emulated_mixed_dot_prod) |
4810 | /* We need the initial reduction value and two invariants: |
4811 | one that contains the minimum signed value and one that |
4812 | contains half of its negative. */ |
4813 | prologue_stmts = 3; |
4814 | else |
4815 | prologue_stmts = 1; |
4816 | prologue_cost += record_stmt_cost (cost_vec, prologue_stmts, |
4817 | scalar_to_vec, stmt_info, 0, |
4818 | vect_prologue); |
4819 | } |
4820 | |
4821 | /* Determine cost of epilogue code. |
4822 | |
4823 | We have a reduction operator that will reduce the vector in one statement. |
4824 | Also requires scalar extract. */ |
4825 | |
4826 | if (!loop || !nested_in_vect_loop_p (loop, orig_stmt_info)) |
4827 | { |
4828 | if (reduc_fn != IFN_LAST) |
4829 | { |
4830 | if (reduction_type == COND_REDUCTION) |
4831 | { |
4832 | /* An EQ stmt and an COND_EXPR stmt. */ |
4833 | epilogue_cost += record_stmt_cost (cost_vec, 2, |
4834 | vector_stmt, stmt_info, 0, |
4835 | vect_epilogue); |
4836 | /* Reduction of the max index and a reduction of the found |
4837 | values. */ |
4838 | epilogue_cost += record_stmt_cost (cost_vec, 2, |
4839 | vec_to_scalar, stmt_info, 0, |
4840 | vect_epilogue); |
4841 | /* A broadcast of the max value. */ |
4842 | epilogue_cost += record_stmt_cost (cost_vec, 1, |
4843 | scalar_to_vec, stmt_info, 0, |
4844 | vect_epilogue); |
4845 | } |
4846 | else |
4847 | { |
4848 | epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt, |
4849 | stmt_info, 0, vect_epilogue); |
4850 | epilogue_cost += record_stmt_cost (cost_vec, 1, |
4851 | vec_to_scalar, stmt_info, 0, |
4852 | vect_epilogue); |
4853 | } |
4854 | } |
4855 | else if (reduction_type == COND_REDUCTION) |
4856 | { |
4857 | unsigned estimated_nunits = vect_nunits_for_cost (vectype); |
4858 | /* Extraction of scalar elements. */ |
4859 | epilogue_cost += record_stmt_cost (cost_vec, |
4860 | 2 * estimated_nunits, |
4861 | vec_to_scalar, stmt_info, 0, |
4862 | vect_epilogue); |
4863 | /* Scalar max reductions via COND_EXPR / MAX_EXPR. */ |
4864 | epilogue_cost += record_stmt_cost (cost_vec, |
4865 | 2 * estimated_nunits - 3, |
4866 | scalar_stmt, stmt_info, 0, |
4867 | vect_epilogue); |
4868 | } |
4869 | else if (reduction_type == EXTRACT_LAST_REDUCTION |
4870 | || reduction_type == FOLD_LEFT_REDUCTION) |
4871 | /* No extra instructions need in the epilogue. */ |
4872 | ; |
4873 | else |
4874 | { |
4875 | int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)((tree_class_check ((vectype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4875, __FUNCTION__))->type_common.size)); |
4876 | tree bitsize = TYPE_SIZE (op.type)((tree_class_check ((op.type), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4876, __FUNCTION__))->type_common.size); |
4877 | int element_bitsize = tree_to_uhwi (bitsize); |
4878 | int nelements = vec_size_in_bits / element_bitsize; |
4879 | |
4880 | if (op.code == COND_EXPR) |
4881 | op.code = MAX_EXPR; |
4882 | |
4883 | /* We have a whole vector shift available. */ |
4884 | if (VECTOR_MODE_P (mode)(((enum mode_class) mode_class[mode]) == MODE_VECTOR_BOOL || ( (enum mode_class) mode_class[mode]) == MODE_VECTOR_INT || ((enum mode_class) mode_class[mode]) == MODE_VECTOR_FLOAT || ((enum mode_class) mode_class[mode]) == MODE_VECTOR_FRACT || ((enum mode_class) mode_class[mode]) == MODE_VECTOR_UFRACT || ((enum mode_class) mode_class[mode]) == MODE_VECTOR_ACCUM || ((enum mode_class) mode_class[mode]) == MODE_VECTOR_UACCUM) |
4885 | && directly_supported_p (op.code, vectype) |
4886 | && have_whole_vector_shift (mode)) |
4887 | { |
4888 | /* Final reduction via vector shifts and the reduction operator. |
4889 | Also requires scalar extract. */ |
4890 | epilogue_cost += record_stmt_cost (cost_vec, |
4891 | exact_log2 (nelements) * 2, |
4892 | vector_stmt, stmt_info, 0, |
4893 | vect_epilogue); |
4894 | epilogue_cost += record_stmt_cost (cost_vec, 1, |
4895 | vec_to_scalar, stmt_info, 0, |
4896 | vect_epilogue); |
4897 | } |
4898 | else |
4899 | /* Use extracts and reduction op for final reduction. For N |
4900 | elements, we have N extracts and N-1 reduction ops. */ |
4901 | epilogue_cost += record_stmt_cost (cost_vec, |
4902 | nelements + nelements - 1, |
4903 | vector_stmt, stmt_info, 0, |
4904 | vect_epilogue); |
4905 | } |
4906 | } |
4907 | |
4908 | if (dump_enabled_p ()) |
4909 | dump_printf (MSG_NOTE, |
4910 | "vect_model_reduction_cost: inside_cost = %d, " |
4911 | "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost, |
4912 | prologue_cost, epilogue_cost); |
4913 | } |
4914 | |
4915 | /* SEQ is a sequence of instructions that initialize the reduction |
4916 | described by REDUC_INFO. Emit them in the appropriate place. */ |
4917 | |
4918 | static void |
4919 | vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo, |
4920 | stmt_vec_info reduc_info, gimple *seq) |
4921 | { |
4922 | if (reduc_info->reused_accumulator) |
4923 | { |
4924 | /* When reusing an accumulator from the main loop, we only need |
4925 | initialization instructions if the main loop can be skipped. |
4926 | In that case, emit the initialization instructions at the end |
4927 | of the guard block that does the skip. */ |
4928 | edge skip_edge = loop_vinfo->skip_main_loop_edge; |
4929 | gcc_assert (skip_edge)((void)(!(skip_edge) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4929, __FUNCTION__), 0 : 0)); |
4930 | gimple_stmt_iterator gsi = gsi_last_bb (skip_edge->src); |
4931 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); |
4932 | } |
4933 | else |
4934 | { |
4935 | /* The normal case: emit the initialization instructions on the |
4936 | preheader edge. */ |
4937 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; |
4938 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), seq); |
4939 | } |
4940 | } |
4941 | |
4942 | /* Function get_initial_def_for_reduction |
4943 | |
4944 | Input: |
4945 | REDUC_INFO - the info_for_reduction |
4946 | INIT_VAL - the initial value of the reduction variable |
4947 | NEUTRAL_OP - a value that has no effect on the reduction, as per |
4948 | neutral_op_for_reduction |
4949 | |
4950 | Output: |
4951 | Return a vector variable, initialized according to the operation that |
4952 | STMT_VINFO performs. This vector will be used as the initial value |
4953 | of the vector of partial results. |
4954 | |
4955 | The value we need is a vector in which element 0 has value INIT_VAL |
4956 | and every other element has value NEUTRAL_OP. */ |
4957 | |
4958 | static tree |
4959 | get_initial_def_for_reduction (loop_vec_info loop_vinfo, |
4960 | stmt_vec_info reduc_info, |
4961 | tree init_val, tree neutral_op) |
4962 | { |
4963 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; |
4964 | tree scalar_type = TREE_TYPE (init_val)((contains_struct_check ((init_val), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4964, __FUNCTION__))->typed.type); |
4965 | tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); |
4966 | tree init_def; |
4967 | gimple_seq stmts = NULLnullptr; |
4968 | |
4969 | gcc_assert (vectype)((void)(!(vectype) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4969, __FUNCTION__), 0 : 0)); |
4970 | |
4971 | gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)((void)(!((((enum tree_code) (scalar_type)->base.code) == POINTER_TYPE || ((enum tree_code) (scalar_type)->base.code) == REFERENCE_TYPE ) || (((enum tree_code) (scalar_type)->base.code) == ENUMERAL_TYPE || ((enum tree_code) (scalar_type)->base.code) == BOOLEAN_TYPE || ((enum tree_code) (scalar_type)->base.code) == INTEGER_TYPE ) || (((enum tree_code) (scalar_type)->base.code) == REAL_TYPE )) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4972, __FUNCTION__), 0 : 0)) |
4972 | || SCALAR_FLOAT_TYPE_P (scalar_type))((void)(!((((enum tree_code) (scalar_type)->base.code) == POINTER_TYPE || ((enum tree_code) (scalar_type)->base.code) == REFERENCE_TYPE ) || (((enum tree_code) (scalar_type)->base.code) == ENUMERAL_TYPE || ((enum tree_code) (scalar_type)->base.code) == BOOLEAN_TYPE || ((enum tree_code) (scalar_type)->base.code) == INTEGER_TYPE ) || (((enum tree_code) (scalar_type)->base.code) == REAL_TYPE )) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4972, __FUNCTION__), 0 : 0)); |
4973 | |
4974 | gcc_assert (nested_in_vect_loop_p (loop, reduc_info)((void)(!(nested_in_vect_loop_p (loop, reduc_info) || loop == (gimple_bb (reduc_info->stmt))->loop_father) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4975, __FUNCTION__), 0 : 0)) |
4975 | || loop == (gimple_bb (reduc_info->stmt))->loop_father)((void)(!(nested_in_vect_loop_p (loop, reduc_info) || loop == (gimple_bb (reduc_info->stmt))->loop_father) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4975, __FUNCTION__), 0 : 0)); |
4976 | |
4977 | if (operand_equal_p (init_val, neutral_op)) |
4978 | { |
4979 | /* If both elements are equal then the vector described above is |
4980 | just a splat. */ |
4981 | neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4981, __FUNCTION__))->typed.type), neutral_op); |
4982 | init_def = gimple_build_vector_from_val (&stmts, vectype, neutral_op); |
4983 | } |
4984 | else |
4985 | { |
4986 | neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4986, __FUNCTION__))->typed.type), neutral_op); |
4987 | init_val = gimple_convert (&stmts, TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4987, __FUNCTION__))->typed.type), init_val); |
4988 | if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) |
4989 | { |
4990 | /* Construct a splat of NEUTRAL_OP and insert INIT_VAL into |
4991 | element 0. */ |
4992 | init_def = gimple_build_vector_from_val (&stmts, vectype, |
4993 | neutral_op); |
4994 | init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT, |
4995 | vectype, init_def, init_val); |
4996 | } |
4997 | else |
4998 | { |
4999 | /* Build {INIT_VAL, NEUTRAL_OP, NEUTRAL_OP, ...}. */ |
5000 | tree_vector_builder elts (vectype, 1, 2); |
5001 | elts.quick_push (init_val); |
5002 | elts.quick_push (neutral_op); |
5003 | init_def = gimple_build_vector (&stmts, &elts); |
5004 | } |
5005 | } |
5006 | |
5007 | if (stmts) |
5008 | vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, stmts); |
5009 | return init_def; |
5010 | } |
5011 | |
5012 | /* Get at the initial defs for the reduction PHIs for REDUC_INFO, |
5013 | which performs a reduction involving GROUP_SIZE scalar statements. |
5014 | NUMBER_OF_VECTORS is the number of vector defs to create. If NEUTRAL_OP |
5015 | is nonnull, introducing extra elements of that value will not change the |
5016 | result. */ |
5017 | |
5018 | static void |
5019 | get_initial_defs_for_reduction (loop_vec_info loop_vinfo, |
5020 | stmt_vec_info reduc_info, |
5021 | vec<tree> *vec_oprnds, |
5022 | unsigned int number_of_vectors, |
5023 | unsigned int group_size, tree neutral_op) |
5024 | { |
5025 | vec<tree> &initial_values = reduc_info->reduc_initial_values; |
5026 | unsigned HOST_WIDE_INTlong nunits; |
5027 | unsigned j, number_of_places_left_in_vector; |
5028 | tree vector_type = STMT_VINFO_VECTYPE (reduc_info)(reduc_info)->vectype; |
5029 | unsigned int i; |
5030 | |
5031 | gcc_assert (group_size == initial_values.length () || neutral_op)((void)(!(group_size == initial_values.length () || neutral_op ) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5031, __FUNCTION__), 0 : 0)); |
5032 | |
5033 | /* NUMBER_OF_COPIES is the number of times we need to use the same values in |
5034 | created vectors. It is greater than 1 if unrolling is performed. |
5035 | |
5036 | For example, we have two scalar operands, s1 and s2 (e.g., group of |
5037 | strided accesses of size two), while NUNITS is four (i.e., four scalars |
5038 | of this type can be packed in a vector). The output vector will contain |
5039 | two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES |
5040 | will be 2). |
5041 | |
5042 | If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several |
5043 | vectors containing the operands. |
5044 | |
5045 | For example, NUNITS is four as before, and the group size is 8 |
5046 | (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and |
5047 | {s5, s6, s7, s8}. */ |
5048 | |
5049 | if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits)) |
5050 | nunits = group_size; |
5051 | |
5052 | number_of_places_left_in_vector = nunits; |
5053 | bool constant_p = true; |
5054 | tree_vector_builder elts (vector_type, nunits, 1); |
5055 | elts.quick_grow (nunits); |
5056 | gimple_seq ctor_seq = NULLnullptr; |
5057 | for (j = 0; j < nunits * number_of_vectors; ++j) |
5058 | { |
5059 | tree op; |
5060 | i = j % group_size; |
5061 | |
5062 | /* Get the def before the loop. In reduction chain we have only |
5063 | one initial value. Else we have as many as PHIs in the group. */ |
5064 | if (i >= initial_values.length () || (j > i && neutral_op)) |
5065 | op = neutral_op; |
5066 | else |
5067 | op = initial_values[i]; |
5068 | |
5069 | /* Create 'vect_ = {op0,op1,...,opn}'. */ |
5070 | number_of_places_left_in_vector--; |
5071 | elts[nunits - number_of_places_left_in_vector - 1] = op; |
5072 | if (!CONSTANT_CLASS_P (op)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code ) (op)->base.code))] == tcc_constant)) |
5073 | constant_p = false; |
5074 | |
5075 | if (number_of_places_left_in_vector == 0) |
5076 | { |
5077 | tree init; |
5078 | if (constant_p && !neutral_op |
5079 | ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits) |
5080 | : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)(!maybe_ne (TYPE_VECTOR_SUBPARTS (vector_type), nunits))) |
5081 | /* Build the vector directly from ELTS. */ |
5082 | init = gimple_build_vector (&ctor_seq, &elts); |
5083 | else if (neutral_op) |
5084 | { |
5085 | /* Build a vector of the neutral value and shift the |
5086 | other elements into place. */ |
5087 | init = gimple_build_vector_from_val (&ctor_seq, vector_type, |
5088 | neutral_op); |
5089 | int k = nunits; |
5090 | while (k > 0 && elts[k - 1] == neutral_op) |
5091 | k -= 1; |
5092 | while (k > 0) |
5093 | { |
5094 | k -= 1; |
5095 | init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT, |
5096 | vector_type, init, elts[k]); |
5097 | } |
5098 | } |
5099 | else |
5100 | { |
5101 | /* First time round, duplicate ELTS to fill the |
5102 | required number of vectors. */ |
5103 | duplicate_and_interleave (loop_vinfo, &ctor_seq, vector_type, |
5104 | elts, number_of_vectors, *vec_oprnds); |
5105 | break; |
5106 | } |
5107 | vec_oprnds->quick_push (init); |
5108 | |
5109 | number_of_places_left_in_vector = nunits; |
5110 | elts.new_vector (vector_type, nunits, 1); |
5111 | elts.quick_grow (nunits); |
5112 | constant_p = true; |
5113 | } |
5114 | } |
5115 | if (ctor_seq != NULLnullptr) |
5116 | vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, ctor_seq); |
5117 | } |
5118 | |
5119 | /* For a statement STMT_INFO taking part in a reduction operation return |
5120 | the stmt_vec_info the meta information is stored on. */ |
5121 | |
5122 | stmt_vec_info |
5123 | info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info) |
5124 | { |
5125 | stmt_info = vect_orig_stmt (stmt_info); |
5126 | gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info))((void)(!((stmt_info)->reduc_def) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5126, __FUNCTION__), 0 : 0)); |
5127 | if (!is_a <gphi *> (stmt_info->stmt) |
5128 | || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))((((stmt_info)->def_type) == vect_reduction_def) || (((stmt_info )->def_type) == vect_double_reduction_def) || (((stmt_info )->def_type) == vect_nested_cycle))) |
5129 | stmt_info = STMT_VINFO_REDUC_DEF (stmt_info)(stmt_info)->reduc_def; |
5130 | gphi *phi = as_a <gphi *> (stmt_info->stmt); |
5131 | if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_double_reduction_def) |
5132 | { |
5133 | if (gimple_phi_num_args (phi) == 1) |
5134 | stmt_info = STMT_VINFO_REDUC_DEF (stmt_info)(stmt_info)->reduc_def; |
5135 | } |
5136 | else if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_nested_cycle) |
5137 | { |
5138 | stmt_vec_info info = vinfo->lookup_def (vect_phi_initial_value (phi)); |
5139 | if (info && STMT_VINFO_DEF_TYPE (info)(info)->def_type == vect_double_reduction_def) |
5140 | stmt_info = info; |
5141 | } |
5142 | return stmt_info; |
5143 | } |
5144 | |
5145 | /* See if LOOP_VINFO is an epilogue loop whose main loop had a reduction that |
5146 | REDUC_INFO can build on. Adjust REDUC_INFO and return true if so, otherwise |
5147 | return false. */ |
5148 | |
5149 | static bool |
5150 | vect_find_reusable_accumulator (loop_vec_info loop_vinfo, |
5151 | stmt_vec_info reduc_info) |
5152 | { |
5153 | loop_vec_info main_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info; |
5154 | if (!main_loop_vinfo) |
5155 | return false; |
5156 | |
5157 | if (STMT_VINFO_REDUC_TYPE (reduc_info)(reduc_info)->reduc_type != TREE_CODE_REDUCTION) |
5158 | return false; |
5159 | |
5160 | unsigned int num_phis = reduc_info->reduc_initial_values.length (); |
5161 | auto_vec<tree, 16> main_loop_results (num_phis); |
5162 | auto_vec<tree, 16> initial_values (num_phis); |
5163 | if (edge main_loop_edge = loop_vinfo->main_loop_edge) |
5164 | { |
5165 | /* The epilogue loop can be entered either from the main loop or |
5166 | from an earlier guard block. */ |
5167 | edge skip_edge = loop_vinfo->skip_main_loop_edge; |
5168 | for (tree incoming_value : reduc_info->reduc_initial_values) |
5169 | { |
5170 | /* Look for: |
5171 | |
5172 | INCOMING_VALUE = phi<MAIN_LOOP_RESULT(main loop), |
5173 | INITIAL_VALUE(guard block)>. */ |
5174 | gcc_assert (TREE_CODE (incoming_value) == SSA_NAME)((void)(!(((enum tree_code) (incoming_value)->base.code) == SSA_NAME) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5174, __FUNCTION__), 0 : 0)); |
5175 | |
5176 | gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (incoming_value)(tree_check ((incoming_value), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5176, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt); |
5177 | gcc_assert (gimple_bb (phi) == main_loop_edge->dest)((void)(!(gimple_bb (phi) == main_loop_edge->dest) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5177, __FUNCTION__), 0 : 0)); |
5178 | |
5179 | tree from_main_loop = PHI_ARG_DEF_FROM_EDGE (phi, main_loop_edge)gimple_phi_arg_def (((phi)), ((main_loop_edge)->dest_idx)); |
5180 | tree from_skip = PHI_ARG_DEF_FROM_EDGE (phi, skip_edge)gimple_phi_arg_def (((phi)), ((skip_edge)->dest_idx)); |
5181 | |
5182 | main_loop_results.quick_push (from_main_loop); |
5183 | initial_values.quick_push (from_skip); |
5184 | } |
5185 | } |
5186 | else |
5187 | /* The main loop dominates the epilogue loop. */ |
5188 | main_loop_results.splice (reduc_info->reduc_initial_values); |
5189 | |
5190 | /* See if the main loop has the kind of accumulator we need. */ |
5191 | vect_reusable_accumulator *accumulator |
5192 | = main_loop_vinfo->reusable_accumulators.get (main_loop_results[0]); |
5193 | if (!accumulator |
5194 | || num_phis != accumulator->reduc_info->reduc_scalar_results.length () |
5195 | || !std::equal (main_loop_results.begin (), main_loop_results.end (), |
5196 | accumulator->reduc_info->reduc_scalar_results.begin ())) |
5197 | return false; |
5198 | |
5199 | /* Handle the case where we can reduce wider vectors to narrower ones. */ |
5200 | tree vectype = STMT_VINFO_VECTYPE (reduc_info)(reduc_info)->vectype; |
5201 | tree old_vectype = TREE_TYPE (accumulator->reduc_input)((contains_struct_check ((accumulator->reduc_input), (TS_TYPED ), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5201, __FUNCTION__))->typed.type); |
5202 | unsigned HOST_WIDE_INTlong m; |
5203 | if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype), |
5204 | TYPE_VECTOR_SUBPARTS (vectype), &m)) |
5205 | return false; |
5206 | /* Check the intermediate vector types and operations are available. */ |
5207 | tree prev_vectype = old_vectype; |
5208 | poly_uint64 intermediate_nunits = TYPE_VECTOR_SUBPARTS (old_vectype); |
5209 | while (known_gt (intermediate_nunits, TYPE_VECTOR_SUBPARTS (vectype))(!maybe_le (intermediate_nunits, TYPE_VECTOR_SUBPARTS (vectype )))) |
5210 | { |
5211 | intermediate_nunits = exact_div (intermediate_nunits, 2); |
5212 | tree intermediate_vectype = get_related_vectype_for_scalar_type |
5213 | (TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type) , "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5213, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (vectype) : (vectype)->type_common.mode), TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5213, __FUNCTION__))->typed.type), intermediate_nunits); |
5214 | if (!intermediate_vectype |
5215 | || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info)(reduc_info)->reduc_code, |
5216 | intermediate_vectype) |
5217 | || !can_vec_extract (TYPE_MODE (prev_vectype)((((enum tree_code) ((tree_class_check ((prev_vectype), (tcc_type ), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5217, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (prev_vectype) : (prev_vectype)->type_common.mode), |
5218 | TYPE_MODE (intermediate_vectype)((((enum tree_code) ((tree_class_check ((intermediate_vectype ), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5218, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (intermediate_vectype) : (intermediate_vectype)->type_common .mode))) |
5219 | return false; |
5220 | prev_vectype = intermediate_vectype; |
5221 | } |
5222 | |
5223 | /* Non-SLP reductions might apply an adjustment after the reduction |
5224 | operation, in order to simplify the initialization of the accumulator. |
5225 | If the epilogue loop carries on from where the main loop left off, |
5226 | it should apply the same adjustment to the final reduction result. |
5227 | |
5228 | If the epilogue loop can also be entered directly (rather than via |
5229 | the main loop), we need to be able to handle that case in the same way, |
5230 | with the same adjustment. (In principle we could add a PHI node |
5231 | to select the correct adjustment, but in practice that shouldn't be |
5232 | necessary.) */ |
5233 | tree main_adjustment |
5234 | = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (accumulator->reduc_info)(accumulator->reduc_info)->reduc_epilogue_adjustment; |
5235 | if (loop_vinfo->main_loop_edge && main_adjustment) |
5236 | { |
5237 | gcc_assert (num_phis == 1)((void)(!(num_phis == 1) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5237, __FUNCTION__), 0 : 0)); |
5238 | tree initial_value = initial_values[0]; |
5239 | /* Check that we can use INITIAL_VALUE as the adjustment and |
5240 | initialize the accumulator with a neutral value instead. */ |
5241 | if (!operand_equal_p (initial_value, main_adjustment)) |
5242 | return false; |
5243 | code_helper code = STMT_VINFO_REDUC_CODE (reduc_info)(reduc_info)->reduc_code; |
5244 | initial_values[0] = neutral_op_for_reduction (TREE_TYPE (initial_value)((contains_struct_check ((initial_value), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5244, __FUNCTION__))->typed.type), |
5245 | code, initial_value); |
5246 | } |
5247 | STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)(reduc_info)->reduc_epilogue_adjustment = main_adjustment; |
5248 | reduc_info->reduc_initial_values.truncate (0); |
5249 | reduc_info->reduc_initial_values.splice (initial_values); |
5250 | reduc_info->reused_accumulator = accumulator; |
5251 | return true; |
5252 | } |
5253 | |
5254 | /* Reduce the vector VEC_DEF down to VECTYPE with reduction operation |
5255 | CODE emitting stmts before GSI. Returns a vector def of VECTYPE. */ |
5256 | |
5257 | static tree |
5258 | vect_create_partial_epilog (tree vec_def, tree vectype, code_helper code, |
5259 | gimple_seq *seq) |
5260 | { |
5261 | unsigned nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec_def)((contains_struct_check ((vec_def), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5261, __FUNCTION__))->typed.type)).to_constant (); |
5262 | unsigned nunits1 = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); |
5263 | tree stype = TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5263, __FUNCTION__))->typed.type); |
5264 | tree new_temp = vec_def; |
5265 | while (nunits > nunits1) |
5266 | { |
5267 | nunits /= 2; |
5268 | tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type) , "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5268, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (vectype) : (vectype)->type_common.mode), |
5269 | stype, nunits); |
5270 | unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1)((tree_class_check ((vectype1), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5270, __FUNCTION__))->type_common.size)); |
5271 | |
5272 | /* The target has to make sure we support lowpart/highpart |
5273 | extraction, either via direct vector extract or through |
5274 | an integer mode punning. */ |
5275 | tree dst1, dst2; |
5276 | gimple *epilog_stmt; |
5277 | if (convert_optab_handler (vec_extract_optab, |
5278 | TYPE_MODE (TREE_TYPE (new_temp))((((enum tree_code) ((tree_class_check ((((contains_struct_check ((new_temp), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5278, __FUNCTION__))->typed.type)), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5278, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (((contains_struct_check ((new_temp), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5278, __FUNCTION__))->typed.type)) : (((contains_struct_check ((new_temp), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5278, __FUNCTION__))->typed.type))->type_common.mode), |
5279 | TYPE_MODE (vectype1)((((enum tree_code) ((tree_class_check ((vectype1), (tcc_type ), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5279, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (vectype1) : (vectype1)->type_common.mode)) |
5280 | != CODE_FOR_nothing) |
5281 | { |
5282 | /* Extract sub-vectors directly once vec_extract becomes |
5283 | a conversion optab. */ |
5284 | dst1 = make_ssa_name (vectype1); |
5285 | epilog_stmt |
5286 | = gimple_build_assign (dst1, BIT_FIELD_REF, |
5287 | build3 (BIT_FIELD_REF, vectype1, |
5288 | new_temp, TYPE_SIZE (vectype1)((tree_class_check ((vectype1), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5288, __FUNCTION__))->type_common.size), |
5289 | bitsize_int (0)size_int_kind (0, stk_bitsizetype))); |
5290 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5291 | dst2 = make_ssa_name (vectype1); |
5292 | epilog_stmt |
5293 | = gimple_build_assign (dst2, BIT_FIELD_REF, |
5294 | build3 (BIT_FIELD_REF, vectype1, |
5295 | new_temp, TYPE_SIZE (vectype1)((tree_class_check ((vectype1), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5295, __FUNCTION__))->type_common.size), |
5296 | bitsize_int (bitsize)size_int_kind (bitsize, stk_bitsizetype))); |
5297 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5298 | } |
5299 | else |
5300 | { |
5301 | /* Extract via punning to appropriately sized integer mode |
5302 | vector. */ |
5303 | tree eltype = build_nonstandard_integer_type (bitsize, 1); |
5304 | tree etype = build_vector_type (eltype, 2); |
5305 | gcc_assert (convert_optab_handler (vec_extract_optab,((void)(!(convert_optab_handler (vec_extract_optab, ((((enum tree_code ) ((tree_class_check ((etype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5306, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (etype) : (etype)->type_common.mode), ((((enum tree_code) ((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5307, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (eltype) : (eltype)->type_common.mode)) != CODE_FOR_nothing ) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5308, __FUNCTION__), 0 : 0)) |
5306 | TYPE_MODE (etype),((void)(!(convert_optab_handler (vec_extract_optab, ((((enum tree_code ) ((tree_class_check ((etype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5306, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (etype) : (etype)->type_common.mode), ((((enum tree_code) ((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5307, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (eltype) : (eltype)->type_common.mode)) != CODE_FOR_nothing ) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5308, __FUNCTION__), 0 : 0)) |
5307 | TYPE_MODE (eltype))((void)(!(convert_optab_handler (vec_extract_optab, ((((enum tree_code ) ((tree_class_check ((etype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5306, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (etype) : (etype)->type_common.mode), ((((enum tree_code) ((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5307, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (eltype) : (eltype)->type_common.mode)) != CODE_FOR_nothing ) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5308, __FUNCTION__), 0 : 0)) |
5308 | != CODE_FOR_nothing)((void)(!(convert_optab_handler (vec_extract_optab, ((((enum tree_code ) ((tree_class_check ((etype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5306, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (etype) : (etype)->type_common.mode), ((((enum tree_code) ((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5307, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (eltype) : (eltype)->type_common.mode)) != CODE_FOR_nothing ) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5308, __FUNCTION__), 0 : 0)); |
5309 | tree tem = make_ssa_name (etype); |
5310 | epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR, |
5311 | build1 (VIEW_CONVERT_EXPR, |
5312 | etype, new_temp)); |
5313 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5314 | new_temp = tem; |
5315 | tem = make_ssa_name (eltype); |
5316 | epilog_stmt |
5317 | = gimple_build_assign (tem, BIT_FIELD_REF, |
5318 | build3 (BIT_FIELD_REF, eltype, |
5319 | new_temp, TYPE_SIZE (eltype)((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5319, __FUNCTION__))->type_common.size), |
5320 | bitsize_int (0)size_int_kind (0, stk_bitsizetype))); |
5321 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5322 | dst1 = make_ssa_name (vectype1); |
5323 | epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR, |
5324 | build1 (VIEW_CONVERT_EXPR, |
5325 | vectype1, tem)); |
5326 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5327 | tem = make_ssa_name (eltype); |
5328 | epilog_stmt |
5329 | = gimple_build_assign (tem, BIT_FIELD_REF, |
5330 | build3 (BIT_FIELD_REF, eltype, |
5331 | new_temp, TYPE_SIZE (eltype)((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5331, __FUNCTION__))->type_common.size), |
5332 | bitsize_int (bitsize)size_int_kind (bitsize, stk_bitsizetype))); |
5333 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5334 | dst2 = make_ssa_name (vectype1); |
5335 | epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR, |
5336 | build1 (VIEW_CONVERT_EXPR, |
5337 | vectype1, tem)); |
5338 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5339 | } |
5340 | |
5341 | new_temp = gimple_build (seq, code, vectype1, dst1, dst2); |
5342 | } |
5343 | |
5344 | return new_temp; |
5345 | } |
5346 | |
5347 | /* Function vect_create_epilog_for_reduction |
5348 | |
5349 | Create code at the loop-epilog to finalize the result of a reduction |
5350 | computation. |
5351 | |
5352 | STMT_INFO is the scalar reduction stmt that is being vectorized. |
5353 | SLP_NODE is an SLP node containing a group of reduction statements. The |
5354 | first one in this group is STMT_INFO. |
5355 | SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE |
5356 | REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi |
5357 | (counting from 0) |
5358 | |
5359 | This function: |
5360 | 1. Completes the reduction def-use cycles. |
5361 | 2. "Reduces" each vector of partial results VECT_DEFS into a single result, |
5362 | by calling the function specified by REDUC_FN if available, or by |
5363 | other means (whole-vector shifts or a scalar loop). |
5364 | The function also creates a new phi node at the loop exit to preserve |
5365 | loop-closed form, as illustrated below. |
5366 | |
5367 | The flow at the entry to this function: |
5368 | |
5369 | loop: |
5370 | vec_def = phi <vec_init, null> # REDUCTION_PHI |
5371 | VECT_DEF = vector_stmt # vectorized form of STMT_INFO |
5372 | s_loop = scalar_stmt # (scalar) STMT_INFO |
5373 | loop_exit: |
5374 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
5375 | use <s_out0> |
5376 | use <s_out0> |
5377 | |
5378 | The above is transformed by this function into: |
5379 | |
5380 | loop: |
5381 | vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI |
5382 | VECT_DEF = vector_stmt # vectorized form of STMT_INFO |
5383 | s_loop = scalar_stmt # (scalar) STMT_INFO |
5384 | loop_exit: |
5385 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
5386 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
5387 | v_out2 = reduce <v_out1> |
5388 | s_out3 = extract_field <v_out2, 0> |
5389 | s_out4 = adjust_result <s_out3> |
5390 | use <s_out4> |
5391 | use <s_out4> |
5392 | */ |
5393 | |
5394 | static void |
5395 | vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, |
5396 | stmt_vec_info stmt_info, |
5397 | slp_tree slp_node, |
5398 | slp_instance slp_node_instance) |
5399 | { |
5400 | stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); |
5401 | gcc_assert (reduc_info->is_reduc_info)((void)(!(reduc_info->is_reduc_info) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5401, __FUNCTION__), 0 : 0)); |
5402 | /* For double reductions we need to get at the inner loop reduction |
5403 | stmt which has the meta info attached. Our stmt_info is that of the |
5404 | loop-closed PHI of the inner loop which we remember as |
5405 | def for the reduction PHI generation. */ |
5406 | bool double_reduc = false; |
5407 | stmt_vec_info rdef_info = stmt_info; |
5408 | if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_double_reduction_def) |
5409 | { |
5410 | gcc_assert (!slp_node)((void)(!(!slp_node) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5410, __FUNCTION__), 0 : 0)); |
5411 | double_reduc = true; |
5412 | stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def |
5413 | (stmt_info->stmt, 0)); |
5414 | stmt_info = vect_stmt_to_vectorize (stmt_info); |
5415 | } |
5416 | gphi *reduc_def_stmt |
5417 | = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))(vect_orig_stmt (stmt_info))->reduc_def->stmt); |
5418 | code_helper code = STMT_VINFO_REDUC_CODE (reduc_info)(reduc_info)->reduc_code; |
5419 | internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info)(reduc_info)->reduc_fn; |
5420 | tree vectype; |
5421 | machine_mode mode; |
5422 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop, *outer_loop = NULLnullptr; |
5423 | basic_block exit_bb; |
5424 | tree scalar_dest; |
5425 | tree scalar_type; |
5426 | gimple *new_phi = NULLnullptr, *phi; |
5427 | gimple_stmt_iterator exit_gsi; |
5428 | tree new_temp = NULL_TREE(tree) nullptr, new_name, new_scalar_dest; |
5429 | gimple *epilog_stmt = NULLnullptr; |
5430 | gimple *exit_phi; |
5431 | tree bitsize; |
5432 | tree def; |
5433 | tree orig_name, scalar_result; |
5434 | imm_use_iterator imm_iter, phi_imm_iter; |
5435 | use_operand_p use_p, phi_use_p; |
5436 | gimple *use_stmt; |
5437 | auto_vec<tree> reduc_inputs; |
5438 | int j, i; |
5439 | vec<tree> &scalar_results = reduc_info->reduc_scalar_results; |
5440 | unsigned int group_size = 1, k; |
5441 | auto_vec<gimple *> phis; |
5442 | /* SLP reduction without reduction chain, e.g., |
5443 | # a1 = phi <a2, a0> |
5444 | # b1 = phi <b2, b0> |
5445 | a2 = operation (a1) |
5446 | b2 = operation (b1) */ |
5447 | bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)(((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5447, __FUNCTION__), 0 : 0)), (stmt_info)->first_element )); |
5448 | bool direct_slp_reduc; |
5449 | tree induction_index = NULL_TREE(tree) nullptr; |
5450 | |
5451 | if (slp_node) |
5452 | group_size = SLP_TREE_LANES (slp_node)(slp_node)->lanes; |
5453 | |
5454 | if (nested_in_vect_loop_p (loop, stmt_info)) |
5455 | { |
5456 | outer_loop = loop; |
5457 | loop = loop->inner; |
5458 | gcc_assert (!slp_node && double_reduc)((void)(!(!slp_node && double_reduc) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5458, __FUNCTION__), 0 : 0)); |
5459 | } |
5460 | |
5461 | vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info)(reduc_info)->reduc_vectype; |
5462 | gcc_assert (vectype)((void)(!(vectype) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5462, __FUNCTION__), 0 : 0)); |
5463 | mode = TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type) , "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5463, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (vectype) : (vectype)->type_common.mode); |
5464 | |
5465 | tree induc_val = NULL_TREE(tree) nullptr; |
5466 | tree adjustment_def = NULLnullptr; |
5467 | if (slp_node) |
5468 | ; |
5469 | else |
5470 | { |
5471 | /* Optimize: for induction condition reduction, if we can't use zero |
5472 | for induc_val, use initial_def. */ |
5473 | if (STMT_VINFO_REDUC_TYPE (reduc_info)(reduc_info)->reduc_type == INTEGER_INDUC_COND_REDUCTION) |
5474 | induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info)(reduc_info)->induc_cond_initial_val; |
5475 | else if (double_reduc) |
5476 | ; |
5477 | else |
5478 | adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)(reduc_info)->reduc_epilogue_adjustment; |
5479 | } |
5480 | |
5481 | stmt_vec_info single_live_out_stmt[] = { stmt_info }; |
5482 | array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt; |
5483 | if (slp_reduc) |
5484 | /* All statements produce live-out values. */ |
5485 | live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node)(slp_node)->stmts; |
5486 | else if (slp_node) |
5487 | { |
5488 | /* The last statement in the reduction chain produces the live-out |
5489 | value. Note SLP optimization can shuffle scalar stmts to |
5490 | optimize permutations so we have to search for the last stmt. */ |
5491 | for (k = 0; k < group_size; ++k) |
5492 | if (!REDUC_GROUP_NEXT_ELEMENT (SLP_TREE_SCALAR_STMTS (slp_node)[k])(((void)(!(!((slp_node)->stmts[k])->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5492, __FUNCTION__), 0 : 0)), ((slp_node)->stmts[k])-> next_element)) |
5493 | { |
5494 | single_live_out_stmt[0] = SLP_TREE_SCALAR_STMTS (slp_node)(slp_node)->stmts[k]; |
5495 | break; |
5496 | } |
5497 | } |
5498 | |
5499 | unsigned vec_num; |
5500 | int ncopies; |
5501 | if (slp_node) |
5502 | { |
5503 | vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis)(slp_node_instance->reduc_phis)->vec_stmts.length (); |
5504 | ncopies = 1; |
5505 | } |
5506 | else |
5507 | { |
5508 | stmt_vec_info reduc_info = loop_vinfo->lookup_stmt (reduc_def_stmt); |
5509 | vec_num = 1; |
5510 | ncopies = STMT_VINFO_VEC_STMTS (reduc_info)(reduc_info)->vec_stmts.length (); |
5511 | } |
5512 | |
5513 | /* For cond reductions we want to create a new vector (INDEX_COND_EXPR) |
5514 | which is updated with the current index of the loop for every match of |
5515 | the original loop's cond_expr (VEC_STMT). This results in a vector |
5516 | containing the last time the condition passed for that vector lane. |
5517 | The first match will be a 1 to allow 0 to be used for non-matching |
5518 | indexes. If there are no matches at all then the vector will be all |
5519 | zeroes. |
5520 | |
5521 | PR92772: This algorithm is broken for architectures that support |
5522 | masked vectors, but do not provide fold_extract_last. */ |
5523 | if (STMT_VINFO_REDUC_TYPE (reduc_info)(reduc_info)->reduc_type == COND_REDUCTION) |
5524 | { |
5525 | auto_vec<std::pair<tree, bool>, 2> ccompares; |
5526 | stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info)(reduc_info)->reduc_def; |
5527 | cond_info = vect_stmt_to_vectorize (cond_info); |
5528 | while (cond_info != reduc_info) |
5529 | { |
5530 | if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR) |
5531 | { |
5532 | gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)(cond_info)->vec_stmts[0]; |
5533 | gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR)((void)(!(gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5533, __FUNCTION__), 0 : 0)); |
5534 | ccompares.safe_push |
5535 | (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)), |
5536 | STMT_VINFO_REDUC_IDX (cond_info)(cond_info)->reduc_idx == 2)); |
5537 | } |
5538 | cond_info |
5539 | = loop_vinfo->lookup_def (gimple_op (cond_info->stmt, |
5540 | 1 + STMT_VINFO_REDUC_IDX(cond_info)->reduc_idx |
5541 | (cond_info)(cond_info)->reduc_idx)); |
5542 | cond_info = vect_stmt_to_vectorize (cond_info); |
5543 | } |
5544 | gcc_assert (ccompares.length () != 0)((void)(!(ccompares.length () != 0) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5544, __FUNCTION__), 0 : 0)); |
5545 | |
5546 | tree indx_before_incr, indx_after_incr; |
5547 | poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); |
5548 | int scalar_precision |
5549 | = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype))(as_a <scalar_mode> ((tree_class_check ((((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5549, __FUNCTION__))->typed.type)), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5549, __FUNCTION__))->type_common.mode))); |
5550 | tree cr_index_scalar_type = make_unsigned_type (scalar_precision); |
5551 | tree cr_index_vector_type = get_related_vectype_for_scalar_type |
5552 | (TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type) , "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5552, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (vectype) : (vectype)->type_common.mode), cr_index_scalar_type, |
5553 | TYPE_VECTOR_SUBPARTS (vectype)); |
5554 | |
5555 | /* First we create a simple vector induction variable which starts |
5556 | with the values {1,2,3,...} (SERIES_VECT) and increments by the |
5557 | vector size (STEP). */ |
5558 | |
5559 | /* Create a {1,2,3,...} vector. */ |
5560 | tree series_vect = build_index_vector (cr_index_vector_type, 1, 1); |
5561 | |
5562 | /* Create a vector of the step value. */ |
5563 | tree step = build_int_cst (cr_index_scalar_type, nunits_out); |
5564 | tree vec_step = build_vector_from_val (cr_index_vector_type, step); |
5565 | |
5566 | /* Create an induction variable. */ |
5567 | gimple_stmt_iterator incr_gsi; |
5568 | bool insert_after; |
5569 | standard_iv_increment_position (loop, &incr_gsi, &insert_after); |
5570 | create_iv (series_vect, vec_step, NULL_TREE(tree) nullptr, loop, &incr_gsi, |
5571 | insert_after, &indx_before_incr, &indx_after_incr); |
5572 | |
5573 | /* Next create a new phi node vector (NEW_PHI_TREE) which starts |
5574 | filled with zeros (VEC_ZERO). */ |
5575 | |
5576 | /* Create a vector of 0s. */ |
5577 | tree zero = build_zero_cst (cr_index_scalar_type); |
5578 | tree vec_zero = build_vector_from_val (cr_index_vector_type, zero); |
5579 | |
5580 | /* Create a vector phi node. */ |
5581 | tree new_phi_tree = make_ssa_name (cr_index_vector_type); |
5582 | new_phi = create_phi_node (new_phi_tree, loop->header); |
5583 | add_phi_arg (as_a <gphi *> (new_phi), vec_zero, |
5584 | loop_preheader_edge (loop), UNKNOWN_LOCATION((location_t) 0)); |
5585 | |
5586 | /* Now take the condition from the loops original cond_exprs |
5587 | and produce a new cond_exprs (INDEX_COND_EXPR) which for |
5588 | every match uses values from the induction variable |
5589 | (INDEX_BEFORE_INCR) otherwise uses values from the phi node |
5590 | (NEW_PHI_TREE). |
5591 | Finally, we update the phi (NEW_PHI_TREE) to take the value of |
5592 | the new cond_expr (INDEX_COND_EXPR). */ |
5593 | gimple_seq stmts = NULLnullptr; |
5594 | for (int i = ccompares.length () - 1; i != -1; --i) |
5595 | { |
5596 | tree ccompare = ccompares[i].first; |
5597 | if (ccompares[i].second) |
5598 | new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, |
5599 | cr_index_vector_type, |
5600 | ccompare, |
5601 | indx_before_incr, new_phi_tree); |
5602 | else |
5603 | new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, |
5604 | cr_index_vector_type, |
5605 | ccompare, |
5606 | new_phi_tree, indx_before_incr); |
5607 | } |
5608 | gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT); |
5609 | |
5610 | /* Update the phi with the vec cond. */ |
5611 | induction_index = new_phi_tree; |
5612 | add_phi_arg (as_a <gphi *> (new_phi), induction_index, |
5613 | loop_latch_edge (loop), UNKNOWN_LOCATION((location_t) 0)); |
5614 | } |
5615 | |
5616 | /* 2. Create epilog code. |
5617 | The reduction epilog code operates across the elements of the vector |
5618 | of partial results computed by the vectorized loop. |
5619 | The reduction epilog code consists of: |
5620 | |
5621 | step 1: compute the scalar result in a vector (v_out2) |
5622 | step 2: extract the scalar result (s_out3) from the vector (v_out2) |
5623 | step 3: adjust the scalar result (s_out3) if needed. |
5624 | |
5625 | Step 1 can be accomplished using one the following three schemes: |
5626 | (scheme 1) using reduc_fn, if available. |
5627 | (scheme 2) using whole-vector shifts, if available. |
5628 | (scheme 3) using a scalar loop. In this case steps 1+2 above are |
5629 | combined. |
5630 | |
5631 | The overall epilog code looks like this: |
5632 | |
5633 | s_out0 = phi <s_loop> # original EXIT_PHI |
5634 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
5635 | v_out2 = reduce <v_out1> # step 1 |
5636 | s_out3 = extract_field <v_out2, 0> # step 2 |
5637 | s_out4 = adjust_result <s_out3> # step 3 |
5638 | |
5639 | (step 3 is optional, and steps 1 and 2 may be combined). |
5640 | Lastly, the uses of s_out0 are replaced by s_out4. */ |
5641 | |
5642 | |
5643 | /* 2.1 Create new loop-exit-phis to preserve loop-closed form: |
5644 | v_out1 = phi <VECT_DEF> |
5645 | Store them in NEW_PHIS. */ |
5646 | if (double_reduc) |
5647 | loop = outer_loop; |
5648 | exit_bb = single_exit (loop)->dest; |
5649 | exit_gsi = gsi_after_labels (exit_bb); |
5650 | reduc_inputs.create (slp_node ? vec_num : ncopies); |
5651 | for (unsigned i = 0; i < vec_num; i++) |
5652 | { |
5653 | gimple_seq stmts = NULLnullptr; |
5654 | if (slp_node) |
5655 | def = vect_get_slp_vect_def (slp_node, i); |
5656 | else |
5657 | def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)(rdef_info)->vec_stmts[0]); |
5658 | for (j = 0; j < ncopies; j++) |
5659 | { |
5660 | tree new_def = copy_ssa_name (def); |
5661 | phi = create_phi_node (new_def, exit_bb); |
5662 | if (j) |
5663 | def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)(rdef_info)->vec_stmts[j]); |
5664 | SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def)set_ssa_use_from_ptr (gimple_phi_arg_imm_use_ptr (((phi)), (( single_exit (loop)->dest_idx))), (def)); |
5665 | new_def = gimple_convert (&stmts, vectype, new_def); |
5666 | reduc_inputs.quick_push (new_def); |
5667 | } |
5668 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
5669 | } |
5670 | |
5671 | /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 |
5672 | (i.e. when reduc_fn is not available) and in the final adjustment |
5673 | code (if needed). Also get the original scalar reduction variable as |
5674 | defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it |
5675 | represents a reduction pattern), the tree-code and scalar-def are |
5676 | taken from the original stmt that the pattern-stmt (STMT) replaces. |
5677 | Otherwise (it is a regular reduction) - the tree-code and scalar-def |
5678 | are taken from STMT. */ |
5679 | |
5680 | stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); |
5681 | if (orig_stmt_info != stmt_info) |
5682 | { |
5683 | /* Reduction pattern */ |
5684 | gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info))((void)(!((orig_stmt_info)->in_pattern_p) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5684, __FUNCTION__), 0 : 0)); |
5685 | gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info)((void)(!((orig_stmt_info)->related_stmt == stmt_info) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5685, __FUNCTION__), 0 : 0)); |
5686 | } |
5687 | |
5688 | scalar_dest = gimple_get_lhs (orig_stmt_info->stmt); |
5689 | scalar_type = TREE_TYPE (scalar_dest)((contains_struct_check ((scalar_dest), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5689, __FUNCTION__))->typed.type); |
5690 | scalar_results.truncate (0); |
5691 | scalar_results.reserve_exact (group_size); |
5692 | new_scalar_dest = vect_create_destination_var (scalar_dest, NULLnullptr); |
5693 | bitsize = TYPE_SIZE (scalar_type)((tree_class_check ((scalar_type), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5693, __FUNCTION__))->type_common.size); |
5694 | |
5695 | /* True if we should implement SLP_REDUC using native reduction operations |
5696 | instead of scalar operations. */ |
5697 | direct_slp_reduc = (reduc_fn != IFN_LAST |
5698 | && slp_reduc |
5699 | && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ()); |
5700 | |
5701 | /* In case of reduction chain, e.g., |
5702 | # a1 = phi <a3, a0> |
5703 | a2 = operation (a1) |
5704 | a3 = operation (a2), |
5705 | |
5706 | we may end up with more than one vector result. Here we reduce them |
5707 | to one vector. |
5708 | |
5709 | The same is true for a SLP reduction, e.g., |
5710 | # a1 = phi <a2, a0> |
5711 | # b1 = phi <b2, b0> |
5712 | a2 = operation (a1) |
5713 | b2 = operation (a2), |
5714 | |
5715 | where we can end up with more than one vector as well. We can |
5716 | easily accumulate vectors when the number of vector elements is |
5717 | a multiple of the SLP group size. |
5718 | |
5719 | The same is true if we couldn't use a single defuse cycle. */ |
5720 | if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)(((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5720, __FUNCTION__), 0 : 0)), (stmt_info)->first_element ) |
5721 | || direct_slp_reduc |
5722 | || (slp_reduc |
5723 | && constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size)) |
5724 | || ncopies > 1) |
5725 | { |
5726 | gimple_seq stmts = NULLnullptr; |
5727 | tree single_input = reduc_inputs[0]; |
5728 | for (k = 1; k < reduc_inputs.length (); k++) |
5729 | single_input = gimple_build (&stmts, code, vectype, |
5730 | single_input, reduc_inputs[k]); |
5731 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
5732 | |
5733 | reduc_inputs.truncate (0); |
5734 | reduc_inputs.safe_push (single_input); |
5735 | } |
5736 | |
5737 | tree orig_reduc_input = reduc_inputs[0]; |
5738 | |
5739 | /* If this loop is an epilogue loop that can be skipped after the |
5740 |