File: build/gcc/tree-vect-loop.cc
Warning: line 3149, column 8: 1st function call argument is an uninitialized value

/* Loop Vectorization
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "gimple-fold.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-if-conv.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-eh.h"
#include "case-cfn-macros.h"

/* Loop Vectorization Pass.

   This pass tries to vectorize loops.

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it was manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N]; short b[N]; short c[N]; int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

   The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   had successfully passed the analysis phase.
   Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMEs), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFS whose base is an array DECL
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.

   Analysis phase:
   ===============
   The driver for the analysis phase is vect_analyze_loop().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

   During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
   The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer is then used for the vectorization of
   subsequent stmts that use the def of stmt S.  Stmt S is removed if it
   writes to memory; otherwise, we rely on dead code elimination for
   removing it.

   For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;       STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

   Operands that are not SSA_NAMEs are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   =================
   Currently the only target-specific information that is used is the
   size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
   Targets that can support different sizes of vectors will, for now, need
   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
   flexibility will be added in the future.

   Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/
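
/* As a rough sketch of the optab check described above (illustrative
   only: the vectype/mode variables below are hypothetical, though
   optab_handler, add_optab and CODE_FOR_nothing are the real
   interfaces):

     machine_mode vec_mode = TYPE_MODE (vectype);
     if (optab_handler (add_optab, vec_mode) == CODE_FOR_nothing)
       return false;

   A handler other than CODE_FOR_nothing means the target provides an
   instruction for the operation in that mode.  */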

static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *,
                                                unsigned *);
static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
                                               bool *, bool *, bool);

/* Subroutine of vect_determine_vf_for_stmt that handles only one
   statement.  VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE
   may already be set for general statements (not just data refs).  */

static opt_result
vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
                              bool vectype_maybe_set_p,
                              poly_uint64 *vf)
{
  gimple *stmt = stmt_info->stmt;

  if ((!STMT_VINFO_RELEVANT_P (stmt_info)
       && !STMT_VINFO_LIVE_P (stmt_info))
      || gimple_clobber_p (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
      return opt_result::success ();
    }

  tree stmt_vectype, nunits_vectype;
  opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info,
                                                   &stmt_vectype,
                                                   &nunits_vectype);
  if (!res)
    return res;

  if (stmt_vectype)
    {
      if (STMT_VINFO_VECTYPE (stmt_info))
        /* The only case when a vectype had been already set is for stmts
           that contain a data ref, or for "pattern-stmts" (stmts generated
           by the vectorizer to represent/replace a certain idiom).  */
        gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)
                     || vectype_maybe_set_p)
                    && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype);
      else
        STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
    }

  if (nunits_vectype)
    vect_update_max_nunits (vf, nunits_vectype);

  return opt_result::success ();
}

/* Subroutine of vect_determine_vectorization_factor.  Set the vector
   types of STMT_INFO and all attached pattern statements and update
   the vectorization factor VF accordingly.  Return true on success
   or false if something prevented vectorization.  */
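
/* As an illustrative example (not tied to one particular pattern
   recognizer): a widening-multiply idiom such as

     short s0, s1;  int x;
     S:  x = (int) s0 * (int) s1;

   may be replaced by a pattern statement based on WIDEN_MULT_EXPR.
   In that case the pattern statement, plus any statements in its
   pattern def sequence, are examined below in addition to S itself.  */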

static opt_result
vect_determine_vf_for_stmt (vec_info *vinfo,
                            stmt_vec_info stmt_info, poly_uint64 *vf)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
                     stmt_info->stmt);
  opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf);
  if (!res)
    return res;

  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
      && STMT_VINFO_RELATED_STMT (stmt_info))
    {
      gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);

      /* If a pattern statement has def stmts, analyze them too.  */
      for (gimple_stmt_iterator si = gsi_start (pattern_def_seq);
           !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "==> examining pattern def stmt: %G",
                             def_stmt_info->stmt);
          res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf);
          if (!res)
            return res;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "==> examining pattern statement: %G",
                         stmt_info->stmt);
      res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf);
      if (!res)
        return res;
    }

  return opt_result::success ();
}

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with a vector size (VS) of 16 bytes, the VF is set
   to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/

static opt_result
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned nbbs = loop->num_nodes;
  poly_uint64 vectorization_factor = 1;
  tree scalar_type = NULL_TREE;
  gphi *phi;
  tree vectype;
  stmt_vec_info stmt_info;
  unsigned i;

  DUMP_VECT_SCOPE ("vect_determine_vectorization_factor");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
           gsi_next (&si))
        {
          phi = si.phi ();
          stmt_info = loop_vinfo->lookup_stmt (phi);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G",
                             (gimple *) phi);

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_LIVE_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "get vectype for scalar type: %T\n",
                                 scalar_type);

              vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
              if (!vectype)
                return opt_result::failure_at (phi,
                                               "not vectorized: unsupported "
                                               "data-type %T\n",
                                               scalar_type);
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
                                 vectype);

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
                  dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype));
                  dump_printf (MSG_NOTE, "\n");
                }

              vect_update_max_nunits (&vectorization_factor, vectype);
            }
        }

      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
           gsi_next (&si))
        {
          if (is_gimple_debug (gsi_stmt (si)))
            continue;
          stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          opt_result res
            = vect_determine_vf_for_stmt (loop_vinfo,
                                          stmt_info, &vectorization_factor);
          if (!res)
            return res;
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
      dump_dec (MSG_NOTE, vectorization_factor);
      dump_printf (MSG_NOTE, "\n");
    }

  if (known_le (vectorization_factor, 1U))
    return opt_result::failure_at (vect_location,
                                   "not vectorized: unsupported data-type\n");
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
  return opt_result::success ();
}


/* Function vect_is_simple_iv_evolution.

   FORNOW: A simple evolution of an induction variable in the loop is
   considered a polynomial evolution.  */
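
/* For example (illustrative), the linear IV in

     for (i = 0; i < n; i++)
       ...

   has access function {0, +, 1}_loop in scev notation, i.e. init 0
   and constant step 1, and is therefore "simple".  */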

static bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
                             tree * step)
{
  tree init_expr;
  tree step_expr;
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
  basic_block bb;

  /* When there is no evolution in this loop, the evolution function
     is not "simple".  */
  if (evolution_part == NULL_TREE)
    return false;

  /* When the evolution is a polynomial of degree >= 2
     the evolution function is not "simple".  */
  if (tree_is_chrec (evolution_part))
    return false;

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "step: %T,  init: %T\n",
                     step_expr, init_expr);

  *init = init_expr;
  *step = step_expr;

  if (TREE_CODE (step_expr) != INTEGER_CST
      && (TREE_CODE (step_expr) != SSA_NAME
          || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
              && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
          || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
              && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
                  || !flag_associative_math)))
      && (TREE_CODE (step_expr) != REAL_CST
          || !flag_associative_math))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "step unknown.\n");
      return false;
    }

  return true;
}

/* Function vect_is_nonlinear_iv_evolution

   Only support nonlinear induction for integer types:
   1. neg
   2. mul by constant
   3. lshift/rshift by constant.

   For neg induction, return a fake step as integer -1.  */
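
/* Illustrative example (hypothetical source): the shift IV in

     for (i = 0; i < n; i++)
       {
         a[i] = x;
         x = x << 1;
       }

   would be classified as vect_step_op_shl with *step being the
   constant 1, while an update of the form x = -x would be
   vect_step_op_neg with the fake step -1 described above.  */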
static bool
vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info,
                                gphi* loop_phi_node, tree *init, tree *step)
{
  tree init_expr, ev_expr, result, op1, op2;
  gimple* def;

  if (gimple_phi_num_args (loop_phi_node) != 2)
    return false;

  init_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_preheader_edge (loop));
  ev_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_latch_edge (loop));

  /* Support nonlinear induction only for integer type.  */
  if (!INTEGRAL_TYPE_P (TREE_TYPE (init_expr)))
    return false;

  *init = init_expr;
  result = PHI_RESULT (loop_phi_node);

  if (TREE_CODE (ev_expr) != SSA_NAME
      || ((def = SSA_NAME_DEF_STMT (ev_expr)), false)
      || !is_gimple_assign (def))
    return false;

  enum tree_code t_code = gimple_assign_rhs_code (def);
  switch (t_code)
    {
    case NEGATE_EXPR:
      if (gimple_assign_rhs1 (def) != result)
        return false;
      *step = build_int_cst (TREE_TYPE (init_expr), -1);
      STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_neg;
      break;

    case RSHIFT_EXPR:
    case LSHIFT_EXPR:
    case MULT_EXPR:
      op1 = gimple_assign_rhs1 (def);
      op2 = gimple_assign_rhs2 (def);
      if (TREE_CODE (op2) != INTEGER_CST
          || op1 != result)
        return false;
      *step = op2;
      if (t_code == LSHIFT_EXPR)
        STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_shl;
      else if (t_code == RSHIFT_EXPR)
        STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_shr;
      /* NEGATE_EXPR and MULT_EXPR are both vect_step_op_mul.  */
      else
        STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_mul;
      break;

    default:
      return false;
    }

  STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info) = *init;
  STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info) = *step;

  return true;
}

/* Return true if PHI, described by STMT_INFO, is the inner PHI in
   what we are assuming is a double reduction.  For example, given
   a structure like this:

      outer1:
        x_1 = PHI <x_4(outer2), ...>;
        ...

      inner:
        x_2 = PHI <x_1(outer1), ...>;
        ...
        x_3 = ...;
        ...

      outer2:
        x_4 = PHI <x_3(inner)>;
        ...

   outer loop analysis would treat x_1 as a double reduction phi and
   this function would then return true for x_2.  */
static bool
vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi)
{
  use_operand_p use_p;
  ssa_op_iter op_iter;
  FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE)
    if (stmt_vec_info def_info = loop_vinfo->lookup_def (USE_FROM_PTR (use_p)))
      if (STMT_VINFO_DEF_TYPE (def_info) == vect_double_reduction_def)
        return true;
  return false;
}

/* Returns true if Phi is a first-order recurrence.  A first-order
   recurrence is a non-reduction recurrence relation in which the value of
   the recurrence in the current loop iteration equals a value defined in
   the previous iteration.  */
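
/* A classic example (illustrative):

     t = 0;
     for (i = 0; i < n; i++)
       {
         b[i] = a[i] - t;
         t = a[i];
       }

   Each iteration of the loop body uses the value of t defined in the
   previous iteration, so the PHI for t is a first-order recurrence
   rather than a reduction.  */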

static bool
vect_phi_first_order_recurrence_p (loop_vec_info loop_vinfo, class loop *loop,
                                   gphi *phi)
{
  /* A nested cycle isn't vectorizable as first order recurrence.  */
  if (LOOP_VINFO_LOOP (loop_vinfo) != loop)
    return false;

  /* Ensure the loop latch definition is from within the loop.  */
  edge latch = loop_latch_edge (loop);
  tree ldef = PHI_ARG_DEF_FROM_EDGE (phi, latch);
  if (TREE_CODE (ldef) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (ldef)
      || is_a <gphi *> (SSA_NAME_DEF_STMT (ldef))
      || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (ldef))))
    return false;

  tree def = gimple_phi_result (phi);

  /* Ensure every use_stmt of the phi node is dominated by the latch
     definition.  */
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    if (!is_gimple_debug (USE_STMT (use_p))
        && (SSA_NAME_DEF_STMT (ldef) == USE_STMT (use_p)
            || !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (ldef),
                                            USE_STMT (use_p))))
      return false;

  /* First-order recurrence autovectorization needs shuffle vector.  */
  tree scalar_type = TREE_TYPE (def);
  tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
  if (!vectype)
    return false;

  return true;
}

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  SLP indicates whether subsequent SLP analyses
   will be performed.  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop,
                              bool slp)
{
  basic_block bb = loop->header;
  tree init, step;
  auto_vec<stmt_vec_info, 64> worklist;
  gphi_iterator gsi;
  bool double_reduc, reduc_chain;

  DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles");

  /* First - identify all inductions.  Reduction detection assumes that all the
     inductions have been identified, therefore, this order must not be
     changed.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (phi);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
                         (gimple *) phi);

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (virtual_operand_p (def))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn)
        {
          STRIP_NOPS (access_fn);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Access function of PHI: %T\n", access_fn);
          STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
            = initial_condition_in_loop_num (access_fn, loop->num);
          STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
            = evolution_part_in_loop_num (access_fn, loop->num);
        }

      if ((!access_fn
           || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi)
           || !vect_is_simple_iv_evolution (loop->num, access_fn,
                                            &init, &step)
           || (LOOP_VINFO_LOOP (loop_vinfo) != loop
               && TREE_CODE (step) != INTEGER_CST))
          /* Only handle nonlinear iv for same loop.  */
          && (LOOP_VINFO_LOOP (loop_vinfo) != loop
              || !vect_is_nonlinear_iv_evolution (loop, stmt_vinfo,
                                                  phi, &init, &step)))
        {
          worklist.safe_push (stmt_vinfo);
          continue;
        }

      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
                  != NULL_TREE);
      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions and nested cycles.  */
  while (worklist.length () > 0)
    {
      stmt_vec_info stmt_vinfo = worklist.pop ();
      gphi *phi = as_a <gphi *> (stmt_vinfo->stmt);
      tree def = PHI_RESULT (phi);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
                         (gimple *) phi);

      gcc_assert (!virtual_operand_p (def)
                  && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      stmt_vec_info reduc_stmt_info
        = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc,
                                    &reduc_chain, slp);
      if (reduc_stmt_info)
        {
          STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info;
          STMT_VINFO_REDUC_DEF (reduc_stmt_info) = stmt_vinfo;
          if (double_reduc)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "Detected double reduction.\n");

              STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
              STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_double_reduction_def;
            }
          else
            {
              if (loop != LOOP_VINFO_LOOP (loop_vinfo))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Detected vectorizable nested cycle.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
                }
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Detected reduction.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
                  STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_reduction_def;
                  /* Store the reduction cycles for possible vectorization in
                     loop-aware SLP if it was not detected as reduction
                     chain.  */
                  if (! reduc_chain)
                    LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push
                      (reduc_stmt_info);
                }
            }
        }
      else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi))
        STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_first_order_recurrence;
      else
        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Unknown def-use cycle pattern.\n");
    }
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables.  Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner loop, if one exists.
   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo, bool slp)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop, slp);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner, slp);
}

/* Transfer group and reduction information from STMT_INFO to its
   pattern stmt.  */

static void
vect_fixup_reduc_chain (stmt_vec_info stmt_info)
{
  stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info);
  stmt_vec_info stmtp;
  gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp)
              && REDUC_GROUP_FIRST_ELEMENT (stmt_info));
  REDUC_GROUP_SIZE (firstp) = REDUC_GROUP_SIZE (stmt_info);
  do
    {
      stmtp = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_checking_assert (STMT_VINFO_DEF_TYPE (stmtp)
                           == STMT_VINFO_DEF_TYPE (stmt_info));
      REDUC_GROUP_FIRST_ELEMENT (stmtp) = firstp;
      stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info);
      if (stmt_info)
        REDUC_GROUP_NEXT_ELEMENT (stmtp)
          = STMT_VINFO_RELATED_STMT (stmt_info);
    }
  while (stmt_info);
}

/* Fixup scalar cycles that now have their stmts detected as patterns.  */

static void
vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
{
  stmt_vec_info first;
  unsigned i;

  FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
    {
      stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
      while (next)
        {
          if ((STMT_VINFO_IN_PATTERN_P (next)
               != STMT_VINFO_IN_PATTERN_P (first))
              || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) == -1)
            break;
          next = REDUC_GROUP_NEXT_ELEMENT (next);
        }
      /* If all reduction chain members are well-formed patterns, adjust
         the group to group the pattern stmts instead.  */
      if (! next
          && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1)
        {
          if (STMT_VINFO_IN_PATTERN_P (first))
            {
              vect_fixup_reduc_chain (first);
              LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
                = STMT_VINFO_RELATED_STMT (first);
            }
        }
      /* If not all stmts in the chain are patterns, or if we failed
         to update STMT_VINFO_REDUC_IDX, dissolve the chain and handle
         it as a regular reduction instead.  */
      else
        {
          stmt_vec_info vinfo = first;
          stmt_vec_info last = NULL;
          while (vinfo)
            {
              next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
              REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
              REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
              last = vinfo;
              vinfo = next;
            }
          STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first))
            = vect_internal_def;
          loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last));
          LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (i);
          --i;
        }
    }
}

/* Function vect_get_loop_niters.

   Determine how many iterations the loop is executed and place it
   in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
   in NUMBER_OF_ITERATIONSM1.  Place the condition under which the
   niter information holds in ASSUMPTIONS.

   Return the loop exit condition.  */
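
/* For instance (illustrative): for a loop whose body executes n > 0
   times, such as "for (i = 0; i < n; i++)", the latch executes n - 1
   times, so NUMBER_OF_ITERATIONSM1 would be n - 1 and
   NUMBER_OF_ITERATIONS (the number of header executions) would be n.  */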


static gcond *
vect_get_loop_niters (class loop *loop, tree *assumptions,
                      tree *number_of_iterations, tree *number_of_iterationsm1)
{
  edge exit = single_exit (loop);
  class tree_niter_desc niter_desc;
  tree niter_assumptions, niter, may_be_zero;
  gcond *cond = get_loop_exit_condition (loop);

  *assumptions = boolean_true_node;
  *number_of_iterationsm1 = chrec_dont_know;
  *number_of_iterations = chrec_dont_know;
  DUMP_VECT_SCOPE ("get_loop_niters");

  if (!exit)
    return cond;

  may_be_zero = NULL_TREE;
  if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
      || chrec_contains_undetermined (niter_desc.niter))
    return cond;

  niter_assumptions = niter_desc.assumptions;
  may_be_zero = niter_desc.may_be_zero;
  niter = niter_desc.niter;

  if (may_be_zero && integer_zerop (may_be_zero))
    may_be_zero = NULL_TREE;

  if (may_be_zero)
    {
      if (COMPARISON_CLASS_P (may_be_zero))
        {
          /* Try to combine may_be_zero with assumptions, this can simplify
             computation of niter expression.  */
          if (niter_assumptions && !integer_nonzerop (niter_assumptions))
            niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                                             niter_assumptions,
                                             fold_build1 (TRUTH_NOT_EXPR,
                                                          boolean_type_node,
                                                          may_be_zero));
          else
            niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
                                 build_int_cst (TREE_TYPE (niter), 0),
                                 rewrite_to_non_trapping_overflow (niter));

          may_be_zero = NULL_TREE;
        }
      else if (integer_nonzerop (may_be_zero))
        {
          *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
          *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
          return cond;
        }
      else
        return cond;
    }

  *assumptions = niter_assumptions;
  *number_of_iterationsm1 = niter;

  /* We want the number of loop header executions which is the number
     of latch executions plus one.
     ???  For UINT_MAX latch executions this number overflows to zero
     for loops like do { n++; } while (n != 0);  */
  if (niter && !chrec_contains_undetermined (niter))
    niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),
                         build_int_cst (TREE_TYPE (niter), 1));
  *number_of_iterations = niter;

  return cond;
}

/* Function bb_in_loop_p

   Used as predicate for dfs order traversal of the loop bbs.  */

static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
  const class loop *const loop = (const class loop *)data;
  if (flow_bb_inside_loop_p (loop, bb))
    return true;
  return false;
}
939 | ||||
940 | ||||
941 | /* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as | |||
942 | stmt_vec_info structs for all the stmts in LOOP_IN. */ | |||
943 | ||||
944 | _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) | |||
945 | : vec_info (vec_info::loop, shared), | |||
946 | loop (loop_in), | |||
947 | bbs (XCNEWVEC (basic_block, loop->num_nodes)((basic_block *) xcalloc ((loop->num_nodes), sizeof (basic_block )))), | |||
948 | num_itersm1 (NULL_TREE(tree) nullptr), | |||
949 | num_iters (NULL_TREE(tree) nullptr), | |||
950 | num_iters_unchanged (NULL_TREE(tree) nullptr), | |||
951 | num_iters_assumptions (NULL_TREE(tree) nullptr), | |||
952 | vector_costs (nullptr), | |||
953 | scalar_costs (nullptr), | |||
954 | th (0), | |||
955 | versioning_threshold (0), | |||
956 | vectorization_factor (0), | |||
957 | main_loop_edge (nullptr), | |||
958 | skip_main_loop_edge (nullptr), | |||
959 | skip_this_loop_edge (nullptr), | |||
960 | reusable_accumulators (), | |||
961 | suggested_unroll_factor (1), | |||
962 | max_vectorization_factor (0), | |||
963 | mask_skip_niters (NULL_TREE(tree) nullptr), | |||
964 | rgroup_compare_type (NULL_TREE(tree) nullptr), | |||
965 | simd_if_cond (NULL_TREE(tree) nullptr), | |||
966 | unaligned_dr (NULLnullptr), | |||
967 | peeling_for_alignment (0), | |||
968 | ptr_mask (0), | |||
969 | ivexpr_map (NULLnullptr), | |||
970 | scan_map (NULLnullptr), | |||
971 | slp_unrolling_factor (1), | |||
972 | inner_loop_cost_factor (param_vect_inner_loop_cost_factorglobal_options.x_param_vect_inner_loop_cost_factor), | |||
973 | vectorizable (false), | |||
974 | can_use_partial_vectors_p (param_vect_partial_vector_usageglobal_options.x_param_vect_partial_vector_usage != 0), | |||
975 | using_partial_vectors_p (false), | |||
976 | epil_using_partial_vectors_p (false), | |||
977 | partial_load_store_bias (0), | |||
978 | peeling_for_gaps (false), | |||
979 | peeling_for_niter (false), | |||
980 | no_data_dependencies (false), | |||
981 | has_mask_store (false), | |||
982 | scalar_loop_scaling (profile_probability::uninitialized ()), | |||
983 | scalar_loop (NULLnullptr), | |||
984 | orig_loop_info (NULLnullptr) | |||
985 | { | |||
986 | /* CHECKME: We want to visit all BBs before their successors (except for | |||
987 | latch blocks, for which this assertion wouldn't hold). In the simple | |||
988 | case of the loop forms we allow, a dfs order of the BBs would the same | |||
989 | as reversed postorder traversal, so we are safe. */ | |||
990 | ||||
991 | unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p, | |||
992 | bbs, loop->num_nodes, loop); | |||
993 | gcc_assert (nbbs == loop->num_nodes)((void)(!(nbbs == loop->num_nodes) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 993, __FUNCTION__), 0 : 0)); | |||
994 | ||||
995 | for (unsigned int i = 0; i < nbbs; i++) | |||
996 | { | |||
997 | basic_block bb = bbs[i]; | |||
998 | gimple_stmt_iterator si; | |||
999 | ||||
1000 | for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) | |||
1001 | { | |||
1002 | gimple *phi = gsi_stmt (si); | |||
1003 | gimple_set_uid (phi, 0); | |||
1004 | add_stmt (phi); | |||
1005 | } | |||
1006 | ||||
1007 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | |||
1008 | { | |||
1009 | gimple *stmt = gsi_stmt (si); | |||
1010 | gimple_set_uid (stmt, 0); | |||
1011 | if (is_gimple_debug (stmt)) | |||
1012 | continue; | |||
1013 | add_stmt (stmt); | |||
	  /* If the .GOMP_SIMD_LANE call for the current loop has 3 arguments,
	     the third argument is the #pragma omp simd if (x) condition: when
	     it is 0, the loop shouldn't be vectorized; when it is a non-zero
	     constant, it should be vectorized normally; otherwise the loop is
	     versioned, with the vectorized copy used if the condition is
	     non-zero at runtime.  */
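	  /* E.g. (illustrative only) for

	       #pragma omp simd if (c)
	       for (i = 0; i < n; i++) ...

	     the loop body contains a call along the lines of

	       _1 = .GOMP_SIMD_LANE (simduid.0_4(D), 0, c_6);

	     and c_6 would be recorded as SIMD_IF_COND below.  */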
	  if (loop_in->simduid
	      && is_gimple_call (stmt)
	      && gimple_call_internal_p (stmt)
	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
	      && gimple_call_num_args (stmt) >= 3
	      && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	      && (loop_in->simduid
		  == SSA_NAME_VAR (gimple_call_arg (stmt, 0))))
	    {
	      tree arg = gimple_call_arg (stmt, 2);
	      if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME)
		simd_if_cond = arg;
	      else
		gcc_assert (integer_nonzerop (arg));
	    }
	}
    }

  epilogue_vinfos.create (6);
}

/* Free all levels of rgroup CONTROLS.  */

void
release_vec_loop_controls (vec<rgroup_controls> *controls)
{
  rgroup_controls *rgc;
  unsigned int i;
  FOR_EACH_VEC_ELT (*controls, i, rgc)
    rgc->controls.release ();
  controls->release ();
}

/* Free all memory used by the _loop_vec_info, as well as all the
   stmt_vec_info structs of all the stmts in the loop.  */

_loop_vec_info::~_loop_vec_info ()
{
  free (bbs);

  release_vec_loop_controls (&masks);
  release_vec_loop_controls (&lens);
  delete ivexpr_map;
  delete scan_map;
  epilogue_vinfos.release ();
  delete scalar_costs;
  delete vector_costs;

  /* When we release an epilogue vinfo that we do not intend to use
     avoid clearing AUX of the main loop which should continue to
     point to the main loop vinfo since otherwise we'll leak that.  */
  if (loop->aux == this)
    loop->aux = NULL;
}

/* Return an invariant or register for EXPR and emit necessary
   computations in the LOOP_VINFO loop preheader.  */
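/* For example (illustrative), an invariant step expression such as n_5 * 4
   that several callers need is gimplified once, its statements inserted on
   the preheader edge, and the cached SSA name returned on later calls.  */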

tree
cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr)
{
  if (is_gimple_reg (expr)
      || is_gimple_min_invariant (expr))
    return expr;

  if (! loop_vinfo->ivexpr_map)
    loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>;
  tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr);
  if (! cached)
    {
      gimple_seq stmts = NULL;
      cached = force_gimple_operand (unshare_expr (expr),
				     &stmts, true, NULL_TREE);
      if (stmts)
	{
	  edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
	  gsi_insert_seq_on_edge_immediate (e, stmts);
	}
    }
  return cached;
}

/* Return true if we can use CMP_TYPE as the comparison type to produce
   all masks required to mask LOOP_VINFO.  */
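/* Each mask is produced by IFN_WHILE_ULT; e.g. (illustrative) with a VF
   of 4, WHILE_ULT (i, niters, mask) sets mask lane L iff i + L < niters,
   so the last vector iteration of a 10-iteration loop would execute
   under the mask {1, 1, 0, 0}.  */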

static bool
can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type)
{
  rgroup_controls *rgm;
  unsigned int i;
  FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
    if (rgm->type != NULL_TREE
	&& !direct_internal_fn_supported_p (IFN_WHILE_ULT,
					    cmp_type, rgm->type,
					    OPTIMIZE_FOR_SPEED))
      return false;
  return true;
}

/* Calculate the maximum number of scalars per iteration for every
   rgroup in LOOP_VINFO.  */

static unsigned int
vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo)
{
  unsigned int res = 1;
  unsigned int i;
  rgroup_controls *rgm;
  FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
    res = MAX (res, rgm->max_nscalars_per_iter);
  return res;
}

/* Calculate the minimum precision necessary to represent:

     MAX_NITERS * FACTOR

   as an unsigned integer, where MAX_NITERS is the maximum number of
   loop header iterations for the original scalar form of LOOP_VINFO.  */
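/* E.g. (illustrative) if MAX_NITERS is known to be at most 1000 and
   FACTOR is 4, the product 4000 fits in 12 bits, so 12 would be
   returned.  */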

static unsigned
vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Get the maximum number of iterations that is representable
     in the counter type.  */
  tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
  widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;

  /* Get a more refined estimate for the number of iterations.  */
  widest_int max_back_edges;
  if (max_loop_iterations (loop, &max_back_edges))
    max_ni = wi::smin (max_ni, max_back_edges + 1);

  /* Work out how many bits we need to represent the limit.  */
  return wi::min_precision (max_ni * factor, UNSIGNED);
}

/* True if the loop needs peeling or partial vectors when vectorized.  */

static bool
vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo)
{
  unsigned HOST_WIDE_INT const_vf;
  HOST_WIDE_INT max_niter
    = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));

  unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
  if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
    th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
					    (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
    {
      /* Work out the (constant) number of iterations that need to be
	 peeled for reasons other than niters.  */
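      /* E.g. (illustrative): with 100 known iterations, a VF of 8,
	 3 iterations peeled for alignment and 1 more peeled for gaps,
	 the remaining 96 iterations are a multiple of 8, so no further
	 peeling is needed.  */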
      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
	peel_niter += 1;
      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
		       LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
	return true;
    }
  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
	   /* ??? When peeling for gaps but not alignment, we could
	      try to check whether the (variable) niters is known to be
	      VF * N + 1.  That's something of a niche case though.  */
	   || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
	   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
	   || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
		< (unsigned) exact_log2 (const_vf))
	       /* In case of versioning, check if the maximum number of
		  iterations is greater than th.  If they are identical,
		  the epilogue is unnecessary.  */
	       && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
		   || ((unsigned HOST_WIDE_INT) max_niter
		       > (th / const_vf) * const_vf))))
    return true;

  return false;
}

/* Each statement in LOOP_VINFO can be masked where necessary.  Check
   whether we can actually generate the masks required.  Return true if so,
   storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE.  */

static bool
vect_verify_full_masking (loop_vec_info loop_vinfo)
{
  unsigned int min_ni_width;
  unsigned int max_nscalars_per_iter
    = vect_get_max_nscalars_per_iter (loop_vinfo);

  /* Use a normal loop if there are no statements that need masking.
     This only happens in rare degenerate cases: it means that the loop
     has no loads, no stores, and no live-out values.  */
  if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
    return false;

  /* Work out how many bits we need to represent the limit.  */
  min_ni_width
    = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter);

  /* Find a scalar mode for which WHILE_ULT is supported.  */
  opt_scalar_int_mode cmp_mode_iter;
  tree cmp_type = NULL_TREE;
  tree iv_type = NULL_TREE;
  widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
  unsigned int iv_precision = UINT_MAX;

  if (iv_limit != -1)
    iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter,
				      UNSIGNED);

  FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
    {
      unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ());
      if (cmp_bits >= min_ni_width
	  && targetm.scalar_mode_supported_p (cmp_mode_iter.require ()))
	{
	  tree this_type = build_nonstandard_integer_type (cmp_bits, true);
	  if (this_type
	      && can_produce_all_loop_masks_p (loop_vinfo, this_type))
	    {
	      /* Although we could stop as soon as we find a valid mode,
		 there are at least two reasons why that's not always the
		 best choice:

		 - An IV that's Pmode or wider is more likely to be reusable
		   in address calculations than an IV that's narrower than
		   Pmode.

		 - Doing the comparison in IV_PRECISION or wider allows
		   a natural 0-based IV, whereas using a narrower comparison
		   type requires mitigations against wrap-around.

		 Conversely, if the IV limit is variable, doing the comparison
		 in a wider type than the original type can introduce
		 unnecessary extensions, so picking the widest valid mode
		 is not always a good choice either.

		 Here we prefer the first IV type that's Pmode or wider,
		 and the first comparison type that's IV_PRECISION or wider.
		 (The comparison type must be no wider than the IV type,
		 to avoid extensions in the vector loop.)

		 ??? We might want to try continuing beyond Pmode for ILP32
		 targets if CMP_BITS < IV_PRECISION.  */
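	      /* Worked example (illustrative): with IV_PRECISION == 32
		 and 64-bit Pmode, CMP_TYPE settles on the 32-bit type
		 while IV_TYPE keeps advancing to the 64-bit type, at
		 which point the search stops.  */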
	      iv_type = this_type;
	      if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type))
		cmp_type = this_type;
	      if (cmp_bits >= GET_MODE_BITSIZE (Pmode))
		break;
	    }
	}
    }

  if (!cmp_type)
    return false;

  LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = cmp_type;
  LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
  return true;
}

/* Check whether we can use vector access with length based on precision
   comparison.  So far, to keep it simple, we only allow the case that the
   precision of the target supported length is larger than the precision
   required by loop niters.  */

static bool
vect_verify_loop_lens (loop_vec_info loop_vinfo)
{
  if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
    return false;

  machine_mode len_load_mode = get_len_load_store_mode
    (loop_vinfo->vector_mode, true).require ();
  machine_mode len_store_mode = get_len_load_store_mode
    (loop_vinfo->vector_mode, false).require ();

  signed char partial_load_bias = internal_len_load_store_bias
    (IFN_LEN_LOAD, len_load_mode);

  signed char partial_store_bias = internal_len_load_store_bias
    (IFN_LEN_STORE, len_store_mode);

  gcc_assert (partial_load_bias == partial_store_bias);

  if (partial_load_bias == VECT_PARTIAL_BIAS_UNSUPPORTED)
    return false;

  /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit
     len_loads with a length of zero.  In order to avoid that we prohibit
     more than one loop length here.  */
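  /* (A bias of -1 typically means the target instruction interprets the
     length operand as a highest index rather than a count, so a length
     of zero is not encodable.  With a single length control the vector
     loop always handles at least one item per iteration, so the case
     cannot arise.)  */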
  if (partial_load_bias == -1
      && LOOP_VINFO_LENS (loop_vinfo).length () > 1)
    return false;

  LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) = partial_load_bias;

  unsigned int max_nitems_per_iter = 1;
  unsigned int i;
  rgroup_controls *rgl;
  /* Find the maximum number of items per iteration for every rgroup.  */
  FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), i, rgl)
    {
      unsigned nitems_per_iter = rgl->max_nscalars_per_iter * rgl->factor;
      max_nitems_per_iter = MAX (max_nitems_per_iter, nitems_per_iter);
    }

  /* Work out how many bits we need to represent the length limit.  */
  unsigned int min_ni_prec
    = vect_min_prec_for_max_niters (loop_vinfo, max_nitems_per_iter);

  /* Now use the maximum of below precisions for one suitable IV type:
     - the IV's natural precision
     - the precision needed to hold: the maximum number of scalar
       iterations multiplied by the scale factor (min_ni_prec above)
     - the Pmode precision

     If min_ni_prec is less than the precision of the current niters,
     we prefer to still use the niters type.  Prefer to use Pmode and
     wider IV to avoid narrow conversions.  */
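  /* E.g. (illustrative) with a 32-bit niters type, a min_ni_prec of 20
     bits and 64-bit Pmode, the maximum of the three is 64, so a 64-bit
     IV type is chosen.  */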

  unsigned int ni_prec
    = TYPE_PRECISION (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)));
  min_ni_prec = MAX (min_ni_prec, ni_prec);
  min_ni_prec = MAX (min_ni_prec, GET_MODE_BITSIZE (Pmode));

  tree iv_type = NULL_TREE;
  opt_scalar_int_mode tmode_iter;
  FOR_EACH_MODE_IN_CLASS (tmode_iter, MODE_INT)
    {
      scalar_mode tmode = tmode_iter.require ();
      unsigned int tbits = GET_MODE_BITSIZE (tmode);

      /* ??? Do we really want to construct one IV whose precision exceeds
	 BITS_PER_WORD?  */
      if (tbits > BITS_PER_WORD)
	break;

      /* Find the first available standard integral type.  */
      if (tbits >= min_ni_prec && targetm.scalar_mode_supported_p (tmode))
	{
	  iv_type = build_nonstandard_integer_type (tbits, true);
	  break;
	}
    }

  if (!iv_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't vectorize with length-based partial vectors"
			 " because there is no suitable iv type.\n");
      return false;
    }

  LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = iv_type;
  LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;

  return true;
}

/* Calculate the cost of one scalar iteration of the loop.  */
static void
vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes, factor;
  int innerloop_iters, i;

  DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost");

  /* Gather costs for statements in the scalar loop.  */

  /* FORNOW.  */
  innerloop_iters = 1;
  if (loop->inner)
    innerloop_iters = LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      gimple_stmt_iterator si;
      basic_block bb = bbs[i];

      if (bb->loop_father == loop->inner)
	factor = innerloop_iters;
      else
	factor = 1;

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);

	  if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
	    continue;

	  /* Skip stmts that are not vectorized inside the loop.  */
	  stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info);
	  if (!STMT_VINFO_RELEVANT_P (vstmt_info)
	      && (!STMT_VINFO_LIVE_P (vstmt_info)
		  || !VECTORIZABLE_CYCLE_DEF
			(STMT_VINFO_DEF_TYPE (vstmt_info))))
	    continue;

	  vect_cost_for_stmt kind;
	  if (STMT_VINFO_DATA_REF (stmt_info))
	    {
	      if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
		kind = scalar_load;
	      else
		kind = scalar_store;
	    }
	  else if (vect_nop_conversion_p (stmt_info))
	    continue;
	  else
	    kind = scalar_stmt;

	  /* We are using vect_prologue here to avoid scaling twice
	     by the inner loop factor.  */
	  record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
			    factor, kind, stmt_info, 0, vect_prologue);
	}
    }

  /* Now accumulate cost.  */
  loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
  add_stmt_costs (loop_vinfo->scalar_costs,
		  &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo));
  loop_vinfo->scalar_costs->finish_cost (nullptr);
}


/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough
   - the number of iterations can be analyzed, i.e., a countable loop.  The
     niter could be analyzed under some assumptions.  */

opt_result
vect_analyze_loop_form (class loop *loop, vect_loop_form_info *info)
{
  DUMP_VECT_SCOPE ("vect_analyze_loop_form");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  info->inner_loop_cond = NULL;
  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
	 exactly 2 (the header and latch).  Vectorizable inner-most loops
	 look like this:

			(pre-header)
			   |
			  header <--------+
			   | |            |
			   | +--> latch --+
			   |
			(exit-bb)  */

      if (loop->num_nodes != 2)
	return opt_result::failure_at (vect_location,
				       "not vectorized:"
				       " control flow in loop.\n");

      if (empty_block_p (loop->header))
	return opt_result::failure_at (vect_location,
				       "not vectorized: empty loop.\n");
    }
  else
    {
      class loop *innerloop = loop->inner;
      edge entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
	 contains a single inner loop, and the number of BBs is exactly 5.
	 Vectorizable outer-loops look like this:

			(pre-header)
			   |
			  header <---+
			   |         |
			  inner-loop |
			   |         |
			  tail ------+
			   |
			(exit-bb)

	 The inner-loop has the properties expected of inner-most loops
	 as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
	return opt_result::failure_at (vect_location,
				       "not vectorized:"
				       " multiple nested loops.\n");

      if (loop->num_nodes != 5)
	return opt_result::failure_at (vect_location,
				       "not vectorized:"
				       " control flow in loop.\n");

      entryedge = loop_preheader_edge (innerloop);
      if (entryedge->src != loop->header
	  || !single_exit (innerloop)
	  || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
	return opt_result::failure_at (vect_location,
				       "not vectorized:"
				       " unsupported outerloop form.\n");

      /* Analyze the inner-loop.  */
      vect_loop_form_info inner;
      opt_result res = vect_analyze_loop_form (loop->inner, &inner);
      if (!res)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: Bad inner loop.\n");
	  return res;
	}

      /* Don't support analyzing niter under assumptions for inner
	 loop.  */
      if (!integer_onep (inner.assumptions))
	return opt_result::failure_at (vect_location,
				       "not vectorized: Bad inner loop.\n");

      if (!expr_invariant_in_loop_p (loop, inner.number_of_iterations))
	return opt_result::failure_at (vect_location,
				       "not vectorized: inner-loop count not"
				       " invariant.\n");

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Considering outer-loop vectorization.\n");
      info->inner_loop_cond = inner.loop_cond;
    }

  if (!single_exit (loop))
    return opt_result::failure_at (vect_location,
				   "not vectorized: multiple exits.\n");
  if (EDGE_COUNT (loop->header->preds) != 2)
    return opt_result::failure_at (vect_location,
				   "not vectorized:"
				   " too many incoming edges.\n");

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
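  /* I.e. the expected shape is roughly (illustrative):

	 header:
	   ...all executable statements...
	   if (cond) goto header; else goto exit;

     rather than a loop with the exit test before the body.  */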
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (phi_nodes (loop->latch)))
    return opt_result::failure_at (vect_location,
				   "not vectorized: latch block not empty.\n");

  /* Make sure the exit is not abnormal.  */
  edge e = single_exit (loop);
  if (e->flags & EDGE_ABNORMAL)
    return opt_result::failure_at (vect_location,
				   "not vectorized:"
				   " abnormal loop exit edge.\n");

  info->loop_cond
    = vect_get_loop_niters (loop, &info->assumptions,
			    &info->number_of_iterations,
			    &info->number_of_iterationsm1);
  if (!info->loop_cond)
    return opt_result::failure_at
      (vect_location,
       "not vectorized: complicated exit condition.\n");

  if (integer_zerop (info->assumptions)
      || !info->number_of_iterations
      || chrec_contains_undetermined (info->number_of_iterations))
    return opt_result::failure_at
      (info->loop_cond,
       "not vectorized: number of iterations cannot be computed.\n");

  if (integer_zerop (info->number_of_iterations))
    return opt_result::failure_at
      (info->loop_cond,
       "not vectorized: number of iterations = 0.\n");

  if (!(tree_fits_shwi_p (info->number_of_iterations)
	&& tree_to_shwi (info->number_of_iterations) > 0))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Symbolic number of iterations is ");
	  dump_generic_expr (MSG_NOTE, TDF_DETAILS, info->number_of_iterations);
	  dump_printf (MSG_NOTE, "\n");
	}
    }

  return opt_result::success ();
}

/* Create a loop_vec_info for LOOP with SHARED and the
   vect_analyze_loop_form result.  */

loop_vec_info
vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
			const vect_loop_form_info *info,
			loop_vec_info main_loop_info)
{
  loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared);
  LOOP_VINFO_NITERSM1 (loop_vinfo) = info->number_of_iterationsm1;
  LOOP_VINFO_NITERS (loop_vinfo) = info->number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = info->number_of_iterations;
  LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = main_loop_info;
  /* Also record the assumptions for versioning.  */
  if (!integer_onep (info->assumptions) && !main_loop_info)
    LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = info->assumptions;

  stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (info->loop_cond);
  STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type;
  if (info->inner_loop_cond)
    {
      stmt_vec_info inner_loop_cond_info
	= loop_vinfo->lookup_stmt (info->inner_loop_cond);
      STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type;
      /* If we have an estimate on the number of iterations of the inner
	 loop use that to limit the scale for costing, otherwise use
	 --param vect-inner-loop-cost-factor literally.  */
      widest_int nit;
      if (estimated_stmt_executions (loop->inner, &nit))
	LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo)
	  = wi::smin (nit, param_vect_inner_loop_cost_factor).to_uhwi ();
    }

  return loop_vinfo;
}


/* Scan the loop stmts and, depending on whether there are any (non-)SLP
   statements, update the vectorization factor.  */

static void
vect_update_vf_for_slp (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  poly_uint64 vectorization_factor;
  int i;

  DUMP_VECT_SCOPE ("vect_update_vf_for_slp");

  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  gcc_assert (known_ne (vectorization_factor, 0U));

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by
     the SLP instances.  If that unrolling factor is 1, we say that we
     perform pure SLP on the loop - cross iteration parallelism is not
     exploited.  */
  bool only_slp_in_loop = true;
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
	   gsi_next (&si))
	{
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ());
	  if (!stmt_info)
	    continue;
	  if ((STMT_VINFO_RELEVANT_P (stmt_info)
	       || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
	      && !PURE_SLP_STMT (stmt_info))
	    /* STMT needs both SLP and loop-based vectorization.  */
	    only_slp_in_loop = false;
	}
      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
	   gsi_next (&si))
	{
	  if (is_gimple_debug (gsi_stmt (si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  stmt_info = vect_stmt_to_vectorize (stmt_info);
	  if ((STMT_VINFO_RELEVANT_P (stmt_info)
	       || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
	      && !PURE_SLP_STMT (stmt_info))
	    /* STMT needs both SLP and loop-based vectorization.  */
	    only_slp_in_loop = false;
	}
    }

  if (only_slp_in_loop)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Loop contains only SLP stmts\n");
      vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Loop contains SLP and non-SLP stmts\n");
      /* Both the vectorization factor and unroll factor have the form
	 GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X,
	 so they must have a common multiple.  */
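      /* E.g. (illustrative) a vectorization factor of 4 and an SLP
	 unrolling factor of 6 combine to a common multiple of 12.  */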
      vectorization_factor
	= force_common_multiple (vectorization_factor,
				 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
    }

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "Updating vectorization factor to ");
      dump_dec (MSG_NOTE, vectorization_factor);
      dump_printf (MSG_NOTE, ".\n");
    }
}

/* Return true if STMT_INFO describes a double reduction phi and if
   the other phi in the reduction is also relevant for vectorization.
   This rejects cases such as:

      outer1:
	x_1 = PHI <x_3(outer2), ...>;
	...

      inner:
	x_2 = ...;
	...

      outer2:
	x_3 = PHI <x_2(inner)>;

   if nothing in x_2 or elsewhere makes x_1 relevant.  */

static bool
vect_active_double_reduction_p (stmt_vec_info stmt_info)
{
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
    return false;

  return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info));
}

/* Function vect_analyze_loop_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static opt_result
vect_analyze_loop_operations (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  int i;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  bool ok;

  DUMP_VECT_SCOPE ("vect_analyze_loop_operations");

  auto_vec<stmt_info_for_cost> cost_vec;

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
	   gsi_next (&si))
	{
	  gphi *phi = si.phi ();
	  ok = true;

	  stmt_info = loop_vinfo->lookup_stmt (phi);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G",
			     (gimple *) phi);
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    continue;

	  /* Inner-loop loop-closed exit phi in outer-loop vectorization
	     (i.e., a phi in the tail of the outer-loop).  */
	  if (! is_loop_header_bb_p (bb))
	    {
	      /* FORNOW: we currently don't support the case that these phis
		 are not used in the outerloop (unless it is double reduction,
		 i.e., this phi is vect_reduction_def), because this case
		 requires us to actually do something here.  */
	      if (STMT_VINFO_LIVE_P (stmt_info)
		  && !vect_active_double_reduction_p (stmt_info))
		return opt_result::failure_at (phi,
					       "Unsupported loop-closed phi"
					       " in outer-loop.\n");

	      /* If PHI is used in the outer loop, we check that its operand
		 is defined in the inner loop.  */
	      if (STMT_VINFO_RELEVANT_P (stmt_info))
		{
		  tree phi_op;

		  if (gimple_phi_num_args (phi) != 1)
		    return opt_result::failure_at (phi, "unsupported phi");

		  phi_op = PHI_ARG_DEF (phi, 0);
		  stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
		  if (!op_def_info)
		    return opt_result::failure_at (phi, "unsupported phi\n");

		  if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
		      && (STMT_VINFO_RELEVANT (op_def_info)
			  != vect_used_in_outer_by_reduction))
		    return opt_result::failure_at (phi, "unsupported phi\n");

		  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
		       || (STMT_VINFO_DEF_TYPE (stmt_info)
			   == vect_double_reduction_def))
		      && !vectorizable_lc_phi (loop_vinfo,
					       stmt_info, NULL, NULL))
		    return opt_result::failure_at (phi, "unsupported phi\n");
		}

	      continue;
	    }

	  gcc_assert (stmt_info);

	  if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
	       || STMT_VINFO_LIVE_P (stmt_info))
	      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def
	      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
	    /* A scalar-dependence cycle that we don't support.  */
	    return opt_result::failure_at (phi,
					   "not vectorized:"
					   " scalar dependence cycle.\n");

	  if (STMT_VINFO_RELEVANT_P (stmt_info))
	    {
	      need_to_vectorize = true;
	      if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
		  && ! PURE_SLP_STMT (stmt_info))
		ok = vectorizable_induction (loop_vinfo,
					     stmt_info, NULL, NULL,
					     &cost_vec);
	      else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
			|| (STMT_VINFO_DEF_TYPE (stmt_info)
			    == vect_double_reduction_def)
			|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
		       && ! PURE_SLP_STMT (stmt_info))
		ok = vectorizable_reduction (loop_vinfo,
					     stmt_info, NULL, NULL, &cost_vec);
	      else if ((STMT_VINFO_DEF_TYPE (stmt_info)
			== vect_first_order_recurrence)
		       && ! PURE_SLP_STMT (stmt_info))
		ok = vectorizable_recurr (loop_vinfo, stmt_info, NULL, NULL,
					  &cost_vec);
	    }

	  /* SLP PHIs are tested by vect_slp_analyze_node_operations.  */
	  if (ok
	      && STMT_VINFO_LIVE_P (stmt_info)
	      && !PURE_SLP_STMT (stmt_info))
	    ok = vectorizable_live_operation (loop_vinfo,
					      stmt_info, NULL, NULL, NULL,
					      -1, false, &cost_vec);

	  if (!ok)
	    return opt_result::failure_at (phi,
					   "not vectorized: relevant phi not "
					   "supported: %G",
					   static_cast <gimple *> (phi));
	}

      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
	   gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);
	  if (!gimple_clobber_p (stmt)
	      && !is_gimple_debug (stmt))
	    {
	      opt_result res
		= vect_analyze_stmt (loop_vinfo,
				     loop_vinfo->lookup_stmt (stmt),
				     &need_to_vectorize,
				     NULL, NULL, &cost_vec);
	      if (!res)
		return res;
	    }
	}
    } /* bbs */

  add_stmt_costs (loop_vinfo->vector_costs, &cost_vec);

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "All the computation can be taken out of the loop.\n");
      return opt_result::failure_at
	(vect_location,
	 "not vectorized: redundant loop. no profit to vectorize.\n");
    }

  return opt_result::success ();
}

/* Return true if we know that the iteration count is smaller than the
   vectorization factor.  Return false if it isn't, or if we can't be sure
   either way.  */

static bool
vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo)
{
  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);

  HOST_WIDE_INT max_niter;
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
  else
    max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));

  if (max_niter != -1 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
    return true;

  return false;
}

/* Analyze the cost of the loop described by LOOP_VINFO.  Decide if it
   is worthwhile to vectorize.  Return 1 if definitely yes, 0 if
   definitely no, or -1 if it's worth retrying.  */

static int
vect_analyze_loop_costing (loop_vec_info loop_vinfo,
			   unsigned *suggested_unroll_factor)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);

  /* Only loops that can handle partially-populated vectors can have iteration
     counts less than the vectorization factor.  */
  if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
    {
      if (vect_known_niters_smaller_than_vf (loop_vinfo))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: iteration count smaller than "
			     "vectorization factor.\n");
	  return 0;
	}
    }

  /* If using the "very cheap" model, reject cases in which we'd keep
     a copy of the scalar code (even if we might be able to vectorize it).  */
  if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
      && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
	  || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
	  || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "some scalar iterations would need to be peeled\n");
      return 0;
    }

  int min_profitable_iters, min_profitable_estimate;
  vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
				      &min_profitable_estimate,
				      suggested_unroll_factor);

  if (min_profitable_iters < 0)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization not profitable.\n");
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vector version will never be "
			 "profitable.\n");
      return -1;
    }

  int min_scalar_loop_bound = (param_min_vect_loop_bound
			       * assumed_vf);

  /* Use the cost model only if it is more conservative than user specified
     threshold.  */
  unsigned int th = (unsigned) MAX (min_scalar_loop_bound,
				    min_profitable_iters);

  LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization not profitable.\n");
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "not vectorized: iteration count smaller than user "
			 "specified loop bound parameter or minimum profitable "
			 "iterations (whichever is more conservative).\n");
      return 0;
    }

  /* The static profitability threshold min_profitable_estimate includes
     the cost of having to check at runtime whether the scalar loop
     should be used instead.  If it turns out that we don't need or want
     such a check, the threshold we should use for the static estimate
     is simply the point at which the vector loop becomes more profitable
     than the scalar loop.  */
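  /* E.g. (illustrative) if the vector loop wins from 10 iterations
     onwards but accounting for the runtime guard pushes the estimate to
     14, and no versioning, peeling or runtime check is in fact needed,
     then 10 is the right static threshold.  */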
2047 | if (min_profitable_estimate > min_profitable_iters | |||
2048 | && !LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || (( loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond)) | |||
2049 | && !LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter | |||
2050 | && !LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment | |||
2051 | && !vect_apply_runtime_profitability_check_p (loop_vinfo)) | |||
2052 | { | |||
2053 | if (dump_enabled_p ()) | |||
2054 | dump_printf_loc (MSG_NOTE, vect_location, "no need for a runtime" | |||
2055 | " choice between the scalar and vector loops\n"); | |||
2056 | min_profitable_estimate = min_profitable_iters; | |||
2057 | } | |||
2058 | ||||
2059 | /* If the vector loop needs multiple iterations to be beneficial then | |||
2060 | things are probably too close to call, and the conservative thing | |||
2061 | would be to stick with the scalar code. */ | |||
2062 | if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP | |||
2063 | && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo)) | |||
2064 | { | |||
2065 | if (dump_enabled_p ()) | |||
2066 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2067 | "one iteration of the vector loop would be" | |||
2068 | " more expensive than the equivalent number of" | |||
2069 | " iterations of the scalar loop\n"); | |||
2070 | return 0; | |||
2071 | } | |||
2072 | ||||
2073 | HOST_WIDE_INTlong estimated_niter; | |||
2074 | ||||
2075 | /* If we are vectorizing an epilogue then we know the maximum number of | |||
2076 | scalar iterations it will cover is at least one lower than the | |||
2077 | vectorization factor of the main loop. */ | |||
2078 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)) | |||
2079 | estimated_niter | |||
2080 | = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info) - 1; | |||
2081 | else | |||
2082 | { | |||
2083 | estimated_niter = estimated_stmt_executions_int (loop); | |||
2084 | if (estimated_niter == -1) | |||
2085 | estimated_niter = likely_max_stmt_executions_int (loop); | |||
2086 | } | |||
2087 | if (estimated_niter != -1 | |||
2088 | && ((unsigned HOST_WIDE_INTlong) estimated_niter | |||
2089 | < MAX (th, (unsigned) min_profitable_estimate)((th) > ((unsigned) min_profitable_estimate) ? (th) : ((unsigned ) min_profitable_estimate)))) | |||
2090 | { | |||
2091 | if (dump_enabled_p ()) | |||
2092 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2093 | "not vectorized: estimated iteration count too " | |||
2094 | "small.\n"); | |||
2095 | if (dump_enabled_p ()) | |||
2096 | dump_printf_loc (MSG_NOTE, vect_location, | |||
2097 | "not vectorized: estimated iteration count smaller " | |||
2098 | "than specified loop bound parameter or minimum " | |||
2099 | "profitable iterations (whichever is more " | |||
2100 | "conservative).\n"); | |||
2101 | return -1; | |||
2102 | } | |||
2103 | ||||
2104 | return 1; | |||
2105 | } | |||
2106 | ||||
2107 | static opt_result | |||
2108 | vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, | |||
2109 | vec<data_reference_p> *datarefs, | |||
2110 | unsigned int *n_stmts) | |||
2111 | { | |||
2112 | *n_stmts = 0; | |||
2113 | for (unsigned i = 0; i < loop->num_nodes; i++) | |||
2114 | for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); | |||
2115 | !gsi_end_p (gsi); gsi_next (&gsi)) | |||
2116 | { | |||
2117 | gimple *stmt = gsi_stmt (gsi); | |||
2118 | if (is_gimple_debug (stmt)) | |||
2119 | continue; | |||
2120 | ++(*n_stmts); | |||
2121 | opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs, | |||
2122 | NULLnullptr, 0); | |||
2123 | if (!res) | |||
2124 | { | |||
2125 | if (is_gimple_call (stmt) && loop->safelen) | |||
2126 | { | |||
2127 | tree fndecl = gimple_call_fndecl (stmt), op; | |||
2128 | if (fndecl == NULL_TREE | |||
2129 | && gimple_call_internal_p (stmt, IFN_MASK_CALL)) | |||
2130 | { | |||
2131 | fndecl = gimple_call_arg (stmt, 0); | |||
2132 | gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR); | |||
2133 | fndecl = TREE_OPERAND (fndecl, 0); | |||
2134 | gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL); | |||
2135 | } | |||
2136 | if (fndecl != NULL_TREE) | |||
2137 | { | |||
2138 | cgraph_node *node = cgraph_node::get (fndecl); | |||
2139 | if (node != NULL && node->simd_clones != NULL) | |||
2140 | { | |||
2141 | unsigned int j, n = gimple_call_num_args (stmt); | |||
2142 | for (j = 0; j < n; j++) | |||
2143 | { | |||
2144 | op = gimple_call_arg (stmt, j); | |||
2145 | if (DECL_P (op) | |||
2146 | || (REFERENCE_CLASS_P (op) | |||
2147 | && get_base_address (op))) | |||
2148 | break; | |||
2149 | } | |||
2150 | op = gimple_call_lhs (stmt); | |||
2151 | /* Ignore #pragma omp declare simd functions | |||
2152 | if they don't have data references in the | |||
2153 | call stmt itself. */ | |||
2154 | if (j == n | |||
2155 | && !(op | |||
2156 | && (DECL_P (op) | |||
2157 | || (REFERENCE_CLASS_P (op) | |||
2158 | && get_base_address (op))))) | |||
2159 | continue; | |||
2160 | } | |||
2161 | } | |||
2162 | } | |||
2163 | return res; | |||
2164 | } | |||
2165 | /* If dependence analysis will give up due to the limit on the | |||
2166 | number of datarefs stop here and fail fatally. */ | |||
2167 | if (datarefs->length () | |||
2168 | > (unsigned) param_loop_max_datarefs_for_datadeps) | |||
2169 | return opt_result::failure_at (stmt, "exceeded param " | |||
2170 | "loop-max-datarefs-for-datadeps\n"); | |||
2171 | } | |||
2172 | return opt_result::success (); | |||
2173 | } | |||
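| /* Hypothetical example of the simd-clone escape hatch above: in a | |||
|    '#pragma omp simd' loop, a call 'x_1 = foo (y_2)' whose operands are | |||
|    scalar SSA names yields no analyzable data reference, but when foo | |||
|    is a '#pragma omp declare simd' function with clones the call is | |||
|    skipped rather than failing the whole analysis.  */ | |||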
2174 | ||||
2175 | /* Look for SLP-only access groups and turn each individual access into its own | |||
2176 | group. */ | |||
2177 | static void | |||
2178 | vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo) | |||
2179 | { | |||
2180 | unsigned int i; | |||
2181 | struct data_reference *dr; | |||
2182 | ||||
2183 | DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups"); | |||
2184 | ||||
2185 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); | |||
2186 | FOR_EACH_VEC_ELT (datarefs, i, dr) | |||
2187 | { | |||
2188 | gcc_assert (DR_REF (dr)); | |||
2189 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr)); | |||
2190 | ||||
2191 | /* Check if the load is a part of an interleaving chain. */ | |||
2192 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) | |||
2193 | { | |||
2194 | stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info); | |||
2195 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_element); | |||
2196 | unsigned int group_size = DR_GROUP_SIZE (first_element); | |||
2197 | ||||
2198 | /* Check if SLP-only groups. */ | |||
2199 | if (!STMT_SLP_TYPE (stmt_info) | |||
2200 | && STMT_VINFO_SLP_VECT_ONLY (first_element)) | |||
2201 | { | |||
2202 | /* Dissolve the group. */ | |||
2203 | STMT_VINFO_SLP_VECT_ONLY (first_element) = false; | |||
2204 | ||||
2205 | stmt_vec_info vinfo = first_element; | |||
2206 | while (vinfo) | |||
2207 | { | |||
2208 | stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo); | |||
2209 | DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo; | |||
2210 | DR_GROUP_NEXT_ELEMENT (vinfo) = NULL; | |||
2211 | DR_GROUP_SIZE (vinfo) = 1; | |||
2212 | if (STMT_VINFO_STRIDED_P (first_element)) | |||
2213 | DR_GROUP_GAP (vinfo) = 0; | |||
2214 | else | |||
2215 | DR_GROUP_GAP (vinfo) = group_size - 1; | |||
2216 | /* Duplicate and adjust alignment info, it needs to | |||
2217 | be present on each group leader, see dr_misalignment. */ | |||
2218 | if (vinfo != first_element) | |||
2219 | { | |||
2220 | dr_vec_info *dr_info2 = STMT_VINFO_DR_INFO (vinfo); | |||
2221 | dr_info2->target_alignment = dr_info->target_alignment; | |||
2222 | int misalignment = dr_info->misalignment; | |||
2223 | if (misalignment != DR_MISALIGNMENT_UNKNOWN) | |||
2224 | { | |||
2225 | HOST_WIDE_INT diff | |||
2226 | = (TREE_INT_CST_LOW (DR_INIT (dr_info2->dr)) | |||
2227 | - TREE_INT_CST_LOW (DR_INIT (dr_info->dr))); | |||
2228 | unsigned HOST_WIDE_INT align_c | |||
2229 | = dr_info->target_alignment.to_constant (); | |||
2230 | misalignment = (misalignment + diff) % align_c; | |||
2231 | } | |||
2232 | dr_info2->misalignment = misalignment; | |||
2233 | } | |||
2234 | vinfo = next; | |||
2235 | } | |||
2236 | } | |||
2237 | } | |||
2238 | } | |||
2239 | } | |||
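| /* Worked example for the misalignment adjustment above (values | |||
|    assumed): with a target alignment of 16 bytes, a group leader | |||
|    misalignment of 4 and a DR_INIT difference of 8 between the two | |||
|    accesses, the dissolved element gets (4 + 8) % 16 == 12.  */ | |||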
2240 | ||||
2241 | /* Determine if operating on full vectors for LOOP_VINFO might leave | |||
2242 | some scalar iterations still to do. If so, decide how we should | |||
2243 | handle those scalar iterations. The possibilities are: | |||
2244 | ||||
2245 | (1) Make LOOP_VINFO operate on partial vectors instead of full vectors. | |||
2246 | In this case: | |||
2247 | ||||
2248 | LOOP_VINFO_USING_PARTIAL_VECTORS_P == true | |||
2249 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false | |||
2250 | LOOP_VINFO_PEELING_FOR_NITER == false | |||
2251 | ||||
2252 | (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop | |||
2253 | to handle the remaining scalar iterations. In this case: | |||
2254 | ||||
2255 | LOOP_VINFO_USING_PARTIAL_VECTORS_P == false | |||
2256 | LOOP_VINFO_PEELING_FOR_NITER == true | |||
2257 | ||||
2258 | There are two choices: | |||
2259 | ||||
2260 | (2a) Consider vectorizing the epilogue loop at the same VF as the | |||
2261 | main loop, but using partial vectors instead of full vectors. | |||
2262 | In this case: | |||
2263 | ||||
2264 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true | |||
2265 | ||||
2266 | (2b) Consider vectorizing the epilogue loop at lower VFs only. | |||
2267 | In this case: | |||
2268 | ||||
2269 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false | |||
2270 | ||||
2271 | When FOR_EPILOGUE_P is true, make this determination based on the | |||
2272 | assumption that LOOP_VINFO is an epilogue loop, otherwise make it | |||
2273 | based on the assumption that LOOP_VINFO is the main loop. The caller | |||
2274 | has made sure that the number of iterations is set appropriately for | |||
2275 | this value of FOR_EPILOGUE_P. */ | |||
2276 | ||||
2277 | opt_result | |||
2278 | vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo, | |||
2279 | bool for_epilogue_p) | |||
2280 | { | |||
2281 | /* Determine whether there would be any scalar iterations left over. */ | |||
2282 | bool need_peeling_or_partial_vectors_p | |||
2283 | = vect_need_peeling_or_partial_vectors_p (loop_vinfo); | |||
2284 | ||||
2285 | /* Decide whether to vectorize the loop with partial vectors. */ | |||
2286 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; | |||
2287 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; | |||
2288 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) | |||
2289 | && need_peeling_or_partial_vectors_p) | |||
2290 | { | |||
2291 | /* For partial-vector-usage=1, try to push the handling of partial | |||
2292 | vectors to the epilogue, with the main loop continuing to operate | |||
2293 | on full vectors. | |||
2294 | ||||
2295 | If we are unrolling we also do not want to use partial vectors. This | |||
2296 | is to avoid the overhead of generating multiple masks and also to | |||
2297 | avoid having to execute entire iterations of FALSE masked instructions | |||
2298 | when dealing with one or fewer full iterations. | |||
2299 | ||||
2300 | ??? We could then end up failing to use partial vectors if we | |||
2301 | decide to peel iterations into a prologue, and if the main loop | |||
2302 | then ends up processing fewer than VF iterations. */ | |||
2303 | if ((param_vect_partial_vector_usage == 1 | |||
2304 | || loop_vinfo->suggested_unroll_factor > 1) | |||
2305 | && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) | |||
2306 | && !vect_known_niters_smaller_than_vf (loop_vinfo)) | |||
2307 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true; | |||
2308 | else | |||
2309 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true; | |||
2310 | } | |||
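| /* For example, under an assumed --param vect-partial-vector-usage=1 | |||
|    configuration for a main loop, the branch above keeps the main loop | |||
|    on full vectors and only marks the epilogue as a candidate for | |||
|    partial vectors.  */ | |||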
2311 | ||||
2312 | if (dump_enabled_p ()) | |||
2313 | { | |||
2314 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) | |||
2315 | dump_printf_loc (MSG_NOTE, vect_location, | |||
2316 | "operating on partial vectors%s.\n", | |||
2317 | for_epilogue_p ? " for epilogue loop" : ""); | |||
2318 | else | |||
2319 | dump_printf_loc (MSG_NOTE, vect_location, | |||
2320 | "operating only on full vectors%s.\n", | |||
2321 | for_epilogue_p ? " for epilogue loop" : ""); | |||
2322 | } | |||
2323 | ||||
2324 | if (for_epilogue_p) | |||
2325 | { | |||
2326 | loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); | |||
2327 | gcc_assert (orig_loop_vinfo); | |||
2328 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) | |||
2329 | gcc_assert (known_lt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), | |||
2330 | LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo))); | |||
2331 | } | |||
2332 | ||||
2333 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | |||
2334 | && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) | |||
2335 | { | |||
2336 | /* Check that the loop processes at least one full vector. */ | |||
2337 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | |||
2338 | tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo); | |||
2339 | if (known_lt (wi::to_widest (scalar_niters), vf)) | |||
2340 | return opt_result::failure_at (vect_location, | |||
2341 | "loop does not have enough iterations" | |||
2342 | " to support vectorization.\n"); | |||
2343 | ||||
2344 | /* If we need to peel an extra epilogue iteration to handle data | |||
2345 | accesses with gaps, check that there are enough scalar iterations | |||
2346 | available. | |||
2347 | ||||
2348 | The check above is redundant with this one when peeling for gaps, | |||
2349 | but the distinction is useful for diagnostics. */ | |||
2350 | tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo); | |||
2351 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) | |||
2352 | && known_lt (wi::to_widest (scalar_nitersm1), vf)) | |||
2353 | return opt_result::failure_at (vect_location, | |||
2354 | "loop does not have enough iterations" | |||
2355 | " to support peeling for gaps.\n"); | |||
2356 | } | |||
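| /* E.g. with assumed values: VF 8 and 5 known scalar iterations fails | |||
|    the first check above; VF 8 with exactly 8 iterations plus peeling | |||
|    for gaps fails the second, since only niters - 1 == 7 iterations | |||
|    remain for the vector loop.  */ | |||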
2357 | ||||
2358 | LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) | |||
2359 | = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) | |||
2360 | && need_peeling_or_partial_vectors_p); | |||
2361 | ||||
2362 | return opt_result::success (); | |||
2363 | } | |||
2364 | ||||
2365 | /* Function vect_analyze_loop_2. | |||
2366 | ||||
2367 | Apply a set of analyses on LOOP specified by LOOP_VINFO; the different | |||
2368 | analyses will record information in some members of LOOP_VINFO.  FATAL | |||
2369 | indicates whether some analysis hit a fatal error.  If the pointer | |||
2370 | SUGGESTED_UNROLL_FACTOR is non-NULL, it is intended to be filled with | |||
2371 | the suggested unroll factor worked out during the analysis, while a | |||
2372 | NULL pointer indicates that the suggested unroll factor is being | |||
2373 | applied.  SLP_DONE_FOR_SUGGESTED_UF holds the SLP decision made when | |||
2374 | the suggested unroll factor was worked out. */ | |||
2375 | static opt_result | |||
2376 | vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, | |||
2377 | unsigned *suggested_unroll_factor, | |||
2378 | bool& slp_done_for_suggested_uf) | |||
2379 | { | |||
2380 | opt_result ok = opt_result::success (); | |||
2381 | int res; | |||
2382 | unsigned int max_vf = MAX_VECTORIZATION_FACTOR; | |||
2383 | poly_uint64 min_vf = 2; | |||
2384 | loop_vec_info orig_loop_vinfo = NULL; | |||
2385 | ||||
2386 | /* If we are dealing with an epilogue then orig_loop_vinfo points to the | |||
2387 | loop_vec_info of the first vectorized loop. */ | |||
2388 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) | |||
2389 | orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); | |||
2390 | else | |||
2391 | orig_loop_vinfo = loop_vinfo; | |||
2392 | gcc_assert (orig_loop_vinfo); | |||
2393 | ||||
2394 | /* The first group of checks is independent of the vector size. */ | |||
2395 | fatal = true; | |||
2396 | ||||
2397 | if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo) | |||
2398 | && integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo))) | |||
2399 | return opt_result::failure_at (vect_location, | |||
2400 | "not vectorized: simd if(0)\n"); | |||
2401 | ||||
2402 | /* Find all data references in the loop (which correspond to vdefs/vuses) | |||
2403 | and analyze their evolution in the loop. */ | |||
2404 | ||||
2405 | loop_p loop = LOOP_VINFO_LOOP (loop_vinfo); | |||
2406 | ||||
2407 | /* Gather the data references and count stmts in the loop. */ | |||
2408 | if (!LOOP_VINFO_DATAREFS (loop_vinfo).exists ()) | |||
2409 | { | |||
2410 | opt_result res | |||
2411 | = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo), | |||
2412 | &LOOP_VINFO_DATAREFS (loop_vinfo), | |||
2413 | &LOOP_VINFO_N_STMTS (loop_vinfo)); | |||
2414 | if (!res) | |||
2415 | { | |||
2416 | if (dump_enabled_p ()) | |||
2417 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2418 | "not vectorized: loop contains function " | |||
2419 | "calls or data references that cannot " | |||
2420 | "be analyzed\n"); | |||
2421 | return res; | |||
2422 | } | |||
2423 | loop_vinfo->shared->save_datarefs (); | |||
2424 | } | |||
2425 | else | |||
2426 | loop_vinfo->shared->check_datarefs (); | |||
2427 | ||||
2428 | /* Analyze the data references and also adjust the minimal | |||
2429 | vectorization factor according to the loads and stores. */ | |||
2430 | ||||
2431 | ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal); | |||
2432 | if (!ok) | |||
2433 | { | |||
2434 | if (dump_enabled_p ()) | |||
2435 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2436 | "bad data references.\n"); | |||
2437 | return ok; | |||
2438 | } | |||
2439 | ||||
2440 | /* Check if we are applying unroll factor now. */ | |||
2441 | bool applying_suggested_uf = loop_vinfo->suggested_unroll_factor > 1; | |||
2442 | gcc_assert (!applying_suggested_uf || !suggested_unroll_factor); | |||
2443 | ||||
2444 | /* If the slp decision is false when suggested unroll factor is worked | |||
2445 | out, and we are applying suggested unroll factor, we can simply skip | |||
2446 | all slp related analyses this time. */ | |||
2447 | bool slp = !applying_suggested_uf || slp_done_for_suggested_uf; | |||
2448 | ||||
2449 | /* Classify all cross-iteration scalar data-flow cycles. | |||
2450 | Cross-iteration cycles caused by virtual phis are analyzed separately. */ | |||
2451 | vect_analyze_scalar_cycles (loop_vinfo, slp); | |||
2452 | ||||
2453 | vect_pattern_recog (loop_vinfo); | |||
2454 | ||||
2455 | vect_fixup_scalar_cycles_with_patterns (loop_vinfo); | |||
2456 | ||||
2457 | /* Analyze the access patterns of the data-refs in the loop (consecutive, | |||
2458 | complex, etc.). FORNOW: Only handle consecutive access pattern. */ | |||
2459 | ||||
2460 | ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL); | |||
2461 | if (!ok) | |||
2462 | { | |||
2463 | if (dump_enabled_p ()) | |||
2464 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2465 | "bad data access.\n"); | |||
2466 | return ok; | |||
2467 | } | |||
2468 | ||||
2469 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ | |||
2470 | ||||
2471 | ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal); | |||
2472 | if (!ok) | |||
2473 | { | |||
2474 | if (dump_enabled_p ()) | |||
2475 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2476 | "unexpected pattern.\n"); | |||
2477 | return ok; | |||
2478 | } | |||
2479 | ||||
2480 | /* While the rest of the analysis below depends on it in some way. */ | |||
2481 | fatal = false; | |||
2482 | ||||
2483 | /* Analyze data dependences between the data-refs in the loop | |||
2484 | and adjust the maximum vectorization factor according to | |||
2485 | the dependences. | |||
2486 | FORNOW: fail at the first data dependence that we encounter. */ | |||
2487 | ||||
2488 | ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf); | |||
2489 | if (!ok) | |||
2490 | { | |||
2491 | if (dump_enabled_p ()) | |||
2492 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2493 | "bad data dependence.\n"); | |||
2494 | return ok; | |||
2495 | } | |||
2496 | if (max_vf != MAX_VECTORIZATION_FACTOR | |||
2497 | && maybe_lt (max_vf, min_vf)) | |||
2498 | return opt_result::failure_at (vect_location, "bad data dependence.\n"); | |||
2499 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf; | |||
2500 | ||||
2501 | ok = vect_determine_vectorization_factor (loop_vinfo); | |||
2502 | if (!ok) | |||
2503 | { | |||
2504 | if (dump_enabled_p ()) | |||
2505 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2506 | "can't determine vectorization factor.\n"); | |||
2507 | return ok; | |||
2508 | } | |||
2509 | if (max_vf != MAX_VECTORIZATION_FACTOR | |||
2510 | && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo))) | |||
2511 | return opt_result::failure_at (vect_location, "bad data dependence.\n"); | |||
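| /* E.g. with assumed values: a dependence distance of 2 caps max_vf at | |||
|    2, so if the data references need a minimum VF of 4, or the chosen | |||
|    VF exceeds 2, the checks above fail with "bad data dependence".  */ | |||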
2512 | ||||
2513 | /* Compute the scalar iteration cost. */ | |||
2514 | vect_compute_single_scalar_iteration_cost (loop_vinfo); | |||
2515 | ||||
2516 | poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | |||
2517 | ||||
2518 | if (slp) | |||
2519 | { | |||
2520 | /* Check the SLP opportunities in the loop, analyze and build | |||
2521 | SLP trees. */ | |||
2522 | ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo)); | |||
2523 | if (!ok) | |||
2524 | return ok; | |||
2525 | ||||
2526 | /* If there are any SLP instances mark them as pure_slp. */ | |||
2527 | slp = vect_make_slp_decision (loop_vinfo); | |||
2528 | if (slp) | |||
2529 | { | |||
2530 | /* Find stmts that need to be both vectorized and SLPed. */ | |||
2531 | vect_detect_hybrid_slp (loop_vinfo); | |||
2532 | ||||
2533 | /* Update the vectorization factor based on the SLP decision. */ | |||
2534 | vect_update_vf_for_slp (loop_vinfo); | |||
2535 | ||||
2536 | /* Optimize the SLP graph with the vectorization factor fixed. */ | |||
2537 | vect_optimize_slp (loop_vinfo); | |||
2538 | ||||
2539 | /* Gather the loads reachable from the SLP graph entries. */ | |||
2540 | vect_gather_slp_loads (loop_vinfo); | |||
2541 | } | |||
2542 | } | |||
2543 | ||||
2544 | bool saved_can_use_partial_vectors_p | |||
2545 | = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo); | |||
2546 | ||||
2547 | /* We don't expect to have to roll back to anything other than an empty | |||
2548 | set of rgroups. */ | |||
2549 | gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()); | |||
2550 | ||||
2551 | /* This is the point where we can re-start analysis with SLP forced off. */ | |||
2552 | start_over: | |||
2553 | ||||
2554 | /* Apply the suggested unrolling factor; this was determined by the | |||
2555 | backend during finish_cost the first time we ran the analysis for | |||
2556 | this vector mode. */ | |||
2557 | if (applying_suggested_uf) | |||
2558 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) *= loop_vinfo->suggested_unroll_factor; | |||
2559 | ||||
2560 | /* Now the vectorization factor is final. */ | |||
2561 | poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | |||
2562 | gcc_assert (known_ne (vectorization_factor, 0U)); | |||
2563 | ||||
2564 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ()) | |||
2565 | { | |||
2566 | dump_printf_loc (MSG_NOTE, vect_location, | |||
2567 | "vectorization_factor = "); | |||
2568 | dump_dec (MSG_NOTE, vectorization_factor); | |||
2569 | dump_printf (MSG_NOTE, ", niters = %wd\n", | |||
2570 | LOOP_VINFO_INT_NITERS (loop_vinfo)); | |||
2571 | } | |||
2572 | ||||
2573 | loop_vinfo->vector_costs = init_cost (loop_vinfo, false); | |||
2574 | ||||
2575 | /* Analyze the alignment of the data-refs in the loop. | |||
2576 | Fail if a data reference is found that cannot be vectorized. */ | |||
2577 | ||||
2578 | ok = vect_analyze_data_refs_alignment (loop_vinfo); | |||
2579 | if (!ok) | |||
2580 | { | |||
2581 | if (dump_enabled_p ()) | |||
2582 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2583 | "bad data alignment.\n"); | |||
2584 | return ok; | |||
2585 | } | |||
2586 | ||||
2587 | /* Prune the list of ddrs to be tested at run-time by versioning for alias. | |||
2588 | It is important to call pruning after vect_analyze_data_ref_accesses, | |||
2589 | since we use grouping information gathered by interleaving analysis. */ | |||
2590 | ok = vect_prune_runtime_alias_test_list (loop_vinfo); | |||
2591 | if (!ok) | |||
2592 | return ok; | |||
2593 | ||||
2594 | /* Do not invoke vect_enhance_data_refs_alignment for epilogue | |||
2595 | vectorization, since we do not want to add extra peeling or | |||
2596 | add versioning for alignment. */ | |||
2597 | if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) | |||
2598 | /* This pass will decide on using loop versioning and/or loop peeling in | |||
2599 | order to enhance the alignment of data references in the loop. */ | |||
2600 | ok = vect_enhance_data_refs_alignment (loop_vinfo); | |||
2601 | if (!ok) | |||
2602 | return ok; | |||
2603 | ||||
2604 | if (slp) | |||
2605 | { | |||
2606 | /* Analyze operations in the SLP instances. Note this may | |||
2607 | remove unsupported SLP instances which makes the above | |||
2608 | SLP kind detection invalid. */ | |||
2609 | unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); | |||
2610 | vect_slp_analyze_operations (loop_vinfo); | |||
2611 | if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) | |||
2612 | { | |||
2613 | ok = opt_result::failure_at (vect_location, | |||
2614 | "unsupported SLP instances\n"); | |||
2615 | goto again; | |||
2616 | } | |||
2617 | ||||
2618 | /* Check whether any load in ALL SLP instances is possibly permuted. */ | |||
2619 | slp_tree load_node, slp_root; | |||
2620 | unsigned i, x; | |||
2621 | slp_instance instance; | |||
2622 | bool can_use_lanes = true; | |||
2623 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance) | |||
2624 | { | |||
2625 | slp_root = SLP_INSTANCE_TREE (instance); | |||
2626 | int group_size = SLP_TREE_LANES (slp_root); | |||
2627 | tree vectype = SLP_TREE_VECTYPE (slp_root); | |||
2628 | bool loads_permuted = false; | |||
2629 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node) | |||
2630 | { | |||
2631 | if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ()) | |||
2632 | continue; | |||
2633 | unsigned j; | |||
2634 | stmt_vec_info load_info; | |||
2635 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info) | |||
2636 | if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j) | |||
2637 | { | |||
2638 | loads_permuted = true; | |||
2639 | break; | |||
2640 | } | |||
2641 | } | |||
2642 | ||||
2643 | /* If the loads and stores can be handled with load/store-lane | |||
2644 | instructions record it and move on to the next instance. */ | |||
2645 | if (loads_permuted | |||
2646 | && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store | |||
2647 | && vect_store_lanes_supported (vectype, group_size, false)) | |||
2648 | { | |||
2649 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node) | |||
2650 | { | |||
2651 | stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT | |||
2652 | (SLP_TREE_SCALAR_STMTS (load_node)[0]); | |||
2653 | /* Use SLP for strided accesses (or if we can't | |||
2654 | load-lanes). */ | |||
2655 | if (STMT_VINFO_STRIDED_P (stmt_vinfo) | |||
2656 | || ! vect_load_lanes_supported | |||
2657 | (STMT_VINFO_VECTYPE (stmt_vinfo), | |||
2658 | DR_GROUP_SIZE (stmt_vinfo), false)) | |||
2659 | break; | |||
2660 | } | |||
2661 | ||||
2662 | can_use_lanes | |||
2663 | = can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length (); | |||
2664 | ||||
2665 | if (can_use_lanes && dump_enabled_p ()) | |||
2666 | dump_printf_loc (MSG_NOTE, vect_location, | |||
2667 | "SLP instance %p can use load/store-lanes\n", | |||
2668 | (void *) instance); | |||
2669 | } | |||
2670 | else | |||
2671 | { | |||
2672 | can_use_lanes = false; | |||
2673 | break; | |||
2674 | } | |||
2675 | } | |||
2676 | ||||
2677 | /* If all SLP instances can use load/store-lanes abort SLP and try again | |||
2678 | with SLP disabled. */ | |||
2679 | if (can_use_lanes) | |||
2680 | { | |||
2681 | ok = opt_result::failure_at (vect_location, | |||
2682 | "Built SLP cancelled: can use " | |||
2683 | "load/store-lanes\n"); | |||
2684 | if (dump_enabled_p ()) | |||
2685 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2686 | "Built SLP cancelled: all SLP instances support " | |||
2687 | "load/store-lanes\n"); | |||
2688 | goto again; | |||
2689 | } | |||
2690 | } | |||
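| /* A sketch of the control flow when SLP is cancelled above | |||
|    (illustrative only): | |||
|      start_over:  ...analyze with the current SLP setting... | |||
|      again:       ...roll back state...; slp = false; goto start_over; | |||
|    so the store groups can be handled by store-lanes instead of | |||
|    interleaving permutations.  */ | |||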
2691 | ||||
2692 | /* Dissolve SLP-only groups. */ | |||
2693 | vect_dissolve_slp_only_groups (loop_vinfo); | |||
2694 | ||||
2695 | /* Scan all the remaining operations in the loop that are not subject | |||
2696 | to SLP and make sure they are vectorizable. */ | |||
2697 | ok = vect_analyze_loop_operations (loop_vinfo); | |||
2698 | if (!ok) | |||
2699 | { | |||
2700 | if (dump_enabled_p ()) | |||
2701 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2702 | "bad operation or unsupported loop bound.\n"); | |||
2703 | return ok; | |||
2704 | } | |||
2705 | ||||
2706 | /* For now, we don't expect to mix both masking and length approaches for one | |||
2707 | loop, disable it if both are recorded. */ | |||
2708 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) | |||
2709 | && !LOOP_VINFO_MASKS (loop_vinfo).is_empty () | |||
2710 | && !LOOP_VINFO_LENS (loop_vinfo).is_empty ()) | |||
2711 | { | |||
2712 | if (dump_enabled_p ()) | |||
2713 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
2714 | "can't vectorize a loop with partial vectors" | |||
2715 | " because we don't expect to mix different" | |||
2716 | " approaches with partial vectors for the" | |||
2717 | " same loop.\n"); | |||
2718 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; | |||
2719 | } | |||
2720 | ||||
2721 | /* If we still have the option of using partial vectors, | |||
2722 | check whether we can generate the necessary loop controls. */ | |||
2723 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) | |||
2724 | && !vect_verify_full_masking (loop_vinfo) | |||
2725 | && !vect_verify_loop_lens (loop_vinfo)) | |||
2726 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; | |||
2727 | ||||
2728 | /* If we're vectorizing an epilogue loop, the vectorized loop either needs | |||
2729 | to be able to handle fewer than VF scalars, or needs to have a lower VF | |||
2730 | than the main loop. */ | |||
2731 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo) | |||
2732 | && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) | |||
2733 | && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo), | |||
2734 | LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo))) | |||
2735 | return opt_result::failure_at (vect_location, | |||
2736 | "Vectorization factor too high for" | |||
2737 | " epilogue loop.\n"); | |||
2738 | ||||
2739 | /* Decide whether this loop_vinfo should use partial vectors or peeling, | |||
2740 | assuming that the loop will be used as a main loop. We will redo | |||
2741 | this analysis later if we instead decide to use the loop as an | |||
2742 | epilogue loop. */ | |||
2743 | ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false); | |||
2744 | if (!ok) | |||
2745 | return ok; | |||
2746 | ||||
2747 | /* Check the costings of the loop make vectorizing worthwhile. */ | |||
2748 | res = vect_analyze_loop_costing (loop_vinfo, suggested_unroll_factor); | |||
2749 | if (res < 0) | |||
2750 | { | |||
2751 | ok = opt_result::failure_at (vect_location, | |||
2752 | "Loop costings may not be worthwhile.\n"); | |||
2753 | goto again; | |||
2754 | } | |||
2755 | if (!res) | |||
2756 | return opt_result::failure_at (vect_location, | |||
2757 | "Loop costings not worthwhile.\n"); | |||
2758 | ||||
2759 | /* If an epilogue loop is required make sure we can create one. */ | |||
2760 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) | |||
2761 | || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) | |||
2762 | { | |||
2763 | if (dump_enabled_p ()) | |||
2764 | dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n"); | |||
2765 | if (!vect_can_advance_ivs_p (loop_vinfo) | |||
2766 | || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo), | |||
2767 | single_exit (LOOP_VINFO_LOOP | |||
2768 | (loop_vinfo)))) | |||
2769 | { | |||
2770 | ok = opt_result::failure_at (vect_location, | |||
2771 | "not vectorized: can't create required " | |||
2772 | "epilog loop\n"); | |||
2773 | goto again; | |||
2774 | } | |||
2775 | } | |||
2776 | ||||
2777 | /* During peeling, we need to check if number of loop iterations is | |||
2778 | enough for both peeled prolog loop and vector loop. This check | |||
2779 | can be merged along with threshold check of loop versioning, so | |||
2780 | increase threshold for this case if necessary. | |||
2781 | ||||
2782 | If we are analyzing an epilogue we still want to check what its | |||
2783 | versioning threshold would be. If we decide to vectorize the epilogues we | |||
2784 | will want to use the lowest versioning threshold of all epilogues and main | |||
2785 | loop. This will enable us to enter a vectorized epilogue even when | |||
2786 | versioning the loop. We can't simply check whether the epilogue requires | |||
2787 | versioning though since we may have skipped some versioning checks when | |||
2788 | analyzing the epilogue. For instance, checks for alias versioning will be | |||
2789 | skipped when dealing with epilogues as we assume we already checked them | |||
2790 | for the main loop. So instead we always check the 'orig_loop_vinfo'. */ | |||
2791 | if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo)) | |||
2792 | { | |||
2793 | poly_uint64 niters_th = 0; | |||
2794 | unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); | |||
2795 | ||||
2796 | if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) | |||
2797 | { | |||
2798 | /* Niters for peeled prolog loop. */ | |||
2799 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) | |||
2800 | { | |||
2801 | dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); | |||
2802 | tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt); | |||
2803 | niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1; | |||
2804 | } | |||
2805 | else | |||
2806 | niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); | |||
2807 | } | |||
2808 | ||||
2809 | /* Niters for at least one iteration of vectorized loop. */ | |||
2810 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) | |||
2811 | niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo); | |||
2812 | /* One additional iteration because of peeling for gap. */ | |||
2813 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) | |||
2814 | niters_th += 1; | |||
2815 | ||||
2816 | /* Use the same condition as vect_transform_loop to decide when to use | |||
2817 | the cost to determine a versioning threshold. */ | |||
2818 | if (vect_apply_runtime_profitability_check_p (loop_vinfo) | |||
2819 | && ordered_p (th, niters_th)) | |||
2820 | niters_th = ordered_max (poly_uint64 (th), niters_th); | |||
2821 | ||||
2822 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th; | |||
2823 | } | |||
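| /* Worked example with assumed values: peeling 3 prologue iterations | |||
|    for alignment, VF 4 and one extra iteration for gaps give niters_th | |||
|    3 + 4 + 1 == 8; with a runtime profitability threshold TH of 10 the | |||
|    versioning threshold becomes MAX (10, 8) == 10.  */ | |||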
2824 | ||||
2825 | gcc_assert (known_eq (vectorization_factor, | |||
2826 | LOOP_VINFO_VECT_FACTOR (loop_vinfo))); | |||
2827 | ||||
2828 | slp_done_for_suggested_uf = slp; | |||
2829 | ||||
2830 | /* Ok to vectorize! */ | |||
2831 | LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; | |||
2832 | return opt_result::success (); | |||
2833 | ||||
2834 | again: | |||
2835 | /* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */ | |||
2836 | gcc_assert (!ok); | |||
2837 | ||||
2838 | /* Try again with SLP forced off but if we didn't do any SLP there is | |||
2839 | no point in re-trying. */ | |||
2840 | if (!slp) | |||
2841 | return ok; | |||
2842 | ||||
2843 | /* If the slp decision is true when suggested unroll factor is worked | |||
2844 | out, and we are applying suggested unroll factor, we don't need to | |||
2845 | re-try any more. */ | |||
2846 | if (applying_suggested_uf && slp_done_for_suggested_uf) | |||
2847 | return ok; | |||
2848 | ||||
2849 | /* If there are reduction chains re-trying will fail anyway. */ | |||
2850 | if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ()) | |||
2851 | return ok; | |||
2852 | ||||
2853 | /* Likewise if the grouped loads or stores in the SLP cannot be handled | |||
2854 | via interleaving or lane instructions. */ | |||
2855 | slp_instance instance; | |||
2856 | slp_tree node; | |||
2857 | unsigned i, j; | |||
2858 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) | |||
2859 | { | |||
2860 | stmt_vec_info vinfo; | |||
2861 | vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]; | |||
2862 | if (! STMT_VINFO_GROUPED_ACCESS (vinfo)) | |||
2863 | continue; | |||
2864 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); | |||
2865 | unsigned int size = DR_GROUP_SIZE (vinfo); | |||
2866 | tree vectype = STMT_VINFO_VECTYPE (vinfo); | |||
2867 | if (! vect_store_lanes_supported (vectype, size, false) | |||
2868 | && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U) | |||
2869 | && ! vect_grouped_store_supported (vectype, size)) | |||
2870 | return opt_result::failure_at (vinfo->stmt, | |||
2871 | "unsupported grouped store\n"); | |||
2872 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node) | |||
2873 | { | |||
2874 | vinfo = SLP_TREE_SCALAR_STMTS (node)[0]; | |||
2875 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); | |||
2876 | bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo); | |||
2877 | size = DR_GROUP_SIZE (vinfo); | |||
2878 | vectype = STMT_VINFO_VECTYPE (vinfo); | |||
2879 | if (! vect_load_lanes_supported (vectype, size, false) | |||
2880 | && ! vect_grouped_load_supported (vectype, single_element_p, | |||
2881 | size)) | |||
2882 | return opt_result::failure_at (vinfo->stmt, | |||
2883 | "unsupported grouped load\n"); | |||
2884 | } | |||
2885 | } | |||
2886 | ||||
2887 | if (dump_enabled_p ()) | |||
2888 | dump_printf_loc (MSG_NOTE, vect_location, | |||
2889 | "re-trying with SLP disabled\n"); | |||
2890 | ||||
2891 | /* Roll back state appropriately. No SLP this time. */ | |||
2892 | slp = false; | |||
2893 | /* Restore vectorization factor as it were without SLP. */ | |||
2894 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor; | |||
2895 | /* Free the SLP instances. */ | |||
2896 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance) | |||
2897 | vect_free_slp_instance (instance); | |||
2898 | LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); | |||
2899 | /* Reset SLP type to loop_vect on all stmts. */ | |||
2900 | for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) | |||
2901 | { | |||
2902 | basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; | |||
2903 | for (gimple_stmt_iterator si = gsi_start_phis (bb); | |||
2904 | !gsi_end_p (si); gsi_next (&si)) | |||
2905 | { | |||
2906 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); | |||
2907 | STMT_SLP_TYPE (stmt_info) = loop_vect; | |||
2908 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def | |||
2909 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) | |||
2910 | { | |||
2911 | /* vectorizable_reduction adjusts reduction stmt def-types, | |||
2912 | restore them to that of the PHI. */ | |||
2913 | STMT_VINFO_DEF_TYPE (STMT_VINFO_REDUC_DEF (stmt_info)) | |||
2914 | = STMT_VINFO_DEF_TYPE (stmt_info); | |||
2915 | STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize | |||
2916 | (STMT_VINFO_REDUC_DEF (stmt_info))) | |||
2917 | = STMT_VINFO_DEF_TYPE (stmt_info); | |||
2918 | } | |||
2919 | } | |||
2920 | for (gimple_stmt_iterator si = gsi_start_bb (bb); | |||
2921 | !gsi_end_p (si); gsi_next (&si)) | |||
2922 | { | |||
2923 | if (is_gimple_debug (gsi_stmt (si))) | |||
2924 | continue; | |||
2925 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); | |||
2926 | STMT_SLP_TYPE (stmt_info) = loop_vect; | |||
2927 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | |||
2928 | { | |||
2929 | stmt_vec_info pattern_stmt_info | |||
2930 | = STMT_VINFO_RELATED_STMT (stmt_info); | |||
2931 | if (STMT_VINFO_SLP_VECT_ONLY_PATTERN (pattern_stmt_info)) | |||
2932 | STMT_VINFO_IN_PATTERN_P (stmt_info) = false; | |||
2933 | ||||
2934 | gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); | |||
2935 | STMT_SLP_TYPE (pattern_stmt_info) = loop_vect; | |||
2936 | for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq); | |||
2937 | !gsi_end_p (pi); gsi_next (&pi)) | |||
2938 | STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi))) | |||
2939 | = loop_vect; | |||
2940 | } | |||
2941 | } | |||
2942 | } | |||
2943 | /* Free optimized alias test DDRS. */ | |||
2944 | LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (0); | |||
2945 | LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); | |||
2946 | LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); | |||
2947 | /* Reset target cost data. */ | |||
2948 | delete loop_vinfo->vector_costs; | |||
2949 | loop_vinfo->vector_costs = nullptr; | |||
2950 | /* Reset accumulated rgroup information. */ | |||
2951 | release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo)); | |||
2952 | release_vec_loop_controls (&LOOP_VINFO_LENS (loop_vinfo)); | |||
2953 | /* Reset assorted flags. */ | |||
2954 | LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; | |||
2955 | LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false; | |||
2956 | LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0; | |||
2957 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0; | |||
2958 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) | |||
2959 | = saved_can_use_partial_vectors_p; | |||
2960 | ||||
2961 | goto start_over; | |||
2962 | } | |||
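| /* The retry machinery above in outline: a failure after SLP analysis | |||
|    jumps to 'again', which frees the SLP instances, resets per-stmt | |||
|    SLP types and the accumulated flags, and branches back to | |||
|    'start_over' for one more pass with SLP disabled.  */ | |||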
2963 | ||||
2964 | /* Return true if vectorizing a loop using NEW_LOOP_VINFO appears | |||
2965 | to be better than vectorizing it using OLD_LOOP_VINFO. Assume that | |||
2966 | OLD_LOOP_VINFO is better unless something specifically indicates | |||
2967 | otherwise. | |||
2968 | ||||
2969 | Note that this deliberately isn't a partial order. */ | |||
2970 | ||||
2971 | static bool | |||
2972 | vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo, | |||
2973 | loop_vec_info old_loop_vinfo) | |||
2974 | { | |||
2975 | struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo); | |||
2976 | gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop); | |||
2977 | ||||
2978 | poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo); | |||
2979 | poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo); | |||
2980 | ||||
2981 | /* Always prefer a VF of loop->simdlen over any other VF. */ | |||
2982 | if (loop->simdlen) | |||
2983 | { | |||
2984 | bool new_simdlen_p = known_eq (new_vf, loop->simdlen); | |||
2985 | bool old_simdlen_p = known_eq (old_vf, loop->simdlen); | |||
2986 | if (new_simdlen_p != old_simdlen_p) | |||
2987 | return new_simdlen_p; | |||
2988 | } | |||
2989 | ||||
2990 | const auto *old_costs = old_loop_vinfo->vector_costs; | |||
2991 | const auto *new_costs = new_loop_vinfo->vector_costs; | |||
2992 | if (loop_vec_info main_loop = LOOP_VINFO_ORIG_LOOP_INFO (old_loop_vinfo)) | |||
2993 | return new_costs->better_epilogue_loop_than_p (old_costs, main_loop); | |||
2994 | ||||
2995 | return new_costs->better_main_loop_than_p (old_costs); | |||
2996 | } | |||
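| /* E.g. under an assumed '#pragma omp simd simdlen(8)', a candidate | |||
|    with VF 8 beats one with VF 4 regardless of cost; only when both or | |||
|    neither match simdlen do the cost hooks above decide.  */ | |||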
2997 | ||||
2998 | /* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO. Return | |||
2999 | true if we should. */ | |||
3000 | ||||
3001 | static bool | |||
3002 | vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, | |||
3003 | loop_vec_info old_loop_vinfo) | |||
3004 | { | |||
3005 | if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo)) | |||
3006 | return false; | |||
3007 | ||||
3008 | if (dump_enabled_p ()) | |||
3009 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3010 | "***** Preferring vector mode %s to vector mode %s\n", | |||
3011 | GET_MODE_NAME (new_loop_vinfo->vector_mode), | |||
3012 | GET_MODE_NAME (old_loop_vinfo->vector_mode)); | |||
3013 | return true; | |||
3014 | } | |||
3015 | ||||
3016 | /* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO is | |||
3017 | not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance | |||
3018 | MODE_I to the next mode useful to analyze. | |||
3019 | Return the loop_vinfo on success and wrapped null on failure. */ | |||
3020 | ||||
3021 | static opt_loop_vec_info | |||
3022 | vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, | |||
3023 | const vect_loop_form_info *loop_form_info, | |||
3024 | loop_vec_info main_loop_vinfo, | |||
3025 | const vector_modes &vector_modes, unsigned &mode_i, | |||
3026 | machine_mode &autodetected_vector_mode, | |||
3027 | bool &fatal) | |||
3028 | { | |||
3029 | loop_vec_info loop_vinfo | |||
3030 | = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo); | |||
3031 | ||||
3032 | machine_mode vector_mode = vector_modes[mode_i]; | |||
3033 | loop_vinfo->vector_mode = vector_mode; | |||
3034 | unsigned int suggested_unroll_factor = 1; | |||
3035 | bool slp_done_for_suggested_uf; | |||
3036 | ||||
3037 | /* Run the main analysis. */ | |||
3038 | opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, | |||
3039 | &suggested_unroll_factor, | |||
3040 | slp_done_for_suggested_uf); | |||
3041 | if (dump_enabled_p ()) | |||
3042 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3043 | "***** Analysis %s with vector mode %s\n", | |||
3044 | res ? "succeeded" : " failed", | |||
3045 | GET_MODE_NAME (loop_vinfo->vector_mode)); | |||
3046 | ||||
3047 | if (!main_loop_vinfo && suggested_unroll_factor > 1) | |||
3048 | { | |||
3049 | if (dump_enabled_p ()) | |||
3050 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3051 | "***** Re-trying analysis for unrolling" | |||
3052 | " with unroll factor %d and slp %s.\n", | |||
3053 | suggested_unroll_factor, | |||
3054 | slp_done_for_suggested_uf ? "on" : "off"); | |||
3055 | loop_vec_info unroll_vinfo | |||
3056 | = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo); | |||
3057 | unroll_vinfo->vector_mode = vector_mode; | |||
3058 | unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; | |||
3059 | opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL, | |||
3060 | slp_done_for_suggested_uf); | |||
3061 | if (new_res) | |||
3062 | { | |||
3063 | delete loop_vinfo; | |||
3064 | loop_vinfo = unroll_vinfo; | |||
3065 | } | |||
3066 | else | |||
3067 | delete unroll_vinfo; | |||
3068 | } | |||
3069 | ||||
3070 | /* Remember the autodetected vector mode. */ | |||
3071 | if (vector_mode == VOIDmode) | |||
3072 | autodetected_vector_mode = loop_vinfo->vector_mode; | |||
3073 | ||||
3074 | /* Advance mode_i, first skipping modes that would result in the | |||
3075 | same analysis result. */ | |||
3076 | while (mode_i + 1 < vector_modes.length () | |||
3077 | && vect_chooses_same_modes_p (loop_vinfo, | |||
3078 | vector_modes[mode_i + 1])) | |||
3079 | { | |||
3080 | if (dump_enabled_p ()) | |||
3081 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3082 | "***** The result for vector mode %s would" | |||
3083 | " be the same\n", | |||
3084 | GET_MODE_NAME (vector_modes[mode_i + 1])); | |||
3085 | mode_i += 1; | |||
3086 | } | |||
3087 | if (mode_i + 1 < vector_modes.length () | |||
3088 | && VECTOR_MODE_P (autodetected_vector_mode) | |||
3089 | && (related_vector_mode (vector_modes[mode_i + 1], | |||
3090 | GET_MODE_INNER (autodetected_vector_mode)) | |||
3091 | == autodetected_vector_mode) | |||
3092 | && (related_vector_mode (autodetected_vector_mode, | |||
3093 | GET_MODE_INNER (vector_modes[mode_i + 1])) | |||
3094 | == vector_modes[mode_i + 1])) | |||
3095 | { | |||
3096 | if (dump_enabled_p ()) | |||
3097 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3098 | "***** Skipping vector mode %s, which would" | |||
3099 | " repeat the analysis for %s\n", | |||
3100 | GET_MODE_NAME (vector_modes[mode_i + 1]), | |||
3101 | GET_MODE_NAME (autodetected_vector_mode)); | |||
3102 | mode_i += 1; | |||
3103 | } | |||
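| /* For illustration (hypothetical fixed-width target): if the | |||
| autodetected mode is V4SI and the next entry is the equal-sized | |||
| V8HI, then related_vector_mode (V8HI, SImode) == V4SI and | |||
| related_vector_mode (V4SI, HImode) == V8HI, so analyzing V8HI would | |||
| only recreate the V4SI choices and is skipped. */ | |||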
3104 | mode_i++; | |||
3105 | ||||
3106 | if (!res) | |||
3107 | { | |||
3108 | delete loop_vinfo; | |||
3109 | if (fatal) | |||
3110 | gcc_checking_assert (main_loop_vinfo == NULL); | |||
3111 | return opt_loop_vec_info::propagate_failure (res); | |||
3112 | } | |||
3113 | ||||
3114 | return opt_loop_vec_info::success (loop_vinfo); | |||
3115 | } | |||
3116 | ||||
3117 | /* Function vect_analyze_loop. | |||
3118 | ||||
3119 | Apply a set of analyses on LOOP, and create a loop_vec_info struct | |||
3120 | for it. The different analyses will record information in the | |||
3121 | loop_vec_info struct. */ | |||
3122 | opt_loop_vec_info | |||
3123 | vect_analyze_loop (class loop *loop, vec_info_shared *shared) | |||
3124 | { | |||
3125 | DUMP_VECT_SCOPE ("analyze_loop_nest"); | |||
3126 | ||||
3127 | if (loop_outer (loop) | |||
3128 | && loop_vec_info_for_loop (loop_outer (loop)) | |||
3129 | && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop)))) | |||
3130 | return opt_loop_vec_info::failure_at (vect_location, | |||
3131 | "outer-loop already vectorized.\n"); | |||
3132 | ||||
3133 | if (!find_loop_nest (loop, &shared->loop_nest)) | |||
| ||||
3134 | return opt_loop_vec_info::failure_at | |||
3135 | (vect_location, | |||
3136 | "not vectorized: loop nest containing two or more consecutive inner" | |||
3137 | " loops cannot be vectorized\n"); | |||
3138 | ||||
3139 | /* Analyze the loop form. */ | |||
3140 | vect_loop_form_info loop_form_info; | |||
3141 | opt_result res = vect_analyze_loop_form (loop, &loop_form_info); | |||
3142 | if (!res) | |||
3143 | { | |||
3144 | if (dump_enabled_p ()) | |||
3145 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
3146 | "bad loop form.\n"); | |||
3147 | return opt_loop_vec_info::propagate_failure (res); | |||
3148 | } | |||
3149 | if (!integer_onep (loop_form_info.assumptions)) | |||
| ||||
3150 | { | |||
3151 | /* We consider vectorizing this loop by versioning it under | |||
3152 | some assumptions. In order to do this, we need to clear | |||
3153 | existing information computed by scev and niter analyzer. */ | |||
3154 | scev_reset_htab (); | |||
3155 | free_numbers_of_iterations_estimates (loop); | |||
3156 | /* Also set flag for this loop so that following scev and niter | |||
3157 | analysis are done under the assumptions. */ | |||
3158 | loop_constraint_set (loop, LOOP_C_FINITE); | |||
3159 | } | |||
3160 | ||||
3161 | auto_vector_modes vector_modes; | |||
3162 | /* Autodetect first vector size we try. */ | |||
3163 | vector_modes.safe_push (VOIDmode); | |||
3164 | unsigned int autovec_flags | |||
3165 | = targetm.vectorize.autovectorize_vector_modes (&vector_modes, | |||
3166 | loop->simdlen != 0); | |||
3167 | bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS) | |||
3168 | && !unlimited_cost_model (loop)); | |||
3169 | machine_mode autodetected_vector_mode = VOIDmode; | |||
3170 | opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL); | |||
3171 | unsigned int mode_i = 0; | |||
3172 | unsigned HOST_WIDE_INT simdlen = loop->simdlen; | |||
3173 | ||||
3174 | /* Keep track of the VF for each mode. Initialize all to 0 which indicates | |||
3175 | a mode has not been analyzed. */ | |||
3176 | auto_vec<poly_uint64, 8> cached_vf_per_mode; | |||
3177 | for (unsigned i = 0; i < vector_modes.length (); ++i) | |||
3178 | cached_vf_per_mode.safe_push (0); | |||
3179 | ||||
3180 | /* First determine the main loop vectorization mode, either the first | |||
3181 | one that works, starting with auto-detecting the vector mode and then | |||
3182 | following the targets order of preference, or the one with the | |||
3183 | lowest cost if pick_lowest_cost_p. */ | |||
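| /* For illustration: with a hypothetical mode list {V16QI, V8QI} and | |||
| !pick_lowest_cost_p, the first mode whose analysis succeeds is | |||
| committed to (subject to SIMDLEN); with pick_lowest_cost_p every | |||
| mode is analyzed and vect_joust_loop_vinfos keeps the cheaper | |||
| candidate. */ | |||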
3184 | while (1) | |||
3185 | { | |||
3186 | bool fatal; | |||
3187 | unsigned int last_mode_i = mode_i; | |||
3188 | /* Set cached VF to -1 prior to analysis, which indicates a mode has | |||
3189 | failed. */ | |||
3190 | cached_vf_per_mode[last_mode_i] = -1; | |||
3191 | opt_loop_vec_info loop_vinfo | |||
3192 | = vect_analyze_loop_1 (loop, shared, &loop_form_info, | |||
3193 | NULL, vector_modes, mode_i, | |||
3194 | autodetected_vector_mode, fatal); | |||
3195 | if (fatal) | |||
3196 | break; | |||
3197 | ||||
3198 | if (loop_vinfo) | |||
3199 | { | |||
3200 | /* Analysis has been successful, so update the VF value. The | |||
3201 | VF should always be a multiple of unroll_factor and we want to | |||
3202 | capture the original VF here. */ | |||
3203 | cached_vf_per_mode[last_mode_i] | |||
3204 | = exact_div (LOOP_VINFO_VECT_FACTOR (loop_vinfo), | |||
3205 | loop_vinfo->suggested_unroll_factor); | |||
3206 | /* Once we hit the desired simdlen for the first time, | |||
3207 | discard any previous attempts. */ | |||
3208 | if (simdlen | |||
3209 | && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen)) | |||
3210 | { | |||
3211 | delete first_loop_vinfo; | |||
3212 | first_loop_vinfo = opt_loop_vec_info::success (NULL); | |||
3213 | simdlen = 0; | |||
3214 | } | |||
3215 | else if (pick_lowest_cost_p | |||
3216 | && first_loop_vinfo | |||
3217 | && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo)) | |||
3218 | { | |||
3219 | /* Pick loop_vinfo over first_loop_vinfo. */ | |||
3220 | delete first_loop_vinfo; | |||
3221 | first_loop_vinfo = opt_loop_vec_info::success (NULL); | |||
3222 | } | |||
3223 | if (first_loop_vinfo == NULL) | |||
3224 | first_loop_vinfo = loop_vinfo; | |||
3225 | else | |||
3226 | { | |||
3227 | delete loop_vinfo; | |||
3228 | loop_vinfo = opt_loop_vec_info::success (NULL); | |||
3229 | } | |||
3230 | ||||
3231 | /* Commit to first_loop_vinfo if we have no reason to try | |||
3232 | alternatives. */ | |||
3233 | if (!simdlen && !pick_lowest_cost_p) | |||
3234 | break; | |||
3235 | } | |||
3236 | if (mode_i == vector_modes.length () | |||
3237 | || autodetected_vector_mode == VOIDmode) | |||
3238 | break; | |||
3239 | ||||
3240 | /* Try the next biggest vector size. */ | |||
3241 | if (dump_enabled_p ()) | |||
3242 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3243 | "***** Re-trying analysis with vector mode %s\n", | |||
3244 | GET_MODE_NAME (vector_modes[mode_i])); | |||
3245 | } | |||
3246 | if (!first_loop_vinfo) | |||
3247 | return opt_loop_vec_info::propagate_failure (res); | |||
3248 | ||||
3249 | if (dump_enabled_p ()) | |||
3250 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3251 | "***** Choosing vector mode %s\n", | |||
3252 | GET_MODE_NAME (first_loop_vinfo->vector_mode)); | |||
3253 | ||||
3254 | /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is | |||
3255 | enabled, SIMDUID is not set, it is the innermost loop and we have | |||
3256 | either already found the loop's SIMDLEN or there was no SIMDLEN to | |||
3257 | begin with. | |||
3258 | TODO: Enable epilogue vectorization for loops with SIMDUID set. */ | |||
3259 | bool vect_epilogues = (!simdlen | |||
3260 | && loop->inner == NULL | |||
3261 | && param_vect_epilogues_nomask | |||
3262 | && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo) | |||
3263 | && !loop->simduid); | |||
3264 | if (!vect_epilogues) | |||
3265 | return first_loop_vinfo; | |||
3266 | ||||
3267 | /* Now analyze first_loop_vinfo for epilogue vectorization. */ | |||
3268 | poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo); | |||
3269 | ||||
3270 | /* For epilogues start the analysis from the first mode. The motivation | |||
3271 | behind starting from the beginning comes from cases where the VECTOR_MODES | |||
3272 | array may contain length-agnostic and length-specific modes. Their | |||
3273 | ordering is not guaranteed, so we could end up picking a mode for the main | |||
3274 | loop that is after the epilogue's optimal mode. */ | |||
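| /* For illustration (hypothetical variable-length target): the mode | |||
| list might be {VNx16QI, V16QI, V8QI}, mixing length-agnostic and | |||
| length-specific modes. Even if the main loop picked VNx16QI, the | |||
| best epilogue mode could be another entry such as V16QI, hence the | |||
| scan below restarts at index 0 instead of at the main loop's mode. */ | |||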
3275 | vector_modes[0] = autodetected_vector_mode; | |||
3276 | mode_i = 0; | |||
3277 | ||||
3278 | bool supports_partial_vectors = | |||
3279 | partial_vectors_supported_p () && param_vect_partial_vector_usage != 0; | |||
3280 | poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo); | |||
3281 | ||||
3282 | while (1) | |||
3283 | { | |||
3284 | /* If the target does not support partial vectors we can shorten the | |||
3285 | number of modes to analyze for the epilogue as we know we can't pick a | |||
3286 | mode that would lead to a VF at least as big as the | |||
3287 | FIRST_VINFO_VF. */ | |||
3288 | if (!supports_partial_vectors | |||
3289 | && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf)) | |||
3290 | { | |||
3291 | mode_i++; | |||
3292 | if (mode_i == vector_modes.length ()) | |||
3293 | break; | |||
3294 | continue; | |||
3295 | } | |||
3296 | ||||
3297 | if (dump_enabled_p ()) | |||
3298 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3299 | "***** Re-trying epilogue analysis with vector " | |||
3300 | "mode %s\n", GET_MODE_NAME (vector_modes[mode_i])mode_name[vector_modes[mode_i]]); | |||
3301 | ||||
3302 | bool fatal; | |||
3303 | opt_loop_vec_info loop_vinfo | |||
3304 | = vect_analyze_loop_1 (loop, shared, &loop_form_info, | |||
3305 | first_loop_vinfo, | |||
3306 | vector_modes, mode_i, | |||
3307 | autodetected_vector_mode, fatal); | |||
3308 | if (fatal) | |||
3309 | break; | |||
3310 | ||||
3311 | if (loop_vinfo) | |||
3312 | { | |||
3313 | if (pick_lowest_cost_p) | |||
3314 | { | |||
3315 | /* Keep trying to roll back vectorization attempts while the | |||
3316 | loop_vec_infos they produced were worse than this one. */ | |||
3317 | vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos; | |||
3318 | while (!vinfos.is_empty () | |||
3319 | && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ())) | |||
3320 | { | |||
3321 | gcc_assert (vect_epilogues); | |||
3322 | delete vinfos.pop (); | |||
3323 | } | |||
3324 | } | |||
3325 | /* For now only allow one epilogue loop. */ | |||
3326 | if (first_loop_vinfo->epilogue_vinfos.is_empty ()) | |||
3327 | { | |||
3328 | first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo); | |||
3329 | poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); | |||
3330 | gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo) | |||
3331 | || maybe_ne (lowest_th, 0U)); | |||
3332 | /* Keep track of the known smallest versioning | |||
3333 | threshold. */ | |||
3334 | if (ordered_p (lowest_th, th)) | |||
3335 | lowest_th = ordered_min (lowest_th, th); | |||
3336 | } | |||
3337 | else | |||
3338 | { | |||
3339 | delete loop_vinfo; | |||
3340 | loop_vinfo = opt_loop_vec_info::success (NULL); | |||
3341 | } | |||
3342 | ||||
3343 | /* For now only allow one epilogue loop, but allow | |||
3344 | pick_lowest_cost_p to replace it, so commit to the | |||
3345 | first epilogue if we have no reason to try alternatives. */ | |||
3346 | if (!pick_lowest_cost_p) | |||
3347 | break; | |||
3348 | } | |||
3349 | ||||
3350 | if (mode_i == vector_modes.length ()) | |||
3351 | break; | |||
3352 | ||||
3353 | } | |||
3354 | ||||
3355 | if (!first_loop_vinfo->epilogue_vinfos.is_empty ()) | |||
3356 | { | |||
3357 | LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th; | |||
3358 | if (dump_enabled_p ()) | |||
3359 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3360 | "***** Choosing epilogue vector mode %s\n", | |||
3361 | GET_MODE_NAME | |||
3362 | (first_loop_vinfo->epilogue_vinfos[0]->vector_mode)); | |||
3363 | } | |||
3364 | ||||
3365 | return first_loop_vinfo; | |||
3366 | } | |||
3367 | ||||
3368 | /* Return true if there is an in-order reduction function for CODE, storing | |||
3369 | it in *REDUC_FN if so. */ | |||
3370 | ||||
3371 | static bool | |||
3372 | fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn) | |||
3373 | { | |||
3374 | if (code == PLUS_EXPR) | |||
3375 | { | |||
3376 | *reduc_fn = IFN_FOLD_LEFT_PLUS; | |||
3377 | return true; | |||
3378 | } | |||
3379 | return false; | |||
3380 | } | |||
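| /* For illustration (example loop not taken from this file): without | |||
| -fassociative-math the FP summation below must preserve the source | |||
| order of the additions, so instead of a tree-wise IFN_REDUC_PLUS the | |||
| vectorizer uses IFN_FOLD_LEFT_PLUS, folding the vector lanes into | |||
| the scalar accumulator one at a time: | |||
| double s = 0.0; | |||
| for (int i = 0; i < n; i++) | |||
| s += a[i]; // computed as (((s + a[0]) + a[1]) + ...) | |||
| */ | |||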
3381 | ||||
3382 | /* Function reduction_fn_for_scalar_code | |||
3383 | ||||
3384 | Input: | |||
3385 | CODE - tree_code of a reduction operation. | |||
3386 | ||||
3387 | Output: | |||
3388 | REDUC_FN - the corresponding internal function to be used to reduce the | |||
3389 | vector of partial results into a single scalar result, or IFN_LAST | |||
3390 | if the operation is a supported reduction operation, but does not have | |||
3391 | such an internal function. | |||
3392 | ||||
3393 | Return FALSE if CODE currently cannot be vectorized as a reduction. */ | |||
3394 | ||||
3395 | bool | |||
3396 | reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn) | |||
3397 | { | |||
3398 | if (code.is_tree_code ()) | |||
3399 | switch (tree_code (code)) | |||
3400 | { | |||
3401 | case MAX_EXPR: | |||
3402 | *reduc_fn = IFN_REDUC_MAX; | |||
3403 | return true; | |||
3404 | ||||
3405 | case MIN_EXPR: | |||
3406 | *reduc_fn = IFN_REDUC_MIN; | |||
3407 | return true; | |||
3408 | ||||
3409 | case PLUS_EXPR: | |||
3410 | *reduc_fn = IFN_REDUC_PLUS; | |||
3411 | return true; | |||
3412 | ||||
3413 | case BIT_AND_EXPR: | |||
3414 | *reduc_fn = IFN_REDUC_AND; | |||
3415 | return true; | |||
3416 | ||||
3417 | case BIT_IOR_EXPR: | |||
3418 | *reduc_fn = IFN_REDUC_IOR; | |||
3419 | return true; | |||
3420 | ||||
3421 | case BIT_XOR_EXPR: | |||
3422 | *reduc_fn = IFN_REDUC_XOR; | |||
3423 | return true; | |||
3424 | ||||
3425 | case MULT_EXPR: | |||
3426 | case MINUS_EXPR: | |||
3427 | *reduc_fn = IFN_LAST; | |||
3428 | return true; | |||
3429 | ||||
3430 | default: | |||
3431 | return false; | |||
3432 | } | |||
3433 | else | |||
3434 | switch (combined_fn (code)) | |||
3435 | { | |||
3436 | CASE_CFN_FMAX: | |||
3437 | *reduc_fn = IFN_REDUC_FMAX; | |||
3438 | return true; | |||
3439 | ||||
3440 | CASE_CFN_FMIN: | |||
3441 | *reduc_fn = IFN_REDUC_FMIN; | |||
3442 | return true; | |||
3443 | ||||
3444 | default: | |||
3445 | return false; | |||
3446 | } | |||
3447 | } | |||
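| /* For illustration: a loop computing m = MAX (m, a[i]) can reduce its | |||
| vector of partial maxima with a single IFN_REDUC_MAX call in the | |||
| epilogue, whereas a MULT_EXPR reduction gets IFN_LAST above and its | |||
| final product is open-coded instead (the exact lowering is up to the | |||
| epilogue-generation code). */ | |||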
3448 | ||||
3449 | /* If there is a neutral value X such that a reduction would not be affected | |||
3450 | by the introduction of additional X elements, return that X, otherwise | |||
3451 | return null. CODE is the code of the reduction and SCALAR_TYPE is type | |||
3452 | of the scalar elements. If the reduction has just a single initial value | |||
3453 | then INITIAL_VALUE is that value, otherwise it is null. */ | |||
3454 | ||||
3455 | tree | |||
3456 | neutral_op_for_reduction (tree scalar_type, code_helper code, | |||
3457 | tree initial_value) | |||
3458 | { | |||
3459 | if (code.is_tree_code ()) | |||
3460 | switch (tree_code (code)) | |||
3461 | { | |||
3462 | case WIDEN_SUM_EXPR: | |||
3463 | case DOT_PROD_EXPR: | |||
3464 | case SAD_EXPR: | |||
3465 | case PLUS_EXPR: | |||
3466 | case MINUS_EXPR: | |||
3467 | case BIT_IOR_EXPR: | |||
3468 | case BIT_XOR_EXPR: | |||
3469 | return build_zero_cst (scalar_type); | |||
3470 | ||||
3471 | case MULT_EXPR: | |||
3472 | return build_one_cst (scalar_type); | |||
3473 | ||||
3474 | case BIT_AND_EXPR: | |||
3475 | return build_all_ones_cst (scalar_type); | |||
3476 | ||||
3477 | case MAX_EXPR: | |||
3478 | case MIN_EXPR: | |||
3479 | return initial_value; | |||
3480 | ||||
3481 | default: | |||
3482 | return NULL_TREE; | |||
3483 | } | |||
3484 | else | |||
3485 | switch (combined_fn (code)) | |||
3486 | { | |||
3487 | CASE_CFN_FMIN: | |||
3488 | CASE_CFN_FMAX: | |||
3489 | return initial_value; | |||
3490 | ||||
3491 | default: | |||
3492 | return NULL_TREE; | |||
3493 | } | |||
3494 | } | |||
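| /* For illustration: if a sum reduction over 5 elements runs in 8-lane | |||
| vectors, the 3 excess lanes can be filled with the neutral value 0 | |||
| without changing the result; a product uses 1, BIT_AND uses all-ones, | |||
| and MIN/MAX have no universal neutral element, so the single initial | |||
| value itself is replicated instead. */ | |||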
3495 | ||||
3496 | /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement | |||
3497 | STMT is printed with a message MSG. */ | |||
3498 | ||||
3499 | static void | |||
3500 | report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg) | |||
3501 | { | |||
3502 | dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt); | |||
3503 | } | |||
3504 | ||||
3505 | /* Return true if we need an in-order reduction for operation CODE | |||
3506 | on type TYPE. */ | |||
3508 | ||||
3509 | bool | |||
3510 | needs_fold_left_reduction_p (tree type, code_helper code) | |||
3511 | { | |||
3512 | /* CHECKME: check for !flag_finite_math_only too? */ | |||
3513 | if (SCALAR_FLOAT_TYPE_P (type)) | |||
3514 | { | |||
3515 | if (code.is_tree_code ()) | |||
3516 | switch (tree_code (code)) | |||
3517 | { | |||
3518 | case MIN_EXPR: | |||
3519 | case MAX_EXPR: | |||
3520 | return false; | |||
3521 | ||||
3522 | default: | |||
3523 | return !flag_associative_math; | |||
3524 | } | |||
3525 | else | |||
3526 | switch (combined_fn (code)) | |||
3527 | { | |||
3528 | CASE_CFN_FMIN: | |||
3529 | CASE_CFN_FMAX: | |||
3530 | return false; | |||
3531 | ||||
3532 | default: | |||
3533 | return !flag_associative_math; | |||
3534 | } | |||
3535 | } | |||
3536 | ||||
3537 | if (INTEGRAL_TYPE_P (type)) | |||
3538 | return (!code.is_tree_code () | |||
3539 | || !operation_no_trapping_overflow (type, tree_code (code))); | |||
3540 | ||||
3541 | if (SAT_FIXED_POINT_TYPE_P (type)) | |||
3542 | return true; | |||
3543 | ||||
3544 | return false; | |||
3545 | } | |||
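| /* For illustration: a float PLUS_EXPR reduction without | |||
| -fassociative-math returns true here (reassociating would change the | |||
| rounding), float MIN/MAX and fmin/fmax return false, and an integer | |||
| sum returns true only when the operation may trap on overflow (e.g. | |||
| signed arithmetic with -ftrapv); wrapping unsigned sums return | |||
| false. */ | |||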
3546 | ||||
3547 | /* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG | |||
3548 | has a handled computation expression. Store the main reduction | |||
3549 | operation in *CODE. */ | |||
3550 | ||||
3551 | static bool | |||
3552 | check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, | |||
3553 | tree loop_arg, code_helper *code, | |||
3554 | vec<std::pair<ssa_op_iter, use_operand_p> > &path) | |||
3555 | { | |||
3556 | auto_bitmap visited; | |||
3557 | tree lookfor = PHI_RESULT (phi); | |||
3558 | ssa_op_iter curri; | |||
3559 | use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE); | |||
3560 | while (USE_FROM_PTR (curr) != loop_arg) | |||
3561 | curr = op_iter_next_use (&curri); | |||
3562 | curri.i = curri.numops; | |||
3563 | do | |||
3564 | { | |||
3565 | path.safe_push (std::make_pair (curri, curr)); | |||
3566 | tree use = USE_FROM_PTR (curr); | |||
3567 | if (use == lookfor) | |||
3568 | break; | |||
3569 | gimple *def = SSA_NAME_DEF_STMT (use); | |||
3570 | if (gimple_nop_p (def) | |||
3571 | || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) | |||
3572 | { | |||
3573 | pop: | |||
3574 | do | |||
3575 | { | |||
3576 | std::pair<ssa_op_iter, use_operand_p> x = path.pop (); | |||
3577 | curri = x.first; | |||
3578 | curr = x.second; | |||
3579 | do | |||
3580 | curr = op_iter_next_use (&curri); | |||
3581 | /* Skip already visited or non-SSA operands (from iterating | |||
3582 | over PHI args). */ | |||
3583 | while (curr != NULL_USE_OPERAND_P | |||
3584 | && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME | |||
3585 | || ! bitmap_set_bit (visited, | |||
3586 | SSA_NAME_VERSION | |||
3587 | (USE_FROM_PTR (curr))))); | |||
3588 | } | |||
3589 | while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); | |||
3590 | if (curr == NULL_USE_OPERAND_P) | |||
3591 | break; | |||
3592 | } | |||
3593 | else | |||
3594 | { | |||
3595 | if (gimple_code (def) == GIMPLE_PHI) | |||
3596 | curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE); | |||
3597 | else | |||
3598 | curr = op_iter_init_use (&curri, def, SSA_OP_USE); | |||
3599 | while (curr != NULL_USE_OPERAND_P | |||
3600 | && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME | |||
3601 | || ! bitmap_set_bit (visited, | |||
3602 | SSA_NAME_VERSION | |||
3603 | (USE_FROM_PTR (curr))))) | |||
3604 | curr = op_iter_next_use (&curri); | |||
3605 | if (curr == NULL_USE_OPERAND_P) | |||
3606 | goto pop; | |||
3607 | } | |||
3608 | } | |||
3609 | while (1); | |||
3610 | if (dump_file && (dump_flags & TDF_DETAILS)) | |||
3611 | { | |||
3612 | dump_printf_loc (MSG_NOTE, loc, "reduction path: "); | |||
3613 | unsigned i; | |||
3614 | std::pair<ssa_op_iter, use_operand_p> *x; | |||
3615 | FOR_EACH_VEC_ELT (path, i, x) | |||
3616 | dump_printf (MSG_NOTE, "%T ", USE_FROM_PTR (x->second)); | |||
3617 | dump_printf (MSG_NOTE, "\n"); | |||
3618 | } | |||
3619 | ||||
3620 | /* Check whether the reduction path detected is valid. */ | |||
3621 | bool fail = path.length () == 0; | |||
3622 | bool neg = false; | |||
3623 | int sign = -1; | |||
3624 | *code = ERROR_MARK; | |||
3625 | for (unsigned i = 1; i < path.length (); ++i) | |||
3626 | { | |||
3627 | gimple *use_stmt = USE_STMT (path[i].second); | |||
3628 | gimple_match_op op; | |||
3629 | if (!gimple_extract_op (use_stmt, &op)) | |||
3630 | { | |||
3631 | fail = true; | |||
3632 | break; | |||
3633 | } | |||
3634 | unsigned int opi = op.num_ops; | |||
3635 | if (gassign *assign = dyn_cast<gassign *> (use_stmt)) | |||
3636 | { | |||
3637 | /* The following makes sure we can compute the operand index | |||
3638 | easily, plus it mostly disallows chaining via COND_EXPR condition | |||
3639 | operands. */ | |||
3640 | for (opi = 0; opi < op.num_ops; ++opi) | |||
3641 | if (gimple_assign_rhs1_ptr (assign) + opi == path[i].second->use) | |||
3642 | break; | |||
3643 | } | |||
3644 | else if (gcall *call = dyn_cast<gcall *> (use_stmt)) | |||
3645 | { | |||
3646 | for (opi = 0; opi < op.num_ops; ++opi) | |||
3647 | if (gimple_call_arg_ptr (call, opi) == path[i].second->use) | |||
3648 | break; | |||
3649 | } | |||
3650 | if (opi == op.num_ops) | |||
3651 | { | |||
3652 | fail = true; | |||
3653 | break; | |||
3654 | } | |||
3655 | op.code = canonicalize_code (op.code, op.type); | |||
3656 | if (op.code == MINUS_EXPR) | |||
3657 | { | |||
3658 | op.code = PLUS_EXPR; | |||
3659 | /* Track whether we negate the reduction value each iteration. */ | |||
3660 | if (op.ops[1] == op.ops[opi]) | |||
3661 | neg = ! neg; | |||
3662 | } | |||
3663 | if (CONVERT_EXPR_CODE_P (op.code) | |||
3664 | && tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0]))) | |||
3665 | ; | |||
3666 | else if (*code == ERROR_MARK) | |||
3667 | { | |||
3668 | *code = op.code; | |||
3669 | sign = TYPE_SIGN (op.type); | |||
3670 | } | |||
3671 | else if (op.code != *code) | |||
3672 | { | |||
3673 | fail = true; | |||
3674 | break; | |||
3675 | } | |||
3676 | else if ((op.code == MIN_EXPR | |||
3677 | || op.code == MAX_EXPR) | |||
3678 | && sign != TYPE_SIGN (op.type)) | |||
3679 | { | |||
3680 | fail = true; | |||
3681 | break; | |||
3682 | } | |||
3683 | /* Check that there is only a single stmt the op is used on. For the | |||
3684 | non-value-changing tail and the last stmt allow out-of-loop uses. | |||
3685 | ??? We could relax this and handle arbitrary live stmts by | |||
3686 | forcing a scalar epilogue for example. */ | |||
3687 | imm_use_iterator imm_iter; | |||
3688 | gimple *op_use_stmt; | |||
3689 | unsigned cnt = 0; | |||
3690 | FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op.ops[opi]) | |||
3691 | if (!is_gimple_debug (op_use_stmt) | |||
3692 | && (*code != ERROR_MARK | |||
3693 | || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))) | |||
3694 | { | |||
3695 | /* We want to allow x + x but not x < 1 ? x : 2. */ | |||
3696 | if (is_gimple_assign (op_use_stmt) | |||
3697 | && gimple_assign_rhs_code (op_use_stmt) == COND_EXPR) | |||
3698 | { | |||
3699 | use_operand_p use_p; | |||
3700 | FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) | |||
3701 | cnt++; | |||
3702 | } | |||
3703 | else | |||
3704 | cnt++; | |||
3705 | } | |||
3706 | if (cnt != 1) | |||
3707 | { | |||
3708 | fail = true; | |||
3709 | break; | |||
3710 | } | |||
3711 | } | |||
3712 | return ! fail && ! neg && *code != ERROR_MARK; | |||
3713 | } | |||
3714 | ||||
3715 | bool | |||
3716 | check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, | |||
3717 | tree loop_arg, enum tree_code code) | |||
3718 | { | |||
3719 | auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; | |||
3720 | code_helper code_; | |||
3721 | return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path) | |||
3722 | && code_ == code); | |||
3723 | } | |||
3724 | ||||
3725 | ||||
3726 | ||||
3727 | /* Function vect_is_simple_reduction | |||
3728 | ||||
3729 | (1) Detect a cross-iteration def-use cycle that represents a simple | |||
3730 | reduction computation. We look for the following pattern: | |||
3731 | ||||
3732 | loop_header: | |||
3733 | a1 = phi < a0, a2 > | |||
3734 | a3 = ... | |||
3735 | a2 = operation (a3, a1) | |||
3736 | ||||
3737 | or | |||
3738 | ||||
3739 | a3 = ... | |||
3740 | loop_header: | |||
3741 | a1 = phi < a0, a2 > | |||
3742 | a2 = operation (a3, a1) | |||
3743 | ||||
3744 | such that: | |||
3745 | 1. operation is commutative and associative and it is safe to | |||
3746 | change the order of the computation | |||
3747 | 2. no uses for a2 in the loop (a2 is used out of the loop) | |||
3748 | 3. no uses of a1 in the loop besides the reduction operation | |||
3749 | 4. no uses of a1 outside the loop. | |||
3750 | ||||
3751 | Conditions 1,4 are tested here. | |||
3752 | Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. | |||
3753 | ||||
3754 | (2) Detect a cross-iteration def-use cycle in nested loops, i.e., | |||
3755 | nested cycles. | |||
3756 | ||||
3757 | (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double | |||
3758 | reductions: | |||
3759 | ||||
3760 | a1 = phi < a0, a2 > | |||
3761 | inner loop (def of a3) | |||
3762 | a2 = phi < a3 > | |||
3763 | ||||
3764 | (4) Detect condition expressions, i.e.: | |||
3765 | for (int i = 0; i < N; i++) | |||
3766 | if (a[i] < val) | |||
3767 | ret_val = a[i]; | |||
3768 | ||||
3769 | */ | |||
3770 | ||||
3771 | static stmt_vec_info | |||
3772 | vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, | |||
3773 | bool *double_reduc, bool *reduc_chain_p, bool slp) | |||
3774 | { | |||
3775 | gphi *phi = as_a <gphi *> (phi_info->stmt); | |||
3776 | gimple *phi_use_stmt = NULL; | |||
3777 | imm_use_iterator imm_iter; | |||
3778 | use_operand_p use_p; | |||
3779 | ||||
3780 | *double_reduc = false; | |||
3781 | *reduc_chain_p = false; | |||
3782 | STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION; | |||
3783 | ||||
3784 | tree phi_name = PHI_RESULT (phi); | |||
3785 | /* ??? If there are no uses of the PHI result the inner loop reduction | |||
3786 | won't be detected as possibly double-reduction by vectorizable_reduction | |||
3787 | because that tries to walk the PHI arg from the preheader edge which | |||
3788 | can be constant. See PR60382. */ | |||
3789 | if (has_zero_uses (phi_name)) | |||
3790 | return NULL; | |||
3791 | class loop *loop = (gimple_bb (phi))->loop_father; | |||
3792 | unsigned nphi_def_loop_uses = 0; | |||
3793 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name) | |||
3794 | { | |||
3795 | gimple *use_stmt = USE_STMT (use_p); | |||
3796 | if (is_gimple_debug (use_stmt)) | |||
3797 | continue; | |||
3798 | ||||
3799 | if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) | |||
3800 | { | |||
3801 | if (dump_enabled_p ()) | |||
3802 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
3803 | "intermediate value used outside loop.\n"); | |||
3804 | ||||
3805 | return NULL; | |||
3806 | } | |||
3807 | ||||
3808 | nphi_def_loop_uses++; | |||
3809 | phi_use_stmt = use_stmt; | |||
3810 | } | |||
3811 | ||||
3812 | tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); | |||
3813 | if (TREE_CODE (latch_def) != SSA_NAME) | |||
3814 | { | |||
3815 | if (dump_enabled_p ()) | |||
3816 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
3817 | "reduction: not ssa_name: %T\n", latch_def); | |||
3818 | return NULL; | |||
3819 | } | |||
3820 | ||||
3821 | stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def); | |||
3822 | if (!def_stmt_info | |||
3823 | || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))) | |||
3824 | return NULL; | |||
3825 | ||||
3826 | bool nested_in_vect_loop | |||
3827 | = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info), loop); | |||
3828 | unsigned nlatch_def_loop_uses = 0; | |||
3829 | auto_vec<gphi *, 3> lcphis; | |||
3830 | bool inner_loop_of_double_reduc = false; | |||
3831 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def) | |||
3832 | { | |||
3833 | gimple *use_stmt = USE_STMT (use_p); | |||
3834 | if (is_gimple_debug (use_stmt)) | |||
3835 | continue; | |||
3836 | if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) | |||
3837 | nlatch_def_loop_uses++; | |||
3838 | else | |||
3839 | { | |||
3840 | /* We can have more than one loop-closed PHI. */ | |||
3841 | lcphis.safe_push (as_a <gphi *> (use_stmt)); | |||
3842 | if (nested_in_vect_loop | |||
3843 | && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt)) | |||
3844 | == vect_double_reduction_def)) | |||
3845 | inner_loop_of_double_reduc = true; | |||
3846 | } | |||
3847 | } | |||
3848 | ||||
3849 | /* If we are vectorizing an inner reduction, we execute it in the | |||
3850 | original order only when we are not dealing with a double | |||
3851 | reduction. */ | |||
3852 | if (nested_in_vect_loop && !inner_loop_of_double_reduc) | |||
3853 | { | |||
3854 | if (dump_enabled_p ()) | |||
3855 | report_vect_op (MSG_NOTE, def_stmt_info->stmt, | |||
3856 | "detected nested cycle: "); | |||
3857 | return def_stmt_info; | |||
3858 | } | |||
3859 | ||||
3860 | /* When the inner loop of a double reduction ends up with more than | |||
3861 | one loop-closed PHI we have failed to classify alternate such | |||
3862 | PHIs as double reduction, leading to wrong code. See PR103237. */ | |||
3863 | if (inner_loop_of_double_reduc && lcphis.length () != 1) | |||
3864 | { | |||
3865 | if (dump_enabled_p ()) | |||
3866 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
3867 | "unhandle double reduction\n"); | |||
3868 | return NULLnullptr; | |||
3869 | } | |||
3870 | ||||
3871 | /* If this isn't a nested cycle, or if the nested cycle reduction value | |||
3872 | is used outside of the inner loop, we cannot handle uses of the reduction | |||
3873 | value. */ | |||
3874 | if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1) | |||
3875 | { | |||
3876 | if (dump_enabled_p ()) | |||
3877 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
3878 | "reduction used in loop.\n"); | |||
3879 | return NULL; | |||
3880 | } | |||
3881 | ||||
3882 | /* If DEF_STMT is a phi node itself, we expect it to have a single argument | |||
3883 | defined in the inner loop. */ | |||
3884 | if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt)) | |||
3885 | { | |||
3886 | tree op1 = PHI_ARG_DEF (def_stmt, 0); | |||
3887 | if (gimple_phi_num_args (def_stmt) != 1 | |||
3888 | || TREE_CODE (op1) != SSA_NAME) | |||
3889 | { | |||
3890 | if (dump_enabled_p ()) | |||
3891 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |||
3892 | "unsupported phi node definition.\n"); | |||
3893 | ||||
3894 | return NULL; | |||
3895 | } | |||
3896 | ||||
3897 | /* Verify there is an inner cycle composed of the PHI phi_use_stmt | |||
3898 | and the latch definition op1. */ | |||
3899 | gimple *def1 = SSA_NAME_DEF_STMT (op1); | |||
3900 | if (gimple_bb (def1) | |||
3901 | && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | |||
3902 | && loop->inner | |||
3903 | && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) | |||
3904 | && (is_gimple_assign (def1) || is_gimple_call (def1)) | |||
3905 | && is_a <gphi *> (phi_use_stmt) | |||
3906 | && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)) | |||
3907 | && (op1 == PHI_ARG_DEF_FROM_EDGE (phi_use_stmt, | |||
3908 | loop_latch_edge (loop->inner)))) | |||
3909 | { | |||
3910 | if (dump_enabled_p ()) | |||
3911 | report_vect_op (MSG_NOTE, def_stmt, | |||
3912 | "detected double reduction: "); | |||
3913 | ||||
3914 | *double_reduc = true; | |||
3915 | return def_stmt_info; | |||
3916 | } | |||
3917 | ||||
3918 | return NULL; | |||
3919 | } | |||
3920 | ||||
3921 | /* Look for the expression computing latch_def from the loop PHI result. */ | |||
3922 | auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; | |||
3923 | code_helper code; | |||
3924 | if (check_reduction_path (vect_location, loop, phi, latch_def, &code, | |||
3925 | path)) | |||
3926 | { | |||
3927 | STMT_VINFO_REDUC_CODE (phi_info) = code; | |||
3928 | if (code == COND_EXPR && !nested_in_vect_loop) | |||
3929 | STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION; | |||
3930 | ||||
3931 | /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP | |||
3932 | reduction chain for which the additional restriction is that | |||
3933 | all operations in the chain are the same. */ | |||
3934 | auto_vec<stmt_vec_info, 8> reduc_chain; | |||
3935 | unsigned i; | |||
3936 | bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR; | |||
3937 | for (i = path.length () - 1; i >= 1; --i) | |||
3938 | { | |||
3939 | gimple *stmt = USE_STMT (path[i].second); | |||
3940 | stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt); | |||
3941 | gimple_match_op op; | |||
3942 | if (!gimple_extract_op (stmt, &op)) | |||
3943 | gcc_unreachable (); | |||
3944 | if (gassign *assign = dyn_cast<gassign *> (stmt)) | |||
3945 | STMT_VINFO_REDUC_IDX (stmt_info) | |||
3946 | = path[i].second->use - gimple_assign_rhs1_ptr (assign); | |||
3947 | else | |||
3948 | { | |||
3949 | gcall *call = as_a<gcall *> (stmt); | |||
3950 | STMT_VINFO_REDUC_IDX (stmt_info) | |||
3951 | = path[i].second->use - gimple_call_arg_ptr (call, 0); | |||
3952 | } | |||
3953 | bool leading_conversion = (CONVERT_EXPR_CODE_P (op.code) | |||
3954 | && (i == 1 || i == path.length () - 1)); | |||
3955 | if ((op.code != code && !leading_conversion) | |||
3956 | /* We can only handle the final value in epilogue | |||
3957 | generation for reduction chains. */ | |||
3958 | || (i != 1 && !has_single_use (gimple_get_lhs (stmt)))) | |||
3959 | is_slp_reduc = false; | |||
3960 | /* For reduction chains we support trailing/leading | |||
3961 | conversions. We do not store those in the actual chain. */ | |||
3962 | if (leading_conversion) | |||
3963 | continue; | |||
3964 | reduc_chain.safe_push (stmt_info); | |||
3965 | } | |||
3966 | if (slp && is_slp_reduc && reduc_chain.length () > 1) | |||
3967 | { | |||
3968 | for (unsigned i = 0; i < reduc_chain.length () - 1; ++i) | |||
3969 | { | |||
3970 | REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0]; | |||
3971 | REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1]; | |||
3972 | } | |||
3973 | REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0]; | |||
3974 | REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL; | |||
3975 | ||||
3976 | /* Save the chain for further analysis in SLP detection. */ | |||
3977 | LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]); | |||
3978 | REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length (); | |||
3979 | ||||
3980 | *reduc_chain_p = true; | |||
3981 | if (dump_enabled_p ()) | |||
3982 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3983 | "reduction: detected reduction chain\n"); | |||
3984 | } | |||
3985 | else if (dump_enabled_p ()) | |||
3986 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3987 | "reduction: detected reduction\n"); | |||
3988 | ||||
3989 | return def_stmt_info; | |||
3990 | } | |||
3991 | ||||
3992 | if (dump_enabled_p ()) | |||
3993 | dump_printf_loc (MSG_NOTE, vect_location, | |||
3994 | "reduction: unknown pattern\n"); | |||
3995 | ||||
3996 | return NULL; | |||
3997 | } | |||
3998 | ||||
3999 | /* Estimate the number of peeled epilogue iterations for LOOP_VINFO. | |||
4000 | PEEL_ITERS_PROLOGUE is the number of peeled prologue iterations, | |||
4001 | or -1 if not known. */ | |||
4002 | ||||
4003 | static int | |||
4004 | vect_get_peel_iters_epilogue (loop_vec_info loop_vinfo, int peel_iters_prologue) | |||
4005 | { | |||
4006 | int assumed_vf = vect_vf_for_cost (loop_vinfo); | |||
4007 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || peel_iters_prologue == -1) | |||
4008 | { | |||
4009 | if (dump_enabled_p ()) | |||
4010 | dump_printf_loc (MSG_NOTE, vect_location, | |||
4011 | "cost model: epilogue peel iters set to vf/2 " | |||
4012 | "because loop iterations are unknown .\n"); | |||
4013 | return assumed_vf / 2; | |||
4014 | } | |||
4015 | else | |||
4016 | { | |||
4017 | int niters = LOOP_VINFO_INT_NITERS (loop_vinfo); | |||
4018 | peel_iters_prologue = MIN (niters, peel_iters_prologue); | |||
4019 | int peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf; | |||
4020 | /* If we need to peel for gaps but no epilogue iterations would | |||
4021 | otherwise remain, we have to peel VF iterations. */ | |||
4022 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !peel_iters_epilogue) | |||
4023 | peel_iters_epilogue = assumed_vf; | |||
4024 | return peel_iters_epilogue; | |||
4025 | } | |||
4026 | } | |||
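| /* For illustration (hypothetical numbers): with known NITERS == 103, | |||
| an assumed VF of 8 and 3 peeled prologue iterations, the epilogue | |||
| runs (103 - 3) % 8 == 4 iterations; were that remainder 0 while | |||
| peeling for gaps is required, a full VF of 8 would be used instead. */ | |||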
4027 | ||||
4028 | /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ | |||
4029 | int | |||
4030 | vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, | |||
4031 | int *peel_iters_epilogue, | |||
4032 | stmt_vector_for_cost *scalar_cost_vec, | |||
4033 | stmt_vector_for_cost *prologue_cost_vec, | |||
4034 | stmt_vector_for_cost *epilogue_cost_vec) | |||
4035 | { | |||
4036 | int retval = 0; | |||
4037 | ||||
4038 | *peel_iters_epilogue | |||
4039 | = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue); | |||
4040 | ||||
4041 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) | |||
4042 | { | |||
4043 | /* If peeled iterations are known but the number of scalar loop | |||
4044 | iterations is unknown, count a taken branch per peeled loop. */ | |||
4045 | if (peel_iters_prologue > 0) | |||
4046 | retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, | |||
4047 | vect_prologue); | |||
4048 | if (*peel_iters_epilogue > 0) | |||
4049 | retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken, | |||
4050 | vect_epilogue); | |||
4051 | } | |||
4052 | ||||
4053 | stmt_info_for_cost *si; | |||
4054 | int j; | |||
4055 | if (peel_iters_prologue) | |||
4056 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) | |||
4057 | retval += record_stmt_cost (prologue_cost_vec, | |||
4058 | si->count * peel_iters_prologue, | |||
4059 | si->kind, si->stmt_info, si->misalign, | |||
4060 | vect_prologue); | |||
4061 | if (*peel_iters_epilogue) | |||
4062 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) | |||
4063 | retval += record_stmt_cost (epilogue_cost_vec, | |||
4064 | si->count * *peel_iters_epilogue, | |||
4065 | si->kind, si->stmt_info, si->misalign, | |||
4066 | vect_epilogue); | |||
4067 | ||||
4068 | return retval; | |||
4069 | } | |||
4070 | ||||
4071 | /* Function vect_estimate_min_profitable_iters | |||
4072 | ||||
4073 | Return the number of iterations required for the vector version of the | |||
4074 | loop to be profitable relative to the cost of the scalar version of the | |||
4075 | loop. | |||
4076 | ||||
4077 | *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold | |||
4078 | of iterations for vectorization. A value of -1 means loop vectorization | |||
4079 | is not profitable. This returned value may be used for dynamic | |||
4080 | profitability check. | |||
4081 | ||||
4082 | *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used | |||
4083 | for static check against estimated number of iterations. */ | |||
4084 | ||||
4085 | static void | |||
4086 | vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, | |||
4087 | int *ret_min_profitable_niters, | |||
4088 | int *ret_min_profitable_estimate, | |||
4089 | unsigned *suggested_unroll_factor) | |||
4090 | { | |||
4091 | int min_profitable_iters; | |||
4092 | int min_profitable_estimate; | |||
4093 | int peel_iters_prologue; | |||
4094 | int peel_iters_epilogue; | |||
4095 | unsigned vec_inside_cost = 0; | |||
4096 | int vec_outside_cost = 0; | |||
4097 | unsigned vec_prologue_cost = 0; | |||
4098 | unsigned vec_epilogue_cost = 0; | |||
4099 | int scalar_single_iter_cost = 0; | |||
4100 | int scalar_outside_cost = 0; | |||
4101 | int assumed_vf = vect_vf_for_cost (loop_vinfo); | |||
4102 | int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); | |||
4103 | vector_costs *target_cost_data = loop_vinfo->vector_costs; | |||
4104 | ||||
4105 | /* Cost model disabled. */ | |||
4106 | if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) | |||
4107 | { | |||
4108 | if (dump_enabled_p ()) | |||
4109 | dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n"); | |||
4110 | *ret_min_profitable_niters = 0; | |||
4111 | *ret_min_profitable_estimate = 0; | |||
4112 | return; | |||
4113 | } | |||
4114 | ||||
4115 | /* Requires loop versioning tests to handle misalignment. */ | |||
4116 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)) | |||
4117 | { | |||
4118 | /* FIXME: Make cost depend on complexity of individual check. */ | |||
4119 | unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length (); | |||
4120 | (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue); | |||
4121 | if (dump_enabled_p ()) | |||
4122 | dump_printf (MSG_NOTE, | |||
4123 | "cost model: Adding cost of checks for loop " | |||
4124 | "versioning to treat misalignment.\n"); | |||
4125 | } | |||
4126 | ||||
4127 | /* Requires loop versioning with alias checks. */ | |||
4128 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) | |||
4129 | { | |||
4130 | /* FIXME: Make cost depend on complexity of individual check. */ | |||
4131 | unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length (); | |||
4132 | (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue); | |||
4133 | len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length (); | |||
4134 | if (len) | |||
4135 | /* Count LEN - 1 ANDs and LEN comparisons. */ | |||
4136 | (void) add_stmt_cost (target_cost_data, len * 2 - 1, | |||
4137 | scalar_stmt, vect_prologue); | |||
4138 | len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length (); | |||
4139 | if (len) | |||
4140 | { | |||
4141 | /* Count LEN - 1 ANDs and LEN comparisons. */ | |||
4142 | unsigned int nstmts = len * 2 - 1; | |||
4143 | /* +1 for each bias that needs adding. */ | |||
4144 | for (unsigned int i = 0; i < len; ++i) | |||
4145 | if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p) | |||
4146 | nstmts += 1; | |||
4147 | (void) add_stmt_cost (target_cost_data, nstmts, | |||
4148 | scalar_stmt, vect_prologue); | |||
4149 | } | |||
4150 | if (dump_enabled_p ()) | |||
4151 | dump_printf (MSG_NOTE, | |||
4152 | "cost model: Adding cost of checks for loop " | |||
4153 | "versioning aliasing.\n"); | |||
4154 | } | |||
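| /* For illustration (hypothetical counts): with 3 compiled alias | |||
| checks, 2 unequal-address checks and 2 lower bounds of which one is | |||
| signed, the prologue above is charged 3 + (2*2 - 1) + (2*2 - 1 + 1) | |||
| scalar_stmt costs. */ | |||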
4155 | ||||
4156 | /* Requires loop versioning with niter checks. */ | |||
4157 | if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo)) | |||
4158 | { | |||
4159 | /* FIXME: Make cost depend on complexity of individual check. */ | |||
4160 | (void) add_stmt_cost (target_cost_data, 1, vector_stmt, | |||
4161 | NULL, NULL, NULL_TREE, 0, vect_prologue); | |||
4162 | if (dump_enabled_p ()) | |||
4163 | dump_printf (MSG_NOTE, | |||
4164 | "cost model: Adding cost of checks for loop " | |||
4165 | "versioning niters.\n"); | |||
4166 | } | |||
4167 | ||||
4168 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) | |||
4169 | (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, | |||
4170 | vect_prologue); | |||
4171 | ||||
4172 | /* Count statements in scalar loop. Using this as scalar cost for a single | |||
4173 | iteration for now. | |||
4174 | ||||
4175 | TODO: Add outer loop support. | |||
4176 | ||||
4177 | TODO: Consider assigning different costs to different scalar | |||
4178 | statements. */ | |||
4179 | ||||
4180 | scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost (); | |||
4181 | ||||
4182 | /* Add additional cost for the peeled instructions in prologue and epilogue | |||
4183 | loop. (For fully-masked loops there will be no peeling.) | |||
4184 | ||||
4185 | FORNOW: If we don't know the value of peel_iters for prologue or epilogue | |||
4186 | at compile-time - we assume it's vf/2 (the worst would be vf-1). | |||
4187 | ||||
4188 | TODO: Build an expression that represents peel_iters for prologue and | |||
4189 | epilogue to be used in a run-time test. */ | |||
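| /* For illustration (hypothetical VF): if the prologue peel count is | |||
| unknown (NPEEL < 0) and the assumed VF is 8, both prologue and | |||
| epilogue are costed below as vf/2 == 4 scalar iterations each, plus | |||
| the branch guards. */ | |||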
4190 | ||||
4191 | bool prologue_need_br_taken_cost = false; | |||
4192 | bool prologue_need_br_not_taken_cost = false; | |||
4193 | ||||
4194 | /* Calculate peel_iters_prologue. */ | |||
4195 | if (vect_use_loop_mask_for_alignment_p (loop_vinfo)) | |||
4196 | peel_iters_prologue = 0; | |||
4197 | else if (npeel < 0) | |||
4198 | { | |||
4199 | peel_iters_prologue = assumed_vf / 2; | |||
4200 | if (dump_enabled_p ()) | |||
4201 | dump_printf (MSG_NOTE, "cost model: " | |||
4202 | "prologue peel iters set to vf/2.\n"); | |||
4203 | ||||
4204 | /* If peeled iterations are unknown, count a taken branch and a not taken | |||
4205 | branch per peeled loop. Even if scalar loop iterations are known, | |||
4206 | vector iterations are not known since peeled prologue iterations are | |||
4207 | not known. Hence guards remain the same. */ | |||
4208 | prologue_need_br_taken_cost = true; | |||
4209 | prologue_need_br_not_taken_cost = true; | |||
4210 | } | |||
4211 | else | |||
4212 | { | |||
4213 | peel_iters_prologue = npeel; | |||
4214 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_prologue > 0) | |||
4215 | /* If peeled iterations are known but the number of scalar loop | |||
4216 | iterations is unknown, count a taken branch per peeled loop. */ | |||
4217 | prologue_need_br_taken_cost = true; | |||
4218 | } | |||
4219 | ||||
4220 | bool epilogue_need_br_taken_cost = false; | |||
4221 | bool epilogue_need_br_not_taken_cost = false; | |||
4222 | ||||
4223 | /* Calculate peel_iters_epilogue. */ | |||
4224 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) | |||
4225 | /* We need to peel exactly one iteration for gaps. */ | |||
4226 | peel_iters_epilogue = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0; | |||
4227 | else if (npeel < 0) | |||
4228 | { | |||
4229 | /* If peeling for alignment is unknown, the loop bound of the main | |||
4230 | loop becomes unknown. */ | |||
4231 | peel_iters_epilogue = assumed_vf / 2; | |||
4232 | if (dump_enabled_p ()) | |||
4233 | dump_printf (MSG_NOTE, "cost model: " | |||
4234 | "epilogue peel iters set to vf/2 because " | |||
4235 | "peeling for alignment is unknown.\n"); | |||
4236 | ||||
4237 | /* See the same reason above in peel_iters_prologue calculation. */ | |||
4238 | epilogue_need_br_taken_cost = true; | |||
4239 | epilogue_need_br_not_taken_cost = true; | |||
4240 | } | |||
4241 | else | |||
4242 | { | |||
4243 | peel_iters_epilogue = vect_get_peel_iters_epilogue (loop_vinfo, npeel); | |||
4244 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_epilogue > 0) | |||
4245 | /* If peeled iterations are known but the number of scalar loop | |||
4246 | iterations is unknown, count a taken branch per peeled loop. */ | |||
4247 | epilogue_need_br_taken_cost = true; | |||
4248 | } | |||
4249 | ||||
4250 | stmt_info_for_cost *si; | |||
4251 | int j; | |||
4252 | /* Add costs associated with peel_iters_prologue. */ | |||
4253 | if (peel_iters_prologue) | |||
4254 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) | |||
4255 | { | |||
4256 | (void) add_stmt_cost (target_cost_data, | |||
4257 | si->count * peel_iters_prologue, si->kind, | |||
4258 | si->stmt_info, si->node, si->vectype, | |||
4259 | si->misalign, vect_prologue); | |||
4260 | } | |||
4261 | ||||
4262 | /* Add costs associated with peel_iters_epilogue. */ | |||
4263 | if (peel_iters_epilogue) | |||
4264 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) | |||
4265 | { | |||
4266 | (void) add_stmt_cost (target_cost_data, | |||
4267 | si->count * peel_iters_epilogue, si->kind, | |||
4268 | si->stmt_info, si->node, si->vectype, | |||
4269 | si->misalign, vect_epilogue); | |||
4270 | } | |||
4271 | ||||
4272 | /* Add possible cond_branch_taken/cond_branch_not_taken cost. */ | |||
4273 | ||||
4274 | if (prologue_need_br_taken_cost) | |||
4275 | (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, | |||
4276 | vect_prologue); | |||
4277 | ||||
4278 | if (prologue_need_br_not_taken_cost) | |||
4279 | (void) add_stmt_cost (target_cost_data, 1, | |||
4280 | cond_branch_not_taken, vect_prologue); | |||
4281 | ||||
4282 | if (epilogue_need_br_taken_cost) | |||
4283 | (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, | |||
4284 | vect_epilogue); | |||
4285 | ||||
4286 | if (epilogue_need_br_not_taken_cost) | |||
4287 | (void) add_stmt_cost (target_cost_data, 1, | |||
4288 | cond_branch_not_taken, vect_epilogue); | |||
4289 | ||||
4290 | /* Take care of special costs for rgroup controls of partial vectors. */ | |||
4291 | if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)((loop_vinfo)->using_partial_vectors_p && !(loop_vinfo )->masks.is_empty ())) | |||
4292 | { | |||
4293 | /* Calculate how many masks we need to generate. */ | |||
4294 | unsigned int num_masks = 0; | |||
4295 | rgroup_controls *rgm; | |||
4296 | unsigned int num_vectors_m1; | |||
4297 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)for (num_vectors_m1 = 0; ((loop_vinfo)->masks).iterate ((num_vectors_m1 ), &(rgm)); ++(num_vectors_m1)) | |||
4298 | if (rgm->type) | |||
4299 | num_masks += num_vectors_m1 + 1; | |||
4300 | gcc_assert (num_masks > 0)((void)(!(num_masks > 0) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 4300, __FUNCTION__), 0 : 0)); | |||
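     |       /* Illustration with invented rgroups: if the loop has two mask
     |          rgroups, one needing a single mask vector (index 0) and one
     |          needing two (index 1), the count above is 1 + 2 = 3.  */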
4301 | 
4302 |       /* In the worst case, we need to generate each mask in the prologue
4303 |          and in the loop body.  One of the loop body mask instructions
4304 |          replaces the comparison in the scalar loop, and since we don't
4305 |          count the scalar comparison against the scalar body, we shouldn't
4306 |          count that vector instruction against the vector body either.
4307 | 
4308 |          Sometimes we can use unpacks instead of generating prologue
4309 |          masks and sometimes the prologue mask will fold to a constant,
4310 |          so the actual prologue cost might be smaller.  However, it's
4311 |          simpler and safer to use the worst-case cost; if this ends up
4312 |          being the tie-breaker between vectorizing or not, then it's
4313 |          probably better not to vectorize.  */
4314 |       (void) add_stmt_cost (target_cost_data, num_masks,
4315 |                             vector_stmt, NULL, NULL, NULL_TREE, 0,
4316 |                             vect_prologue);
4317 |       (void) add_stmt_cost (target_cost_data, num_masks - 1,
4318 |                             vector_stmt, NULL, NULL, NULL_TREE, 0,
4319 |                             vect_body);
4320 |     }
4321 |   else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
4322 |     {
4323 |       /* Referring to the functions vect_set_loop_condition_partial_vectors
4324 |          and vect_set_loop_controls_directly, we need to generate each
4325 |          length in the prologue and in the loop body if required.  Although
4326 |          there are some possible optimizations, we consider the worst case
4327 |          here.  */
4328 | 
4329 |       bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
4330 |       signed char partial_load_store_bias
4331 |         = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
4332 |       bool need_iterate_p
4333 |         = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)
4334 |            && !vect_known_niters_smaller_than_vf (loop_vinfo));
4335 | 
4336 |       /* Calculate how many statements need to be added.  */
4337 |       unsigned int prologue_stmts = 0;
4338 |       unsigned int body_stmts = 0;
4339 | 
4340 |       rgroup_controls *rgc;
4341 |       unsigned int num_vectors_m1;
4342 |       FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
4343 |         if (rgc->type)
4344 |           {
4345 |             /* May need one SHIFT for nitems_total computation.  */
4346 |             unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor;
4347 |             if (nitems != 1 && !niters_known_p)
4348 |               prologue_stmts += 1;
4349 | 
4350 |             /* May need one MAX and one MINUS for wrap around.  */
4351 |             if (vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc))
4352 |               prologue_stmts += 2;
4353 | 
4354 |             /* Need one MAX and one MINUS for each batch limit except for
4355 |                the first one.  */
4356 |             prologue_stmts += num_vectors_m1 * 2;
4357 | 
4358 |             unsigned int num_vectors = num_vectors_m1 + 1;
4359 | 
4360 |             /* Need to set up lengths in prologue, only one MIN required
4361 |                for each since start index is zero.  */
4362 |             prologue_stmts += num_vectors;
4363 | 
4364 |             /* If we have a non-zero partial load bias, we need one PLUS
4365 |                to adjust the load length.  */
4366 |             if (partial_load_store_bias != 0)
4367 |               body_stmts += 1;
4368 | 
4369 |             /* Each may need two MINs and one MINUS to update lengths in the
4370 |                body for the next iteration.  */
4371 |             if (need_iterate_p)
4372 |               body_stmts += 3 * num_vectors;
4373 |           }
4374 | 
4375 |       (void) add_stmt_cost (target_cost_data, prologue_stmts,
4376 |                             scalar_stmt, vect_prologue);
4377 |       (void) add_stmt_cost (target_cost_data, body_stmts,
4378 |                             scalar_stmt, vect_body);
4379 |     }
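     |   /* Illustration with invented values: a single length rgroup at index 1
     |      (so num_vectors == 2) with nitems == 2, an unknown iteration count,
     |      no wrap-around risk, zero load/store bias and an iterating loop gives
     |      prologue_stmts = 1 + 2 + 2 = 5 (one SHIFT, one MAX/MINUS pair for the
     |      second batch limit, two MINs) and body_stmts = 3 * 2 = 6.  */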
4380 | 
4381 |   /* FORNOW: The scalar outside cost is incremented in one of the
4382 |      following ways:
4383 | 
4384 |      1. The vectorizer checks for alignment and aliasing and generates
4385 |      a condition that allows dynamic vectorization.  A cost model
4386 |      check is ANDED with the versioning condition.  Hence scalar code
4387 |      path now has the added cost of the versioning check.
4388 | 
4389 |        if (cost > th & versioning_check)
4390 |          jmp to vector code
4391 | 
4392 |      Hence the run-time scalar cost is incremented by a not-taken branch
4393 |      cost.
4394 | 
4395 |      2. The vectorizer then checks if a prologue is required.  If the
4396 |      cost model check was not done before during versioning, it has to
4397 |      be done before the prologue check.
4398 | 
4399 |        if (cost <= th)
4400 |          prologue = scalar_iters
4401 |        if (prologue == 0)
4402 |          jmp to vector code
4403 |        else
4404 |          execute prologue
4405 |        if (prologue == num_iters)
4406 |          go to exit
4407 | 
4408 |      Hence the run-time scalar cost is incremented by a taken branch,
4409 |      plus a not-taken branch, plus a taken branch cost.
4410 | 
4411 |      3. The vectorizer then checks if an epilogue is required.  If the
4412 |      cost model check was not done before during prologue check, it
4413 |      has to be done with the epilogue check.
4414 | 
4415 |        if (prologue == 0)
4416 |          jmp to vector code
4417 |        else
4418 |          execute prologue
4419 |        if (prologue == num_iters)
4420 |          go to exit
4421 |        vector code:
4422 |          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
4423 |            jmp to epilogue
4424 | 
4425 |      Hence the run-time scalar cost should be incremented by 2 taken
4426 |      branches.
4427 | 
4428 |      TODO: The back end may reorder the BBs differently and reverse
4429 |      conditions/branch directions.  Change the estimates below to
4430 |      something more reasonable.  */
4431 | 
4432 |   /* If the number of iterations is known and we do not do versioning, we can
4433 |      decide whether to vectorize at compile time.  Hence the scalar version
4434 |      does not carry cost model guard costs.  */
4435 |   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4436 |       || LOOP_REQUIRES_VERSIONING (loop_vinfo))
4437 |     {
4438 |       /* Cost model check occurs at versioning.  */
4439 |       if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
4440 |         scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
4441 |       else
4442 |         {
4443 |           /* Cost model check occurs at prologue generation.  */
4444 |           if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
4445 |             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
4446 |                                    + vect_get_stmt_cost (cond_branch_not_taken);
4447 |           /* Cost model check occurs at epilogue generation.  */
4448 |           else
4449 |             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
4450 |         }
4451 |     }
4452 | 
4453 |   /* Complete the target-specific cost calculations.  */
4454 |   finish_cost (loop_vinfo->vector_costs, loop_vinfo->scalar_costs,
4455 |                &vec_prologue_cost, &vec_inside_cost, &vec_epilogue_cost,
4456 |                suggested_unroll_factor);
4457 | 
4458 |   if (suggested_unroll_factor && *suggested_unroll_factor > 1
4459 |       && LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) != MAX_VECTORIZATION_FACTOR
4460 |       && !known_le (LOOP_VINFO_VECT_FACTOR (loop_vinfo) *
4461 |                     *suggested_unroll_factor,
4462 |                     LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)))
4463 |     {
4464 |       if (dump_enabled_p ())
4465 |         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4466 |                          "can't unroll as unrolled vectorization factor larger"
4467 |                          " than maximum vectorization factor: "
4468 |                          HOST_WIDE_INT_PRINT_UNSIGNED "\n",
4469 |                          LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo));
4470 |       *suggested_unroll_factor = 1;
4471 |     }
4472 | 
4473 |   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
4474 | 
4475 |   if (dump_enabled_p ())
4476 |     {
4477 |       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
4478 |       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
4479 |                    vec_inside_cost);
4480 |       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
4481 |                    vec_prologue_cost);
4482 |       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
4483 |                    vec_epilogue_cost);
4484 |       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
4485 |                    scalar_single_iter_cost);
4486 |       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
4487 |                    scalar_outside_cost);
4488 |       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
4489 |                    vec_outside_cost);
4490 |       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
4491 |                    peel_iters_prologue);
4492 |       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
4493 |                    peel_iters_epilogue);
4494 |     }
4495 | 
4496 |   /* Calculate number of iterations required to make the vector version
4497 |      profitable, relative to the loop bodies only.  The following condition
4498 |      must hold true:
4499 |        SIC * niters + SOC > VIC * ((niters - NPEEL) / VF) + VOC
4500 |      where
4501 |        SIC = scalar iteration cost, VIC = vector iteration cost,
4502 |        VOC = vector outside cost, VF = vectorization factor,
4503 |        NPEEL = prologue iterations + epilogue iterations,
4504 |        SOC = scalar outside cost for run time cost model check.  */
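     |   /* For illustration, with invented costs SIC = 4, VIC = 8, VF = 4,
     |      NPEEL = 0, SOC = 6 and VOC = 20: the scalar side costs
     |      4 * niters + 6 and the vector side 8 * (niters / 4) + 20
     |      = 2 * niters + 20, so the condition first holds at niters = 8.  */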
4505 | 
4506 |   int saving_per_viter = (scalar_single_iter_cost * assumed_vf
4507 |                           - vec_inside_cost);
4508 |   if (saving_per_viter <= 0)
4509 |     {
4510 |       if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
4511 |         warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd,
4512 |                     "vectorization did not happen for a simd loop");
4513 | 
4514 |       if (dump_enabled_p ())
4515 |         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4516 |                          "cost model: the vector iteration cost = %d "
4517 |                          "divided by the scalar iteration cost = %d "
4518 |                          "is greater or equal to the vectorization factor = %d"
4519 |                          ".\n",
4520 |                          vec_inside_cost, scalar_single_iter_cost, assumed_vf);
4521 |       *ret_min_profitable_niters = -1;
4522 |       *ret_min_profitable_estimate = -1;
4523 |       return;
4524 |     }
4525 | 
4526 |   /* ??? The "if" arm is written to handle all cases; see below for what
4527 |      we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P.  */
4528 |   if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4529 |     {
4530 |       /* Rewriting the condition above in terms of the number of
4531 |          vector iterations (vniters) rather than the number of
4532 |          scalar iterations (niters) gives:
4533 | 
4534 |            SIC * (vniters * VF + NPEEL) + SOC > VIC * vniters + VOC
4535 | 
4536 |            <==> vniters * (SIC * VF - VIC) > VOC - SIC * NPEEL - SOC
4537 | 
4538 |          For integer N, X and Y when X > 0:
4539 | 
4540 |            N * X > Y <==> N >= (Y /[floor] X) + 1.  */
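     |       /* A quick sanity check of the identity with X = 3 and Y = 10:
     |          N * 3 > 10 first holds at N = 10/3 + 1 = 4.  */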
4541 |       int outside_overhead = (vec_outside_cost
4542 |                               - scalar_single_iter_cost * peel_iters_prologue
4543 |                               - scalar_single_iter_cost * peel_iters_epilogue
4544 |                               - scalar_outside_cost);
4545 |       /* We're only interested in cases that require at least one
4546 |          vector iteration.  */
4547 |       int min_vec_niters = 1;
4548 |       if (outside_overhead > 0)
4549 |         min_vec_niters = outside_overhead / saving_per_viter + 1;
4550 | 
4551 |       if (dump_enabled_p ())
4552 |         dump_printf (MSG_NOTE, "  Minimum number of vector iterations: %d\n",
4553 |                      min_vec_niters);
4554 | 
4555 |       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4556 |         {
4557 |           /* Now that we know the minimum number of vector iterations,
4558 |              find the minimum niters for which the scalar cost is larger:
4559 | 
4560 |                SIC * niters > VIC * vniters + VOC - SOC
4561 | 
4562 |              We know that the minimum niters is no more than
4563 |              vniters * VF + NPEEL, but it might be (and often is) less
4564 |              than that if a partial vector iteration is cheaper than the
4565 |              equivalent scalar code.  */
4566 |           int threshold = (vec_inside_cost * min_vec_niters
4567 |                            + vec_outside_cost
4568 |                            - scalar_outside_cost);
4569 |           if (threshold <= 0)
4570 |             min_profitable_iters = 1;
4571 |           else
4572 |             min_profitable_iters = threshold / scalar_single_iter_cost + 1;
4573 |         }
4574 |       else
4575 |         /* Convert the number of vector iterations into a number of
4576 |            scalar iterations.  */
4577 |         min_profitable_iters = (min_vec_niters * assumed_vf
4578 |                                 + peel_iters_prologue
4579 |                                 + peel_iters_epilogue);
4580 |     }
4581 |   else
4582 |     {
4583 |       min_profitable_iters = ((vec_outside_cost - scalar_outside_cost)
4584 |                               * assumed_vf
4585 |                               - vec_inside_cost * peel_iters_prologue
4586 |                               - vec_inside_cost * peel_iters_epilogue);
4587 |       if (min_profitable_iters <= 0)
4588 |         min_profitable_iters = 0;
4589 |       else
4590 |         {
4591 |           min_profitable_iters /= saving_per_viter;
4592 | 
4593 |           if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters)
4594 |               <= (((int) vec_inside_cost * min_profitable_iters)
4595 |                   + (((int) vec_outside_cost - scalar_outside_cost)
4596 |                      * assumed_vf)))
4597 |             min_profitable_iters++;
4598 |         }
4599 |     }
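     |   /* Worked example for the "else" arm above, with invented costs,
     |      no peeling and no versioning check: VOC = 20, SOC = 0, VIC = 8,
     |      SIC = 4, VF = 4.  Then saving_per_viter = 4 * 4 - 8 = 8 and
     |      min_profitable_iters starts as 20 * 4 / 8 = 10; the two sides tie
     |      exactly at 10 iterations (160 versus 80 + 80), so the <= check
     |      bumps the result to 11.  */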
4600 | 
4601 |   if (dump_enabled_p ())
4602 |     dump_printf (MSG_NOTE,
4603 |                  "  Calculated minimum iters for profitability: %d\n",
4604 |                  min_profitable_iters);
4605 | 
4606 |   if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
4607 |       && min_profitable_iters < (assumed_vf + peel_iters_prologue))
4608 |     /* We want the vectorized loop to execute at least once.  */
4609 |     min_profitable_iters = assumed_vf + peel_iters_prologue;
4610 |   else if (min_profitable_iters < peel_iters_prologue)
4611 |     /* For LOOP_VINFO_USING_PARTIAL_VECTORS_P, we need to ensure the
4612 |        vectorized loop executes at least once.  */
4613 |     min_profitable_iters = peel_iters_prologue;
4614 | 
4615 |   if (dump_enabled_p ())
4616 |     dump_printf_loc (MSG_NOTE, vect_location,
4617 |                      "  Runtime profitability threshold = %d\n",
4618 |                      min_profitable_iters);
4619 | 
4620 |   *ret_min_profitable_niters = min_profitable_iters;
4621 | 
4622 |   /* Calculate number of iterations required to make the vector version
4623 |      profitable, relative to the loop bodies only.
4624 | 
4625 |      Non-vectorized variant is SIC * niters and it must win over vector
4626 |      variant on the expected loop trip count.  The following condition
4627 |      must hold true:
4628 |        SIC * niters > VIC * ((niters - NPEEL) / VF) + VOC + SOC  */
4629 | 
4630 |   if (vec_outside_cost <= 0)
4631 |     min_profitable_estimate = 0;
4632 |   /* ??? This "else if" arm is written to handle all cases; see below for
4633 |      what we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P.  */
4633 |   else if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4634 |     {
4635 |       /* This is a repeat of the code above, but with + SOC rather
4636 |          than - SOC.  */
4637 |       int outside_overhead = (vec_outside_cost
4638 |                               - scalar_single_iter_cost * peel_iters_prologue
4639 |                               - scalar_single_iter_cost * peel_iters_epilogue
4640 |                               + scalar_outside_cost);
4641 |       int min_vec_niters = 1;
4642 |       if (outside_overhead > 0)
4643 |         min_vec_niters = outside_overhead / saving_per_viter + 1;
4644 | 
4645 |       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4646 |         {
4647 |           int threshold = (vec_inside_cost * min_vec_niters
4648 |                            + vec_outside_cost
4649 |                            + scalar_outside_cost);
4650 |           min_profitable_estimate = threshold / scalar_single_iter_cost + 1;
4651 |         }
4652 |       else
4653 |         min_profitable_estimate = (min_vec_niters * assumed_vf
4654 |                                    + peel_iters_prologue
4655 |                                    + peel_iters_epilogue);
4656 |     }
4657 |   else
4658 |     {
4659 |       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost)
4660 |                                  * assumed_vf
4661 |                                  - vec_inside_cost * peel_iters_prologue
4662 |                                  - vec_inside_cost * peel_iters_epilogue)
4663 |                                 / ((scalar_single_iter_cost * assumed_vf)
4664 |                                    - vec_inside_cost);
4665 |     }
4666 |   min_profitable_estimate = MAX (min_profitable_estimate,
4667 |                                  min_profitable_iters);
4668 |   if (dump_enabled_p ())
4669 |     dump_printf_loc (MSG_NOTE, vect_location,
4670 |                      "  Static estimate profitability threshold = %d\n",
4671 |                      min_profitable_estimate);
4672 | 
4673 |   *ret_min_profitable_estimate = min_profitable_estimate;
4674 | }
4675 | 
4676 | /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
4677 |    vector elements (not bits) for a vector with NELT elements.  */
4678 | static void
4679 | calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt,
4680 |                               vec_perm_builder *sel)
4681 | {
4682 |   /* The encoding is a single stepped pattern.  Any wrap-around is handled
4683 |      by vec_perm_indices.  */
4684 |   sel->new_vector (nelt, 1, 3);
4685 |   for (unsigned int i = 0; i < 3; i++)
4686 |     sel->quick_push (i + offset);
4687 | }
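     | /* For instance, OFFSET = 2 and NELT = 8 push the stepped pattern
     |    {2, 3, 4}, which vec_perm_indices extends to the full selector
     |    {2, 3, 4, 5, 6, 7, 8, 9}; indices 8 and 9 select from the second
     |    vec_perm input.  */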
4688 | 
4689 | /* Checks whether the target supports whole-vector shifts for vectors of mode
4690 |    MODE.  This is the case if _either_ the platform handles vec_shr_optab, _or_
4691 |    it supports vec_perm_const with masks for all necessary shift amounts.  */
4692 | static bool
4693 | have_whole_vector_shift (machine_mode mode)
4694 | {
4695 |   if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
4696 |     return true;
4697 | 
4698 |   /* Variable-length vectors should be handled via the optab.  */
4699 |   unsigned int nelt;
4700 |   if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
4701 |     return false;
4702 | 
4703 |   vec_perm_builder sel;
4704 |   vec_perm_indices indices;
4705 |   for (unsigned int i = nelt / 2; i >= 1; i /= 2)
4706 |     {
4707 |       calc_vec_perm_mask_for_shift (i, nelt, &sel);
4708 |       indices.new_vector (sel, 2, nelt);
4709 |       if (!can_vec_perm_const_p (mode, mode, indices, false))
4710 |         return false;
4711 |     }
4712 |   return true;
4713 | }
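     | /* E.g. for a fixed-length mode with eight elements, the loop above
     |    checks that element shifts of 4, 2 and 1 can each be done as a
     |    constant permute.  */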
4714 | 
4715 | /* Return true if (a) STMT_INFO is a DOT_PROD_EXPR reduction whose
4716 |    multiplication operands have differing signs and (b) we intend
4717 |    to emulate the operation using a series of signed DOT_PROD_EXPRs.
4718 |    See vect_emulate_mixed_dot_prod for the actual sequence used.  */
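     | /* A hypothetical source-level example of such a reduction:
     | 
     |      int s = 0;
     |      for (int i = 0; i < n; i++)
     |        s += (signed char) a[i] * (unsigned char) b[i];
     | 
     |    on a target that provides only the signed and unsigned variants of
     |    DOT_PROD_EXPR, not the mixed-sign one.  */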
4719 | 
4720 | static bool
4721 | vect_is_emulated_mixed_dot_prod (loop_vec_info loop_vinfo,
4722 |                                  stmt_vec_info stmt_info)
4723 | {
4724 |   gassign *assign = dyn_cast<gassign *> (stmt_info->stmt);
4725 |   if (!assign || gimple_assign_rhs_code (assign) != DOT_PROD_EXPR)
4726 |     return false;
4727 | 
4728 |   tree rhs1 = gimple_assign_rhs1 (assign);
4729 |   tree rhs2 = gimple_assign_rhs2 (assign);
4730 |   if (TYPE_SIGN (TREE_TYPE (rhs1)) == TYPE_SIGN (TREE_TYPE (rhs2)))
4731 |     return false;
4732 | 
4733 |   stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
4734 |   gcc_assert (reduc_info->is_reduc_info);
4735 |   return !directly_supported_p (DOT_PROD_EXPR,
4736 |                                 STMT_VINFO_REDUC_VECTYPE_IN (reduc_info),
4737 |                                 optab_vector_mixed_sign);
4738 | }
4739 | 
4740 | /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
4741 |    functions.  Design better to avoid maintenance issues.  */
4742 | 
4743 | /* Function vect_model_reduction_cost.
4744 | 
4745 |    Models cost for a reduction operation, including the vector ops
4746 |    generated within the strip-mine loop in some cases, the initial
4747 |    definition before the loop, and the epilogue code that must be
4748 |    generated.  */
4749 | 
4750 | static void
4751 | vect_model_reduction_cost (loop_vec_info loop_vinfo,
4752 |                            stmt_vec_info stmt_info, internal_fn reduc_fn,
4753 |                            vect_reduction_type reduction_type,
4754 |                            int ncopies, stmt_vector_for_cost *cost_vec)
4755 | {
4756 |   int prologue_cost = 0, epilogue_cost = 0, inside_cost = 0;
4757 |   tree vectype;
4758 |   machine_mode mode;
4759 |   class loop *loop = NULL;
4760 | 
4761 |   if (loop_vinfo)
4762 |     loop = LOOP_VINFO_LOOP (loop_vinfo);
4763 | 
4764 |   /* Condition reductions generate two reductions in the loop.  */
4765 |   if (reduction_type == COND_REDUCTION)
4766 |     ncopies *= 2;
4767 | 
4768 |   vectype = STMT_VINFO_VECTYPE (stmt_info);
4769 |   mode = TYPE_MODE (vectype);
4770 |   stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
4771 | 
4772 |   gimple_match_op op;
4773 |   if (!gimple_extract_op (orig_stmt_info->stmt, &op))
4774 |     gcc_unreachable ();
4775 | 
4776 |   bool emulated_mixed_dot_prod
4777 |     = vect_is_emulated_mixed_dot_prod (loop_vinfo, stmt_info);
4778 |   if (reduction_type == EXTRACT_LAST_REDUCTION)
4779 |     /* No extra instructions are needed in the prologue.  The loop body
4780 |        operations are costed in vectorizable_condition.  */
4781 |     inside_cost = 0;
4782 |   else if (reduction_type == FOLD_LEFT_REDUCTION)
4783 |     {
4784 |       /* No extra instructions needed in the prologue.  */
4785 |       prologue_cost = 0;
4786 | 
4787 |       if (reduc_fn != IFN_LAST)
4788 |         /* Count one reduction-like operation per vector.  */
4789 |         inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar,
4790 |                                         stmt_info, 0, vect_body);
4791 |       else
4792 |         {
4793 |           /* Use NELEMENTS extracts and NELEMENTS scalar ops.  */
4794 |           unsigned int nelements = ncopies * vect_nunits_for_cost (vectype);
4795 |           inside_cost = record_stmt_cost (cost_vec, nelements,
4796 |                                           vec_to_scalar, stmt_info, 0,
4797 |                                           vect_body);
4798 |           inside_cost += record_stmt_cost (cost_vec, nelements,
4799 |                                            scalar_stmt, stmt_info, 0,
4800 |                                            vect_body);
4801 |         }
4802 |     }
4803 |   else
4804 |     {
4805 |       /* Add in the cost of the initial definitions.  */
4806 |       int prologue_stmts;
4807 |       if (reduction_type == COND_REDUCTION)
4808 |         /* For cond reductions we have four vectors: initial index, step,
4809 |            initial result of the data reduction, initial value of the index
4810 |            reduction.  */
4811 |         prologue_stmts = 4;
4812 |       else if (emulated_mixed_dot_prod)
4813 |         /* We need the initial reduction value and two invariants:
4814 |            one that contains the minimum signed value and one that
4815 |            contains half of its negative.  */
4816 |         prologue_stmts = 3;
4817 |       else
4818 |         prologue_stmts = 1;
4819 |       prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
4820 |                                          scalar_to_vec, stmt_info, 0,
4821 |                                          vect_prologue);
4822 |     }
4823 | 
4824 |   /* Determine cost of epilogue code.
4825 | 
4826 |      We have a reduction operator that will reduce the vector in one
4827 |      statement.  Also requires scalar extract.  */
4828 | 
4829 |   if (!loop || !nested_in_vect_loop_p (loop, orig_stmt_info))
4830 |     {
4831 |       if (reduc_fn != IFN_LAST)
4832 |         {
4833 |           if (reduction_type == COND_REDUCTION)
4834 |             {
4835 |               /* An EQ stmt and a COND_EXPR stmt.  */
4836 |               epilogue_cost += record_stmt_cost (cost_vec, 2,
4837 |                                                  vector_stmt, stmt_info, 0,
4838 |                                                  vect_epilogue);
4839 |               /* Reduction of the max index and a reduction of the found
4840 |                  values.  */
4841 |               epilogue_cost += record_stmt_cost (cost_vec, 2,
4842 |                                                  vec_to_scalar, stmt_info, 0,
4843 |                                                  vect_epilogue);
4844 |               /* A broadcast of the max value.  */
4845 |               epilogue_cost += record_stmt_cost (cost_vec, 1,
4846 |                                                  scalar_to_vec, stmt_info, 0,
4847 |                                                  vect_epilogue);
4848 |             }
4849 |           else
4850 |             {
4851 |               epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
4852 |                                                  stmt_info, 0, vect_epilogue);
4853 |               epilogue_cost += record_stmt_cost (cost_vec, 1,
4854 |                                                  vec_to_scalar, stmt_info, 0,
4855 |                                                  vect_epilogue);
4856 |             }
4857 |         }
4858 |       else if (reduction_type == COND_REDUCTION)
4859 |         {
4860 |           unsigned estimated_nunits = vect_nunits_for_cost (vectype);
4861 |           /* Extraction of scalar elements.  */
4862 |           epilogue_cost += record_stmt_cost (cost_vec,
4863 |                                              2 * estimated_nunits,
4864 |                                              vec_to_scalar, stmt_info, 0,
4865 |                                              vect_epilogue);
4866 |           /* Scalar max reductions via COND_EXPR / MAX_EXPR.  */
4867 |           epilogue_cost += record_stmt_cost (cost_vec,
4868 |                                              2 * estimated_nunits - 3,
4869 |                                              scalar_stmt, stmt_info, 0,
4870 |                                              vect_epilogue);
4871 |         }
4872 |       else if (reduction_type == EXTRACT_LAST_REDUCTION
4873 |                || reduction_type == FOLD_LEFT_REDUCTION)
4874 |         /* No extra instructions are needed in the epilogue.  */
4875 |         ;
4876 |       else
4877 |         {
4878 |           int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4879 |           tree bitsize = TYPE_SIZE (op.type);
4880 |           int element_bitsize = tree_to_uhwi (bitsize);
4881 |           int nelements = vec_size_in_bits / element_bitsize;
4882 | 
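     |           /* With invented numbers: a 128-bit vector of 32-bit elements
     |              gives nelements = 4, so the shift-based branch below costs
     |              exact_log2 (4) * 2 = 4 vector statements plus one extract,
     |              while the fallback costs 4 + 3 = 7 vector statements.  */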
4883 |           if (op.code == COND_EXPR)
4884 |             op.code = MAX_EXPR;
4885 | 
4886 |           /* We have a whole vector shift available.  */
4887 |           if (VECTOR_MODE_P (mode)
4888 |               && directly_supported_p (op.code, vectype)
4889 |               && have_whole_vector_shift (mode))
4890 |             {
4891 |               /* Final reduction via vector shifts and the reduction operator.
4892 |                  Also requires scalar extract.  */
4893 |               epilogue_cost += record_stmt_cost (cost_vec,
4894 |                                                  exact_log2 (nelements) * 2,
4895 |                                                  vector_stmt, stmt_info, 0,
4896 |                                                  vect_epilogue);
4897 |               epilogue_cost += record_stmt_cost (cost_vec, 1,
4898 |                                                  vec_to_scalar, stmt_info, 0,
4899 |                                                  vect_epilogue);
4900 |             }
4901 |           else
4902 |             /* Use extracts and reduction op for final reduction.  For N
4903 |                elements, we have N extracts and N-1 reduction ops.  */
4904 |             epilogue_cost += record_stmt_cost (cost_vec,
4905 |                                                nelements + nelements - 1,
4906 |                                                vector_stmt, stmt_info, 0,
4907 |                                                vect_epilogue);
4908 |         }
4909 |     }
4910 | 
4911 |   if (dump_enabled_p ())
4912 |     dump_printf (MSG_NOTE,
4913 |                  "vect_model_reduction_cost: inside_cost = %d, "
4914 |                  "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
4915 |                  prologue_cost, epilogue_cost);
4916 | }
4917 | 
4918 | /* SEQ is a sequence of instructions that initialize the reduction
4919 |    described by REDUC_INFO.  Emit them in the appropriate place.  */
4920 | 
4921 | static void
4922 | vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo,
4923 |                                 stmt_vec_info reduc_info, gimple *seq)
4924 | {
4925 |   if (reduc_info->reused_accumulator)
4926 |     {
4927 |       /* When reusing an accumulator from the main loop, we only need
4928 |          initialization instructions if the main loop can be skipped.
4929 |          In that case, emit the initialization instructions at the end
4930 |          of the guard block that does the skip.  */
4931 |       edge skip_edge = loop_vinfo->skip_main_loop_edge;
4932 |       gcc_assert (skip_edge);
4933 |       gimple_stmt_iterator gsi = gsi_last_bb (skip_edge->src);
4934 |       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
4935 |     }
4936 |   else
4937 |     {
4938 |       /* The normal case: emit the initialization instructions on the
4939 |          preheader edge.  */
4940 |       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4941 |       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), seq);
4942 |     }
4943 | }
4944 | 
4945 | /* Function get_initial_def_for_reduction
4946 | 
4947 |    Input:
4948 |    REDUC_INFO - the info_for_reduction
4949 |    INIT_VAL - the initial value of the reduction variable
4950 |    NEUTRAL_OP - a value that has no effect on the reduction, as per
4951 |                 neutral_op_for_reduction
4952 | 
4953 |    Output:
4954 |    Return a vector variable, initialized according to the operation that
4955 |    STMT_VINFO performs.  This vector will be used as the initial value
4956 |    of the vector of partial results.
4957 | 
4958 |    The value we need is a vector in which element 0 has value INIT_VAL
4959 |    and every other element has value NEUTRAL_OP.  */
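     | /* For example, for a sum reduction (NEUTRAL_OP 0) whose scalar
     |    accumulator starts at 5, a four-element result would be
     |    {5, 0, 0, 0}.  */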
4960 | 
4961 | static tree
4962 | get_initial_def_for_reduction (loop_vec_info loop_vinfo,
4963 |                                stmt_vec_info reduc_info,
4964 |                                tree init_val, tree neutral_op)
4965 | {
4966 |   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4967 |   tree scalar_type = TREE_TYPE (init_val);
4968 |   tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
4969 |   tree init_def;
4970 |   gimple_seq stmts = NULL;
4971 | 
4972 |   gcc_assert (vectype);
4973 | 
4974 |   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
4975 |               || SCALAR_FLOAT_TYPE_P (scalar_type));
4976 | 
4977 |   gcc_assert (nested_in_vect_loop_p (loop, reduc_info)
4978 |               || loop == (gimple_bb (reduc_info->stmt))->loop_father);
4979 | 
4980 |   if (operand_equal_p (init_val, neutral_op))
4981 |     {
4982 |       /* If both elements are equal then the vector described above is
4983 |          just a splat.  */
4984 |       neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op);
4985 |       init_def = gimple_build_vector_from_val (&stmts, vectype, neutral_op);
4986 |     }
4987 |   else
4988 |     {
4989 |       neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op);
4990 |       init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
4991 |       if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ())
4992 |         {
4993 |           /* Construct a splat of NEUTRAL_OP and insert INIT_VAL into
4994 |              element 0.  */
4995 |           init_def = gimple_build_vector_from_val (&stmts, vectype,
4996 |                                                    neutral_op);
4997 |           init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT,
4998 |                                    vectype, init_def, init_val);
4999 |         }
5000 |       else
5001 |         {
5002 |           /* Build {INIT_VAL, NEUTRAL_OP, NEUTRAL_OP, ...}.  */
5003 |           tree_vector_builder elts (vectype, 1, 2);
5004 |           elts.quick_push (init_val);
5005 |           elts.quick_push (neutral_op);
5006 |           init_def = gimple_build_vector (&stmts, &elts);
5007 |         }
5008 |     }
5009 | 
5010 |   if (stmts)
5011 |     vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, stmts);
5012 |   return init_def;
5013 | }
5014 | 
5015 | /* Get at the initial defs for the reduction PHIs for REDUC_INFO,
5016 |    which performs a reduction involving GROUP_SIZE scalar statements.
5017 |    NUMBER_OF_VECTORS is the number of vector defs to create.  If NEUTRAL_OP
5018 |    is nonnull, introducing extra elements of that value will not change the
5019 |    result.  */
5020 | 
5021 | static void
5022 | get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
5023 |                                 stmt_vec_info reduc_info,
5024 |                                 vec<tree> *vec_oprnds,
5025 |                                 unsigned int number_of_vectors,
5026 |                                 unsigned int group_size, tree neutral_op)
5027 | {
5028 |   vec<tree> &initial_values = reduc_info->reduc_initial_values;
5029 |   unsigned HOST_WIDE_INT nunits;
5030 |   unsigned j, number_of_places_left_in_vector;
5031 |   tree vector_type = STMT_VINFO_VECTYPE (reduc_info);
5032 |   unsigned int i;
5033 | 
5034 |   gcc_assert (group_size == initial_values.length () || neutral_op);
5035 | 
5036 |   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
5037 |      created vectors.  It is greater than 1 if unrolling is performed.
5038 | 
5039 |      For example, we have two scalar operands, s1 and s2 (e.g., group of
5040 |      strided accesses of size two), while NUNITS is four (i.e., four scalars
5041 |      of this type can be packed in a vector).  The output vector will contain
5042 |      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
5043 |      will be 2).
5044 | 
5045 |      If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several
5046 |      vectors containing the operands.
5047 | 
5048 |      For example, NUNITS is four as before, and the group size is 8
5049 |      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
5050 |      {s5, s6, s7, s8}.  */
5051 | 
5052 |   if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
5053 |     nunits = group_size;
5054 | 
5055 |   number_of_places_left_in_vector = nunits;
5056 |   bool constant_p = true;
5057 |   tree_vector_builder elts (vector_type, nunits, 1);
5058 |   elts.quick_grow (nunits);
5059 |   gimple_seq ctor_seq = NULL;
5060 |   for (j = 0; j < nunits * number_of_vectors; ++j)
5061 |     {
5062 |       tree op;
5063 |       i = j % group_size;
5064 | 
5065 |       /* Get the def before the loop.  In reduction chain we have only
5066 |          one initial value.  Else we have as many as PHIs in the group.  */
5067 |       if (i >= initial_values.length () || (j > i && neutral_op))
5068 |         op = neutral_op;
5069 |       else
5070 |         op = initial_values[i];
5071 | 
5072 |       /* Create 'vect_ = {op0,op1,...,opn}'.  */
5073 |       number_of_places_left_in_vector--;
5074 |       elts[nunits - number_of_places_left_in_vector - 1] = op;
5075 |       if (!CONSTANT_CLASS_P (op))
5076 |         constant_p = false;
5077 | 
5078 |       if (number_of_places_left_in_vector == 0)
5079 |         {
5080 |           tree init;
5081 |           if (constant_p && !neutral_op
5082 |               ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
5083 |               : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
5084 |             /* Build the vector directly from ELTS.  */
5085 |             init = gimple_build_vector (&ctor_seq, &elts);
5086 |           else if (neutral_op)
5087 |             {
5088 |               /* Build a vector of the neutral value and shift the
5089 |                  other elements into place.  */
5090 |               init = gimple_build_vector_from_val (&ctor_seq, vector_type,
5091 |                                                    neutral_op);
5092 |               int k = nunits;
5093 |               while (k > 0 && elts[k - 1] == neutral_op)
5094 |                 k -= 1;
5095 |               while (k > 0)
5096 |                 {
5097 |                   k -= 1;
5098 |                   init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
5099 |                                        vector_type, init, elts[k]);
5100 |                 }
5101 |             }
5102 |           else
5103 |             {
5104 |               /* First time round, duplicate ELTS to fill the
5105 |                  required number of vectors.  */
5106 |               duplicate_and_interleave (loop_vinfo, &ctor_seq, vector_type,
5107 |                                         elts, number_of_vectors, *vec_oprnds);
5108 |               break;
5109 |             }
5110 |           vec_oprnds->quick_push (init);
5111 | 
5112 |           number_of_places_left_in_vector = nunits;
5113 |           elts.new_vector (vector_type, nunits, 1);
5114 |           elts.quick_grow (nunits);
5115 |           constant_p = true;
5116 |         }
5117 |     }
5118 |   if (ctor_seq != NULL)
5119 |     vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, ctor_seq);
5120 | }
5121 | 
5122 | /* For a statement STMT_INFO taking part in a reduction operation return
5123 |    the stmt_vec_info the meta information is stored on.  */
5124 | 
5125 | stmt_vec_info
5126 | info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info)
5127 | {
5128 |   stmt_info = vect_orig_stmt (stmt_info);
5129 |   gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info));
5130 |   if (!is_a <gphi *> (stmt_info->stmt)
5131 |       || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
5132 |     stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
5133 |   gphi *phi = as_a <gphi *> (stmt_info->stmt);
5134 |   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
5135 |     {
5136 |       if (gimple_phi_num_args (phi) == 1)
5137 |         stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
5138 |     }
5139 |   else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
5140 |     {
5141 |       stmt_vec_info info = vinfo->lookup_def (vect_phi_initial_value (phi));
5142 |       if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def)
5143 |         stmt_info = info;
5144 |     }
5145 |   return stmt_info;
5146 | }
5147 | 
5148 | /* See if LOOP_VINFO is an epilogue loop whose main loop had a reduction that
5149 |    REDUC_INFO can build on.  Adjust REDUC_INFO and return true if so,
5150 |    otherwise return false.  */
5151 | 
5152 | static bool
5153 | vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
5154 |                                 stmt_vec_info reduc_info)
5155 | {
5156 |   loop_vec_info main_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
5157 |   if (!main_loop_vinfo)
5158 |     return false;
5159 | 
5160 |   if (STMT_VINFO_REDUC_TYPE (reduc_info) != TREE_CODE_REDUCTION)
5161 |     return false;
5162 | 
5163 |   unsigned int num_phis = reduc_info->reduc_initial_values.length ();
5164 |   auto_vec<tree, 16> main_loop_results (num_phis);
5165 |   auto_vec<tree, 16> initial_values (num_phis);
5166 |   if (edge main_loop_edge = loop_vinfo->main_loop_edge)
5167 |     {
5168 |       /* The epilogue loop can be entered either from the main loop or
5169 |          from an earlier guard block.  */
5170 |       edge skip_edge = loop_vinfo->skip_main_loop_edge;
5171 |       for (tree incoming_value : reduc_info->reduc_initial_values)
5172 |         {
5173 |           /* Look for:
5174 | 
5175 |                INCOMING_VALUE = phi<MAIN_LOOP_RESULT(main loop),
5176 |                                     INITIAL_VALUE(guard block)>.  */
5177 |           gcc_assert (TREE_CODE (incoming_value) == SSA_NAME);
5178 | 
5179 |           gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (incoming_value));
5180 |           gcc_assert (gimple_bb (phi) == main_loop_edge->dest);
5181 | 
5182 |           tree from_main_loop = PHI_ARG_DEF_FROM_EDGE (phi, main_loop_edge);
5183 |           tree from_skip = PHI_ARG_DEF_FROM_EDGE (phi, skip_edge);
5184 | 
5185 |           main_loop_results.quick_push (from_main_loop);
5186 |           initial_values.quick_push (from_skip);
5187 |         }
5188 |     }
5189 |   else
5190 |     /* The main loop dominates the epilogue loop.  */
5191 |     main_loop_results.splice (reduc_info->reduc_initial_values);
5192 | 
5193 |   /* See if the main loop has the kind of accumulator we need.  */
5194 |   vect_reusable_accumulator *accumulator
5195 |     = main_loop_vinfo->reusable_accumulators.get (main_loop_results[0]);
5196 |   if (!accumulator
5197 |       || num_phis != accumulator->reduc_info->reduc_scalar_results.length ()
5198 |       || !std::equal (main_loop_results.begin (), main_loop_results.end (),
5199 |                       accumulator->reduc_info->reduc_scalar_results.begin ()))
5200 |     return false;
5201 | 
5202 |   /* Handle the case where we can reduce wider vectors to narrower ones.  */
5203 |   tree vectype = STMT_VINFO_VECTYPE (reduc_info);
5204 |   tree old_vectype = TREE_TYPE (accumulator->reduc_input);
5205 |   unsigned HOST_WIDE_INT m;
5206 |   if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype),
5207 |                             TYPE_VECTOR_SUBPARTS (vectype), &m))
5208 |     return false;
5209 |   /* Check the intermediate vector types and operations are available.  */
5210 |   tree prev_vectype = old_vectype;
5211 |   poly_uint64 intermediate_nunits = TYPE_VECTOR_SUBPARTS (old_vectype);
5212 |   while (known_gt (intermediate_nunits, TYPE_VECTOR_SUBPARTS (vectype)))
5213 |     {
5214 |       intermediate_nunits = exact_div (intermediate_nunits, 2);
5215 |       tree intermediate_vectype = get_related_vectype_for_scalar_type
5216 |         (TYPE_MODE (vectype), TREE_TYPE (vectype), intermediate_nunits);
5217 |       if (!intermediate_vectype
5218 |           || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info),
5219 |                                     intermediate_vectype)
5220 |           || !can_vec_extract (TYPE_MODE (prev_vectype),
5221 |                                TYPE_MODE (intermediate_vectype)))
5222 |         return false;
5223 |       prev_vectype = intermediate_vectype;
5224 |     }
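     |   /* An invented example of this narrowing: a V8SI accumulator from the
     |      main loop and a V4SI epilogue vectype need one halving step, which
     |      requires the reduction operation on V4SI (a V4SI PLUS for a sum
     |      reduction) and a V8SI -> V4SI vec_extract.  */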
5222 | ||||
5223 | /* Non-SLP reductions might apply an adjustment after the reduction | |||
5224 | operation, in order to simplify the initialization of the accumulator. | |||
5225 | If the epilogue loop carries on from where the main loop left off, | |||
5226 | it should apply the same adjustment to the final reduction result. | |||
5227 | ||||
5228 | If the epilogue loop can also be entered directly (rather than via | |||
5229 | the main loop), we need to be able to handle that case in the same way, | |||
5230 | with the same adjustment. (In principle we could add a PHI node | |||
5231 | to select the correct adjustment, but in practice that shouldn't be | |||
5232 | necessary.) */ | |||
5233 | tree main_adjustment | |||
5234 | = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (accumulator->reduc_info)(accumulator->reduc_info)->reduc_epilogue_adjustment; | |||
5235 | if (loop_vinfo->main_loop_edge && main_adjustment) | |||
5236 | { | |||
5237 | gcc_assert (num_phis == 1)((void)(!(num_phis == 1) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5237, __FUNCTION__), 0 : 0)); | |||
5238 | tree initial_value = initial_values[0]; | |||
5239 | /* Check that we can use INITIAL_VALUE as the adjustment and | |||
5240 | initialize the accumulator with a neutral value instead. */ | |||
5241 | if (!operand_equal_p (initial_value, main_adjustment)) | |||
5242 | return false; | |||
5243 | code_helper code = STMT_VINFO_REDUC_CODE (reduc_info)(reduc_info)->reduc_code; | |||
5244 | initial_values[0] = neutral_op_for_reduction (TREE_TYPE (initial_value)((contains_struct_check ((initial_value), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5244, __FUNCTION__))->typed.type), | |||
5245 | code, initial_value); | |||
5246 | } | |||
5247 | STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)(reduc_info)->reduc_epilogue_adjustment = main_adjustment; | |||
5248 | reduc_info->reduc_initial_values.truncate (0); | |||
5249 | reduc_info->reduc_initial_values.splice (initial_values); | |||
5250 | reduc_info->reused_accumulator = accumulator; | |||
5251 | return true; | |||
5252 | } | |||
5253 | ||||
5254 | /* Reduce the vector VEC_DEF down to VECTYPE with reduction operation | |||
5255 | CODE emitting stmts before GSI. Returns a vector def of VECTYPE. */ | |||
5256 | ||||
5257 | static tree | |||
5258 | vect_create_partial_epilog (tree vec_def, tree vectype, code_helper code, | |||
5259 | gimple_seq *seq) | |||
5260 | { | |||
5261 | unsigned nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec_def)((contains_struct_check ((vec_def), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5261, __FUNCTION__))->typed.type)).to_constant (); | |||
5262 | unsigned nunits1 = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); | |||
5263 | tree stype = TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5263, __FUNCTION__))->typed.type); | |||
5264 | tree new_temp = vec_def; | |||
5265 | while (nunits > nunits1) | |||
5266 | { | |||
5267 | nunits /= 2; | |||
5268 | tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type) , "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5268, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (vectype) : (vectype)->type_common.mode), | |||
5269 | stype, nunits); | |||
5270 | unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1)((tree_class_check ((vectype1), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc" , 5270, __FUNCTION__))->type_common.size)); | |||
5271 | ||||
5272 | /* The target has to make sure we support lowpart/highpart | |||
5273 | extraction, either via direct vector extract or through | |||
5274 | an integer mode punning. */ | |||
      tree dst1, dst2;
      gimple *epilog_stmt;
      if (convert_optab_handler (vec_extract_optab,
				 TYPE_MODE (TREE_TYPE (new_temp)),
				 TYPE_MODE (vectype1))
	  != CODE_FOR_nothing)
	{
	  /* Extract sub-vectors directly once vec_extract becomes
	     a conversion optab.  */
	  dst1 = make_ssa_name (vectype1);
	  epilog_stmt
	      = gimple_build_assign (dst1, BIT_FIELD_REF,
				     build3 (BIT_FIELD_REF, vectype1,
					     new_temp, TYPE_SIZE (vectype1),
					     bitsize_int (0)));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	  dst2 = make_ssa_name (vectype1);
	  epilog_stmt
	      = gimple_build_assign (dst2, BIT_FIELD_REF,
				     build3 (BIT_FIELD_REF, vectype1,
					     new_temp, TYPE_SIZE (vectype1),
					     bitsize_int (bitsize)));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	}
      else
	{
	  /* Extract via punning to appropriately sized integer mode
	     vector.  */
	  tree eltype = build_nonstandard_integer_type (bitsize, 1);
	  tree etype = build_vector_type (eltype, 2);
	  gcc_assert (convert_optab_handler (vec_extract_optab,
					     TYPE_MODE (etype),
					     TYPE_MODE (eltype))
		      != CODE_FOR_nothing);
	  tree tem = make_ssa_name (etype);
	  epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR,
					     build1 (VIEW_CONVERT_EXPR,
						     etype, new_temp));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	  new_temp = tem;
	  tem = make_ssa_name (eltype);
	  epilog_stmt
	    = gimple_build_assign (tem, BIT_FIELD_REF,
				   build3 (BIT_FIELD_REF, eltype,
					   new_temp, TYPE_SIZE (eltype),
					   bitsize_int (0)));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	  dst1 = make_ssa_name (vectype1);
	  epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR,
					     build1 (VIEW_CONVERT_EXPR,
						     vectype1, tem));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	  tem = make_ssa_name (eltype);
	  epilog_stmt
	    = gimple_build_assign (tem, BIT_FIELD_REF,
				   build3 (BIT_FIELD_REF, eltype,
					   new_temp, TYPE_SIZE (eltype),
					   bitsize_int (bitsize)));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	  dst2 = make_ssa_name (vectype1);
	  epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR,
					     build1 (VIEW_CONVERT_EXPR,
						     vectype1, tem));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	}

      new_temp = gimple_build (seq, code, vectype1, dst1, dst2);
    }

  return new_temp;
}
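
/* For illustration only (not emitted literally as written): for a
   PLUS_EXPR reduction the while loop above produces the classic log2
   halving sequence.  Reducing eight int partial sums down to four
   looks like this in scalar terms:

     int v[8];   // hypothetical partial results
     int v1[4];
     for (int i = 0; i < 4; i++)
       v1[i] = v[i] + v[i + 4];   // combine low and high halves

   and the loop repeats the halving until the requested number of
   vector subparts (NUNITS1) is reached.  */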

/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   STMT_INFO is the scalar reduction stmt that is being vectorized.
   SLP_NODE is an SLP node containing a group of reduction statements.  The
     first one in this group is STMT_INFO.
   SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE.
   REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi
     (counting from 0).

   This function:
   1. Completes the reduction def-use cycles.
   2. "Reduces" each vector of partial results VECT_DEFS into a single result,
      by calling the function specified by REDUC_FN if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

   The flow at the entry to this function:

	loop:
	  vec_def = phi <vec_init, null>	# REDUCTION_PHI
	  VECT_DEF = vector_stmt		# vectorized form of STMT_INFO
	  s_loop = scalar_stmt			# (scalar) STMT_INFO
	loop_exit:
	  s_out0 = phi <s_loop>			# (scalar) EXIT_PHI
	  use <s_out0>
	  use <s_out0>

   The above is transformed by this function into:

	loop:
	  vec_def = phi <vec_init, VECT_DEF>	# REDUCTION_PHI
	  VECT_DEF = vector_stmt		# vectorized form of STMT_INFO
	  s_loop = scalar_stmt			# (scalar) STMT_INFO
	loop_exit:
	  s_out0 = phi <s_loop>			# (scalar) EXIT_PHI
	  v_out1 = phi <VECT_DEF>		# NEW_EXIT_PHI
	  v_out2 = reduce <v_out1>
	  s_out3 = extract_field <v_out2, 0>
	  s_out4 = adjust_result <s_out3>
	  use <s_out4>
	  use <s_out4>  */

static void
vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
				  stmt_vec_info stmt_info,
				  slp_tree slp_node,
				  slp_instance slp_node_instance)
{
  stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
  gcc_assert (reduc_info->is_reduc_info);
  /* For double reductions we need to get at the inner loop reduction
     stmt which has the meta info attached.  Our stmt_info is that of the
     loop-closed PHI of the inner loop which we remember as
     def for the reduction PHI generation.  */
  bool double_reduc = false;
  stmt_vec_info rdef_info = stmt_info;
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    {
      gcc_assert (!slp_node);
      double_reduc = true;
      stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def
					    (stmt_info->stmt, 0));
      stmt_info = vect_stmt_to_vectorize (stmt_info);
    }
  gphi *reduc_def_stmt
    = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
  code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
  internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
  tree vectype;
  machine_mode mode;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  gimple *new_phi = NULL, *phi;
  gimple_stmt_iterator exit_gsi;
  tree new_temp = NULL_TREE, new_name, new_scalar_dest;
  gimple *epilog_stmt = NULL;
  gimple *exit_phi;
  tree bitsize;
  tree def;
  tree orig_name, scalar_result;
  imm_use_iterator imm_iter, phi_imm_iter;
  use_operand_p use_p, phi_use_p;
  gimple *use_stmt;
  auto_vec<tree> reduc_inputs;
  int j, i;
  vec<tree> &scalar_results = reduc_info->reduc_scalar_results;
  unsigned int group_size = 1, k;
  auto_vec<gimple *> phis;
  /* SLP reduction without reduction chain, e.g.,
     # a1 = phi <a2, a0>
     # b1 = phi <b2, b0>
     a2 = operation (a1)
     b2 = operation (b1)  */
  bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info));
  bool direct_slp_reduc;
  tree induction_index = NULL_TREE;

  if (slp_node)
    group_size = SLP_TREE_LANES (slp_node);

  if (nested_in_vect_loop_p (loop, stmt_info))
    {
      outer_loop = loop;
      loop = loop->inner;
      gcc_assert (!slp_node && double_reduc);
    }

  vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
  gcc_assert (vectype);
  mode = TYPE_MODE (vectype);

  tree induc_val = NULL_TREE;
  tree adjustment_def = NULL;
  if (slp_node)
    ;
  else
    {
      /* Optimize: for induction condition reduction, if we can't use zero
	 for induc_val, use initial_def.  */
      if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
	induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
      else if (double_reduc)
	;
      else
	adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info);
    }

  stmt_vec_info single_live_out_stmt[] = { stmt_info };
  array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt;
  if (slp_reduc)
    /* All statements produce live-out values.  */
    live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  else if (slp_node)
    {
      /* The last statement in the reduction chain produces the live-out
	 value.  Note SLP optimization can shuffle scalar stmts to
	 optimize permutations so we have to search for the last stmt.  */
      for (k = 0; k < group_size; ++k)
	if (!REDUC_GROUP_NEXT_ELEMENT (SLP_TREE_SCALAR_STMTS (slp_node)[k]))
	  {
	    single_live_out_stmt[0] = SLP_TREE_SCALAR_STMTS (slp_node)[k];
	    break;
	  }
    }

  unsigned vec_num;
  int ncopies;
  if (slp_node)
    {
      vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis).length ();
      ncopies = 1;
    }
  else
    {
      stmt_vec_info reduc_info = loop_vinfo->lookup_stmt (reduc_def_stmt);
      vec_num = 1;
      ncopies = STMT_VINFO_VEC_STMTS (reduc_info).length ();
    }

  /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
     which is updated with the current index of the loop for every match of
     the original loop's cond_expr (VEC_STMT).  This results in a vector
     containing the last time the condition passed for that vector lane.
     The first match will be a 1 to allow 0 to be used for non-matching
     indexes.  If there are no matches at all then the vector will be all
     zeroes.

     PR92772: This algorithm is broken for architectures that support
     masked vectors, but do not provide fold_extract_last.  */
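
  /* A minimal scalar sketch of that index-tracking idea, with
     hypothetical values (four lanes, 1-based indexes, 0 meaning
     "never matched"):

       unsigned last_match[4] = { 0, 0, 0, 0 };
       for (unsigned i = 0; i < n; i += 4)	// index vec {i+1,...,i+4}
	 for (unsigned lane = 0; lane < 4; lane++)
	   if (cond (a[i + lane]))
	     last_match[lane] = i + lane + 1;

     The epilogue later reduces LAST_MATCH with MAX to find the lane
     that holds the final (latest) value.  */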
  if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
    {
      auto_vec<std::pair<tree, bool>, 2> ccompares;
      stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info);
      cond_info = vect_stmt_to_vectorize (cond_info);
      while (cond_info != reduc_info)
	{
	  if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
	    {
	      gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)[0];
	      gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
	      ccompares.safe_push
		(std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)),
				 STMT_VINFO_REDUC_IDX (cond_info) == 2));
	    }
	  cond_info
	    = loop_vinfo->lookup_def (gimple_op (cond_info->stmt,
						 1 + STMT_VINFO_REDUC_IDX
							(cond_info)));
	  cond_info = vect_stmt_to_vectorize (cond_info);
	}
      gcc_assert (ccompares.length () != 0);

      tree indx_before_incr, indx_after_incr;
      poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
      int scalar_precision
	= GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype)));
      tree cr_index_scalar_type = make_unsigned_type (scalar_precision);
      tree cr_index_vector_type = get_related_vectype_for_scalar_type
	(TYPE_MODE (vectype), cr_index_scalar_type,
	 TYPE_VECTOR_SUBPARTS (vectype));

      /* First we create a simple vector induction variable which starts
	 with the values {1,2,3,...} (SERIES_VECT) and increments by the
	 vector size (STEP).  */

      /* Create a {1,2,3,...} vector.  */
      tree series_vect = build_index_vector (cr_index_vector_type, 1, 1);

      /* Create a vector of the step value.  */
      tree step = build_int_cst (cr_index_scalar_type, nunits_out);
      tree vec_step = build_vector_from_val (cr_index_vector_type, step);

      /* Create an induction variable.  */
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
      create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
		 insert_after, &indx_before_incr, &indx_after_incr);

      /* Next create a new phi node vector (NEW_PHI_TREE) which starts
	 filled with zeros (VEC_ZERO).  */

      /* Create a vector of 0s.  */
      tree zero = build_zero_cst (cr_index_scalar_type);
      tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);

      /* Create a vector phi node.  */
      tree new_phi_tree = make_ssa_name (cr_index_vector_type);
      new_phi = create_phi_node (new_phi_tree, loop->header);
      add_phi_arg (as_a <gphi *> (new_phi), vec_zero,
		   loop_preheader_edge (loop), UNKNOWN_LOCATION);

      /* Now take the condition from the loop's original cond_exprs and
	 produce new cond_exprs (INDEX_COND_EXPR), each of which for
	 every match uses values from the induction variable
	 (INDEX_BEFORE_INCR) and otherwise uses values from the phi node
	 (NEW_PHI_TREE).
	 Finally, we update the phi (NEW_PHI_TREE) to take the value of
	 the new cond_expr (INDEX_COND_EXPR).  */
      gimple_seq stmts = NULL;
      for (int i = ccompares.length () - 1; i != -1; --i)
	{
	  tree ccompare = ccompares[i].first;
	  if (ccompares[i].second)
	    new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
					 cr_index_vector_type,
					 ccompare,
					 indx_before_incr, new_phi_tree);
	  else
	    new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
					 cr_index_vector_type,
					 ccompare,
					 new_phi_tree, indx_before_incr);
	}
      gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT);

      /* Update the phi with the vec cond.  */
      induction_index = new_phi_tree;
      add_phi_arg (as_a <gphi *> (new_phi), induction_index,
		   loop_latch_edge (loop), UNKNOWN_LOCATION);
    }

  /* 2. Create epilog code.
     The reduction epilog code operates across the elements of the vector
     of partial results computed by the vectorized loop.
     The reduction epilog code consists of:

     step 1: compute the scalar result in a vector (v_out2)
     step 2: extract the scalar result (s_out3) from the vector (v_out2)
     step 3: adjust the scalar result (s_out3) if needed.

     Step 1 can be accomplished using one of the following three schemes:
	  (scheme 1) using reduc_fn, if available.
	  (scheme 2) using whole-vector shifts, if available.
	  (scheme 3) using a scalar loop.  In this case steps 1+2 above are
		     combined.

     The overall epilog code looks like this:

	s_out0 = phi <s_loop>			# original EXIT_PHI
	v_out1 = phi <VECT_DEF>			# NEW_EXIT_PHI
	v_out2 = reduce <v_out1>		# step 1
	s_out3 = extract_field <v_out2, 0>	# step 2
	s_out4 = adjust_result <s_out3>		# step 3

     (step 3 is optional, and steps 1 and 2 may be combined).
     Lastly, the uses of s_out0 are replaced by s_out4.  */
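
  /* For example (scheme 2, illustrative only): with four lanes and no
     reduc_fn, v_out2 can be computed by log2(4) = 2 whole-vector
     shifts plus adds, the scalar result ending up in a single lane:

       v_tmp  = vec_shift <v_out1, 2 elements>
       v_out2 = v_out1 + v_tmp		# lanes {0+2, 1+3, ., .}
       v_tmp  = vec_shift <v_out2, 1 element>
       v_out2 = v_out2 + v_tmp		# one lane now holds 0+1+2+3

     after which step 2 extracts that lane.  */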

  /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
	 v_out1 = phi <VECT_DEF>
     Store them in NEW_PHIS.  */
  if (double_reduc)
    loop = outer_loop;
  exit_bb = single_exit (loop)->dest;
  exit_gsi = gsi_after_labels (exit_bb);
  reduc_inputs.create (slp_node ? vec_num : ncopies);
  for (unsigned i = 0; i < vec_num; i++)
    {
      gimple_seq stmts = NULL;
      if (slp_node)
	def = vect_get_slp_vect_def (slp_node, i);
      else
	def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]);
      for (j = 0; j < ncopies; j++)
	{
	  tree new_def = copy_ssa_name (def);
	  phi = create_phi_node (new_def, exit_bb);
	  if (j)
	    def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]);
	  SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
	  new_def = gimple_convert (&stmts, vectype, new_def);
	  reduc_inputs.quick_push (new_def);
	}
      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
    }

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
	 (i.e. when reduc_fn is not available) and in the final adjustment
	 code (if needed).  Also get the original scalar reduction variable as
	 defined in the loop.  In case STMT is a "pattern-stmt" (i.e. it
	 represents a reduction pattern), the tree-code and scalar-def are
	 taken from the original stmt that the pattern-stmt (STMT) replaces.
	 Otherwise (it is a regular reduction) the tree-code and scalar-def
	 are taken from STMT.  */

  stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
  if (orig_stmt_info != stmt_info)
    {
      /* Reduction pattern  */
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info);
    }

  scalar_dest = gimple_get_lhs (orig_stmt_info->stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  scalar_results.truncate (0);
  scalar_results.reserve_exact (group_size);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);